Friday 8 January 2016

Testing - Jupyter with the Iris dataset

In [3]:
import numpy as np
import math
from sklearn import datasets, neighbors, linear_model 
In [15]:
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target
print y_digits
print X_digits
[0 1 2 ..., 8 9 8]
[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]
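As a quick sanity check (my addition, not part of the original run): each row of X_digits is an 8x8 image flattened into 64 features. A minimal sketch to display one digit, assuming matplotlib is installed:

import matplotlib.pyplot as plt

# digits.images holds the same data as 8x8 arrays, before flattening
plt.imshow(digits.images[0], cmap=plt.cm.gray_r, interpolation='nearest')
plt.title("label: %d" % digits.target[0])
plt.show()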
In [5]:
np.random.seed(123)
indices = np.random.permutation(len(X_digits))
In [8]:
num_samples = len(digits.data)
test_set_size = int(math.floor(.10 * num_samples))
print "number of samples: ", num_samples
print "test_set_size: ", test_set_size
number of samples:  1797
test_set_size:  179
In [9]:
digits_X_train = X_digits[indices[:-test_set_size]]
digits_y_train = y_digits[indices[:-test_set_size]]
digits_X_test = X_digits[indices[-test_set_size:]]
digits_y_test = y_digits[indices[-test_set_size:]]
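A quick check (my addition) that the 90/10 split covers all samples with no overlap:

print "train size:", len(digits_X_train)
print "test size:", len(digits_X_test)
# the permutation guarantees the two index sets are disjoint
assert len(digits_X_train) + len(digits_X_test) == num_samples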
In [11]:
knn = neighbors.KNeighborsClassifier()
knn.fit(digits_X_train, digits_y_train)
print "The Percentage of Correct Classification when using the K Nearest Neighbour algorithm - KNN score: "
print knn.score(digits_X_test, digits_y_test)
The Percentage of Correct Classification when using the K Nearest Neighbour algorithm - KNN score: 
0.988826815642
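KNeighborsClassifier defaults to n_neighbors=5. A small sketch (my addition) to see how the score moves with k, reusing the split above:

for k in (1, 3, 5, 10):
    clf = neighbors.KNeighborsClassifier(n_neighbors=k)
    clf.fit(digits_X_train, digits_y_train)
    print "k = %2d, score = %.4f" % (k, clf.score(digits_X_test, digits_y_test))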
In [13]:
logistic = linear_model.LogisticRegression(C=1e5)
logistic.fit(digits_X_train, digits_y_train)
print "The Percentage of Correct Classification when using the - Logistic Regression Model is - Logistic Regression score: "
print logistic.score(digits_X_test, digits_y_test)
The Percentage of Correct Classification when using the - Logistic Regression Model is - Logistic Regression score: 
0.960893854749
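A single score hides where the errors land. A sketch (my addition) using sklearn.metrics to get the per-digit confusion matrix:

from sklearn.metrics import confusion_matrix

predicted = logistic.predict(digits_X_test)
# rows are true digits, columns are predicted digits
print confusion_matrix(digits_y_test, predicted)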
In [14]:
print y_digits
[0 1 2 ..., 8 9 8]
In [16]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [19]:
# Scatter points
fig, ax = plt.subplots()
np.random.seed(0)
x, y = np.random.normal(size=(2, 600))
color, size = np.random.random((2, 600))

ax.scatter(x, y, c=color, s=500 * size, alpha=0.3)
ax.grid(color='lightgray', alpha=0.7)
In [20]:
import numpy as np
from sklearn import datasets, svm
In [24]:
iris = datasets.load_iris()
num_samples = len(iris.data)
print "The Length of the data set is :"
print num_samples
test_set_size = round(.1 * num_samples)
print "The Length of the test data set / test sample is :"
print test_set_size
The Length of the data set is :
150
The Length of the test data set / test sample is :
15.0
In [25]:
iris_X = iris.data
iris_y = iris.target
In [27]:
iris_X_train_class1 = iris_X[iris_y == 1][:-5, :2]
iris_X_train_class2 = iris_X[iris_y == 2][:-5, :2]
iris_X_train = np.concatenate((iris_X_train_class1, iris_X_train_class2), axis=0)

iris_y_train_class1 = iris_y[iris_y == 1][:-5]
iris_y_train_class2 = iris_y[iris_y == 2][:-5]
iris_y_train = np.concatenate((iris_y_train_class1, iris_y_train_class2), axis=0)

iris_X_test_class1 = iris_X[iris_y == 1][-5:, :2]
iris_X_test_class2 = iris_X[iris_y == 2][-5:, :2]
iris_X_test = np.concatenate((iris_X_test_class1, iris_X_test_class2), axis=0)

iris_y_test_class1 = iris_y[iris_y == 1][-5:]
iris_y_test_class2 = iris_y[iris_y == 2][-5:]
iris_y_test = np.concatenate((iris_y_test_class1, iris_y_test_class2), axis=0)
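The manual slicing above works, but scikit-learn ships a helper for exactly this. A sketch assuming a pre-0.18 install, where it lives in sklearn.cross_validation (newer releases moved it to sklearn.model_selection):

from sklearn.cross_validation import train_test_split

# a plain random 90/10 split over the full three-class iris data
X_tr, X_te, y_tr, y_te = train_test_split(iris_X, iris_y, test_size=0.1, random_state=123)
print len(X_tr), len(X_te)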
In [30]:
from sklearn import svm
In [32]:
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
In [33]:
from sklearn import datasets
iris = datasets.load_iris()
data = iris.data
data.shape
Out[33]:
(150L, 4L)
In [34]:
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target
np.unique(iris_y)
Out[34]:
array([0, 1, 2])
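A quick check (my addition) that the three classes are balanced:

print np.bincount(iris_y)   # -> [50 50 50], i.e. 50 samples per class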
In [35]:
# Split iris data in train and test data
# A random permutation, to split the data randomly
np.random.seed(123)
indices = np.random.permutation(len(iris_X))
iris_X_train = iris_X[indices[:-10]]
iris_y_train = iris_y[indices[:-10]]
iris_X_test = iris_X[indices[-10:]]
iris_y_test = iris_y[indices[-10:]]
# Create and fit a nearest-neighbor classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)
Out[35]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
In [36]:
knn.predict(iris_X_test)
Out[36]:
array([1, 1, 2, 1, 2, 0, 1, 1, 2, 2])
In [37]:
iris_y_test
Out[37]:
array([1, 1, 2, 2, 1, 0, 1, 1, 2, 2])
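Two of the ten predictions (positions 3 and 4) differ from the true labels, i.e. 8/10 correct. A one-line check (my addition):

print np.mean(knn.predict(iris_X_test) == iris_y_test)   # 0.8 on this split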
In [39]:
from sklearn import svm
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
# SVMs can be used for regression (SVR, Support Vector Regression)
# or for classification (SVC, Support Vector Classification).
Out[39]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
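The kernel is SVC's main knob. A sketch (my addition) comparing the built-in kernels on the same iris split:

for kern in ('linear', 'poly', 'rbf'):
    clf = svm.SVC(kernel=kern)
    clf.fit(iris_X_train, iris_y_train)
    print "%-6s kernel, score = %.3f" % (kern, clf.score(iris_X_test, iris_y_test))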
In [41]:
from sklearn import datasets, svm
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target
svc = svm.SVC(C=1, kernel='linear')
svc.fit(X_digits[:-100], y_digits[:-100]).score(X_digits[-100:], y_digits[-100:])
Out[41]:
0.97999999999999998
In [47]:
import numpy as np
X_folds = np.array_split(X_digits, 3)
y_folds = np.array_split(y_digits, 3)
scores = list()
for k in range(3):
    # We use 'list' to copy, in order to 'pop' later on
    X_train = list(X_folds)
    X_test = X_train.pop(k)
    X_train = np.concatenate(X_train)
    y_train = list(y_folds)
    y_test = y_train.pop(k)
    y_train = np.concatenate(y_train)
    scores.append(svc.fit(X_train, y_train).score(X_test, y_test))
print scores  # per-fold scores (cf. the scikit-learn tutorial, p. 21)
[0.93489148580968284, 0.95659432387312182, 0.93989983305509184]
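The manual fold loop above is what cross_val_score automates. A sketch assuming the pre-0.18 module path (sklearn.cross_validation; later releases use sklearn.model_selection). Note that for a classifier an integer cv means stratified folds, so the scores can differ slightly from the contiguous folds used above:

from sklearn.cross_validation import cross_val_score

print cross_val_score(svc, X_digits, y_digits, cv=3)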