Friday 8 January 2016

Testing - Jupyter with the Iris dataset

In [3]:
import numpy as np
import math
from sklearn import datasets, neighbors, linear_model 
In [15]:
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target
print y_digits
print X_digits
[0 1 2 ..., 8 9 8]
[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]
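As a quick sanity check (my addition, not part of the original run): each row of X_digits is an 8x8 image flattened into 64 features. A minimal sketch to display one digit, assuming matplotlib is installed:

import matplotlib.pyplot as plt

# digits.images holds the same data as 8x8 arrays, before flattening
plt.imshow(digits.images[0], cmap=plt.cm.gray_r, interpolation='nearest')
plt.title("label: %d" % digits.target[0])
plt.show()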
In [5]:
np.random.seed(123)
indices = np.random.permutation(len(X_digits))
In [8]:
num_samples = len(digits.data)
test_set_size = int(math.floor(.10 * num_samples))
print "number of samples: ", num_samples
print "test_set_size: ", test_set_size
number of samples:  1797
test_set_size:  179
In [9]:
digits_X_train = X_digits[indices[:-test_set_size]]
digits_y_train = y_digits[indices[:-test_set_size]]
digits_X_test = X_digits[indices[-test_set_size:]]
digits_y_test = y_digits[indices[-test_set_size:]]
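A quick check (my addition) that the 90/10 split covers all samples with no overlap:

print "train size:", len(digits_X_train)
print "test size:", len(digits_X_test)
# the permutation guarantees the two index sets are disjoint
assert len(digits_X_train) + len(digits_X_test) == num_samples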
In [11]:
knn = neighbors.KNeighborsClassifier()
knn.fit(digits_X_train, digits_y_train)
print "The Percentage of Correct Classification when using the K Nearest Neighbour algorithm - KNN score: "
print knn.score(digits_X_test, digits_y_test)
The Percentage of Correct Classification when using the K Nearest Neighbour algorithm - KNN score: 
0.988826815642
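KNeighborsClassifier defaults to n_neighbors=5. A small sketch (my addition) to see how the score moves with k, reusing the split above:

for k in (1, 3, 5, 10):
    clf = neighbors.KNeighborsClassifier(n_neighbors=k)
    clf.fit(digits_X_train, digits_y_train)
    print "k = %2d, score = %.4f" % (k, clf.score(digits_X_test, digits_y_test))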
In [13]:
logistic = linear_model.LogisticRegression(C=1e5)
logistic.fit(digits_X_train, digits_y_train)
print "The Percentage of Correct Classification when using the - Logistic Regression Model is - Logistic Regression score: "
print logistic.score(digits_X_test, digits_y_test)
The Percentage of Correct Classification when using the - Logistic Regression Model is - Logistic Regression score: 
0.960893854749
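A single score hides where the errors land. A sketch (my addition) using sklearn.metrics to get the per-digit confusion matrix:

from sklearn.metrics import confusion_matrix

predicted = logistic.predict(digits_X_test)
# rows are true digits, columns are predicted digits
print confusion_matrix(digits_y_test, predicted)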
In [14]:
print y_digits
[0 1 2 ..., 8 9 8]
In [16]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [19]:
# Scatter points
fig, ax = plt.subplots()
np.random.seed(0)
x, y = np.random.normal(size=(2, 600))
color, size = np.random.random((2, 600))

ax.scatter(x, y, c=color, s=500 * size, alpha=0.3)
ax.grid(color='lightgray', alpha=0.7)
In [20]:
import numpy as np
from sklearn import datasets, svm
In [24]:
iris = datasets.load_iris()
num_samples = len(iris.data)
print "The Length of the data set is :"
print num_samples
test_set_size = round(.1 * num_samples)
print "The Length of the test data set / test sample is :"
print test_set_size
The Length of the data set is :
150
The Length of the test data set / test sample is :
15.0
In [25]:
iris_X = iris.data
iris_y = iris.target
In [27]:
iris_X_train_class1 = iris_X[iris_y == 1][:-5, :2]
iris_X_train_class2 = iris_X[iris_y == 2][:-5, :2]
iris_X_train = np.concatenate((iris_X_train_class1, iris_X_train_class2), axis=0)

iris_y_train_class1 = iris_y[iris_y == 1][:-5]
iris_y_train_class2 = iris_y[iris_y == 2][:-5]
iris_y_train = np.concatenate((iris_y_train_class1, iris_y_train_class2), axis=0)

iris_X_test_class1 = iris_X[iris_y == 1][-5:, :2]
iris_X_test_class2 = iris_X[iris_y == 2][-5:, :2]
iris_X_test = np.concatenate((iris_X_test_class1, iris_X_test_class2), axis=0)

iris_y_test_class1 = iris_y[iris_y == 1][-5:]
iris_y_test_class2 = iris_y[iris_y == 2][-5:]
iris_y_test = np.concatenate((iris_y_test_class1, iris_y_test_class2), axis=0)
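The manual slicing above works, but scikit-learn ships a helper for exactly this. A sketch assuming a pre-0.18 install, where it lives in sklearn.cross_validation (newer releases moved it to sklearn.model_selection):

from sklearn.cross_validation import train_test_split

# a plain random 90/10 split over the full three-class iris data
X_tr, X_te, y_tr, y_te = train_test_split(iris_X, iris_y, test_size=0.1, random_state=123)
print len(X_tr), len(X_te)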
In [30]:
from sklearn import svm
In [32]:
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
In [33]:
from sklearn import datasets
iris = datasets.load_iris()
data = iris.data
data.shape
Out[33]:
(150L, 4L)
In [34]:
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
iris_X = iris.data
iris_y = iris.target
np.unique(iris_y)
Out[34]:
array([0, 1, 2])
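A quick check (my addition) that the three classes are balanced:

print np.bincount(iris_y)   # -> [50 50 50], i.e. 50 samples per class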
In [35]:
# Split iris data in train and test data
# A random permutation, to split the data randomly
np.random.seed(123)
indices = np.random.permutation(len(iris_X))
iris_X_train = iris_X[indices[:-10]]
iris_y_train = iris_y[indices[:-10]]
iris_X_test = iris_X[indices[-10:]]
iris_y_test = iris_y[indices[-10:]]
# Create and fit a nearest-neighbor classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)
Out[35]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
In [36]:
knn.predict(iris_X_test)
Out[36]:
array([1, 1, 2, 1, 2, 0, 1, 1, 2, 2])
In [37]:
iris_y_test
Out[37]:
array([1, 1, 2, 2, 1, 0, 1, 1, 2, 2])
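Two of the ten predictions (positions 3 and 4) differ from the true labels, i.e. 8/10 correct. A one-line check (my addition):

print np.mean(knn.predict(iris_X_test) == iris_y_test)   # 0.8 on this split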
In [39]:
from sklearn import svm
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
# SVMs can be used for regression (SVR, Support Vector Regression)
# or for classification (SVC, Support Vector Classification).
Out[39]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
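The kernel is SVC's main knob. A sketch (my addition) comparing the built-in kernels on the same iris split:

for kern in ('linear', 'poly', 'rbf'):
    clf = svm.SVC(kernel=kern)
    clf.fit(iris_X_train, iris_y_train)
    print "%-6s kernel, score = %.3f" % (kern, clf.score(iris_X_test, iris_y_test))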
In [41]:
from sklearn import datasets, svm
digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target
svc = svm.SVC(C=1, kernel='linear')
svc.fit(X_digits[:-100], y_digits[:-100]).score(X_digits[-100:], y_digits[-100:])
Out[41]:
0.97999999999999998
In [47]:
import numpy as np
X_folds = np.array_split(X_digits, 3)
y_folds = np.array_split(y_digits, 3)
scores = list()
for k in range(3):
    # We use 'list' to copy, in order to 'pop' later on
    X_train = list(X_folds)
    X_test = X_train.pop(k)
    X_train = np.concatenate(X_train)
    y_train = list(y_folds)
    y_test = y_train.pop(k)
    y_train = np.concatenate(y_train)
    scores.append(svc.fit(X_train, y_train).score(X_test, y_test))
print scores  # per-fold scores (cf. the scikit-learn tutorial, p. 21)
[0.93489148580968284, 0.95659432387312182, 0.93989983305509184]
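The manual fold loop above is what cross_val_score automates. A sketch assuming the pre-0.18 module path (sklearn.cross_validation; later releases use sklearn.model_selection). Note that for a classifier an integer cv means stratified folds, so the scores can differ slightly from the contiguous folds used above:

from sklearn.cross_validation import cross_val_score

print cross_val_score(svc, X_digits, y_digits, cv=3)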