In [3]:
import numpy as np
import math
from sklearn import datasets, neighbors, linear_model
In [15]:
# Load the handwritten-digits dataset: 8x8 grayscale images flattened
# into 64-element feature vectors, with digit labels 0-9.
digits = datasets.load_digits()
X_digits = digits.data      # feature matrix, one row per sample
y_digits = digits.target    # integer class labels
# Python 3 print() calls; the original Python 2 `print x` statements
# are syntax errors under Python 3.
print(y_digits)
print(X_digits)
In [5]:
# Fix the global RNG so the permutation below (and therefore the
# train/test split derived from it) is reproducible across runs.
np.random.seed(123)
# Random ordering of all sample indices, used to shuffle before splitting.
indices = np.random.permutation(len(X_digits))
In [8]:
# Size a 10% hold-out test set.
num_samples = len(digits.data)
# int() guard: under Python 2, math.floor returns a float, and a float
# cannot be used as a slice index in the split cells below.
test_set_size = int(math.floor(.10 * num_samples))
print("number of samples: ", num_samples)
print("test_set_size: ", test_set_size)
In [9]:
# Shuffled split: all but the last `test_set_size` shuffled indices form
# the training set; the tail is the held-out test set.
# int() guard: upstream `math.floor`/`round` may yield a float under
# Python 2, and slice indices must be integers.
split = int(test_set_size)
digits_X_train = X_digits[indices[:-split]]
digits_y_train = y_digits[indices[:-split]]
digits_X_test = X_digits[indices[-split:]]
digits_y_test = y_digits[indices[-split:]]
In [11]:
# k-nearest-neighbours classifier with scikit-learn defaults (k=5),
# trained on the shuffled training split and scored on the hold-out set.
knn = neighbors.KNeighborsClassifier()
knn.fit(digits_X_train, digits_y_train)
# Python 3 print() calls (original Py2 print statements fail under Py3).
print("The Percentage of Correct Classification when using the K Nearest Neighbour algorithm - KNN score: ")
print(knn.score(digits_X_test, digits_y_test))
In [13]:
# Logistic regression baseline; C=1e5 means very weak regularization.
logistic = linear_model.LogisticRegression(C=1e5)
logistic.fit(digits_X_train, digits_y_train)
# Python 3 print() calls (original Py2 print statements fail under Py3).
print("The Percentage of Correct Classification when using the - Logistic Regression Model is - Logistic Regression score: ")
print(logistic.score(digits_X_test, digits_y_test))
In [14]:
print y_digits
In [16]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [19]:
# Scatter demo: 600 random points with random colour and size.
fig, ax = plt.subplots()
np.random.seed(0)  # reseed so the figure is reproducible
x, y = np.random.normal(size=(2, 600))
color, size = np.random.random((2, 600))
ax.scatter(x, y, c=color, s=500 * size, alpha=0.3)
# A figure should stand alone when skimmed: give it a title and labels.
ax.set(title="Random scatter demo", xlabel="x", ylabel="y")
ax.grid(color='lightgray', alpha=0.7)
In [20]:
import numpy as np
from sklearn import datasets, svm
In [24]:
# Load the iris dataset and size a 10% hold-out test set.
iris = datasets.load_iris()
num_samples = len(iris.data)
print("The Length of the data set is :")
print(num_samples)
# int() guard: round() returns a float under Python 2, and the size is
# meant to be used as an integer sample count.
test_set_size = int(round(.1 * num_samples))
print("The Length of the test data set / test sample is :")
print(test_set_size)
In [25]:
# Feature matrix (sepal/petal measurements) and integer class labels.
iris_X = iris.data
iris_y = iris.target
In [27]:
# Two-class subset (class 1 vs class 2) restricted to the first two
# features; the last 5 samples of each class are held out for testing.
iris_X_train_class1 = iris_X[iris_y == 1][:-5, :2]
iris_X_train_class2 = iris_X[iris_y == 2][:-5, :2]
iris_X_train = np.concatenate((iris_X_train_class1, iris_X_train_class2), axis=0)
iris_y_train_class1 = iris_y[iris_y == 1][:-5]
iris_y_train_class2 = iris_y[iris_y == 2][:-5]
iris_y_train = np.concatenate((iris_y_train_class1, iris_y_train_class2), axis=0)
iris_X_test_class1 = iris_X[iris_y == 1][-5:, :2]
iris_X_test_class2 = iris_X[iris_y == 2][-5:, :2]
iris_X_test = np.concatenate((iris_X_test_class1, iris_X_test_class2), axis=0)
# BUG FIX: the original rebuilt iris_y_train a second time here (copy-paste)
# and never created iris_y_test; build the test labels from the held-out
# tails to mirror iris_X_test.
iris_y_test_class1 = iris_y[iris_y == 1][-5:]
iris_y_test_class2 = iris_y[iris_y == 2][-5:]
iris_y_test = np.concatenate((iris_y_test_class1, iris_y_test_class2), axis=0)
In [30]:
from sklearn import svm
In [32]:
# BUG FIX: the original discarded the SVC instance on its own line and then
# called .fit on an undefined name `svc` (NameError). Bind the classifier
# before fitting it.
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
In [33]:
from sklearn import datasets

# Reload the iris dataset and display its feature-matrix dimensions
# (n_samples, n_features) as the cell's rich output.
iris = datasets.load_iris()
data = iris.data
data.shape
Out[33]:
In [34]:
import numpy as np
from sklearn import datasets

# Load iris and show the distinct class labels present in the targets.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target
np.unique(iris_y)
Out[34]:
In [35]:
# Split the iris data into train and test sets using a seeded random
# permutation, then fit a nearest-neighbour classifier.
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(123)
indices = np.random.permutation(len(iris_X))
train_idx, test_idx = indices[:-10], indices[-10:]
iris_X_train = iris_X[train_idx]
iris_y_train = iris_y[train_idx]
iris_X_test = iris_X[test_idx]
iris_y_test = iris_y[test_idx]

knn = KNeighborsClassifier()
knn.fit(iris_X_train, iris_y_train)
Out[35]:
In [36]:
knn.predict(iris_X_test)
Out[36]:
In [37]:
iris_y_test
Out[37]:
In [39]:
from sklearn import svm

# SVMs can be used in regression (SVR, Support Vector Regression)
# or in classification (SVC, Support Vector Classification).
# Here: a linear-kernel classifier on the iris training split.
svc = svm.SVC(kernel='linear')
svc.fit(iris_X_train, iris_y_train)
Out[39]:
In [41]:
from sklearn import datasets, svm

# Hold out the last 100 digit samples and score a linear SVM on them;
# the score is the cell's displayed output.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

svc = svm.SVC(C=1, kernel='linear')
svc.fit(X_digits[:-100], y_digits[:-100])
svc.score(X_digits[-100:], y_digits[-100:])
Out[41]:
In [47]:
import numpy as np

# Manual 3-fold cross-validation: each fold serves once as the test set
# while the remaining two folds form the training set.
X_folds = np.array_split(X_digits, 3)
y_folds = np.array_split(y_digits, 3)
scores = list()
for k in range(3):
    # We use 'list' to copy, in order to 'pop' the k-th fold without
    # destroying the original fold lists between iterations.
    X_train = list(X_folds)
    X_test = X_train.pop(k)
    X_train = np.concatenate(X_train)
    y_train = list(y_folds)
    y_test = y_train.pop(k)
    y_train = np.concatenate(y_train)
    scores.append(svc.fit(X_train, y_train).score(X_test, y_test))
# Python 3 print() call (original Py2 print statement fails under Py3).
# Expected ~[0.935, 0.957, 0.940] per the scikit-learn tutorial, page 21.
print(scores)
In [ ]:
In [ ]:
No comments:
Post a Comment