# Minimal linear-SVM example: fit a handful of 2-D points and inspect the
# support vectors reported by scikit-learn.
from sklearn import svm  # harmless if already imported earlier in the file

# NOTE(review): the original literal was mangled into a blog template tag
# ("{% post_link 2, 0 %}"). Three 2-D samples are reconstructed here so that
# they match the three labels below -- confirm the exact coordinates against
# the author's original source.
x = [[2, 0], [1, 1], [2, 3]]  # feature vectors
y = [0, 0, 1]  # class label (y_i) of each sample
clf = svm.SVC(kernel='linear')
clf.fit(x, y)

print(clf)

# get support vectors
print(clf.support_vectors_)

# get indices of support vectors (positions of the support-vector points)
print(clf.support_)

# get number of support vectors for each class (the classes denoted by y_i)
print(clf.n_support_)
# Plot the results (把结果画出来)
import numpy as np
import pylab as pl
from sklearn import svm

# we create 40 separable points: 20 shifted by (-2, -2) and 20 by (+2, +2)
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# fit the model
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)

# get the separating hyperplane
# Switching from the generic n-dimensional parameterization of the hyperplane
# to the 2D-specific equation of a line y = a*x + b: the generic
#   w_0*x + w_1*y + w_3 = 0
# can be rewritten as
#   y = -(w_0 / w_1) * x - (w_3 / w_1)
w = clf.coef_[0]  # weight vector w
a = -w[0] / w[1]  # slope
xx = np.linspace(-5, 5)
yy = a * xx - (clf.intercept_[0]) / w[1]  # second term is the y-intercept

# plot the parallels to the separating hyperplane that pass through the
# support vectors
# NOTE(review): this assumes the first and last support vectors lie on
# opposite sides of the hyperplane -- confirm against scikit-learn's ordering
# of support_vectors_.
b = clf.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

# plot the line, the points, and the nearest vectors to the plane
pl.plot(xx, yy, 'k-')
pl.plot(xx, yy_down, 'k--')
pl.plot(xx, yy_up, 'k--')

# The comment above promises the points and the support vectors as well;
# draw them (support vectors as hollow circles) and display the figure.
pl.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
           s=80, facecolors='none', edgecolors='k')
pl.scatter(X[:, 0], X[:, 1], c=Y, cmap=pl.cm.Paired)
pl.axis('tight')
pl.show()
from time import time
import logging
import matplotlib.pyplot as plt

# sklearn.cross_validation and sklearn.grid_search were removed from
# scikit-learn; both helpers now live in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# RandomizedPCA was removed; PCA(svd_solver='randomized') replaces it.
from sklearn.decomposition import PCA
from sklearn.svm import SVC

print(__doc__)

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

###############################################################################
# Download the data, if not already on disk and load it as numpy arrays

# NOTE(review): the loading call was missing from this script although
# lfw_people is used below. The arguments follow scikit-learn's eigenfaces
# example -- confirm they match the author's intent.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# introspect the images arrays to find the shapes (for plotting)
n_samples, h, w = lfw_people.images.shape

# for machine learning we use the data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_people.data  # feature vectors
n_features = X.shape[1]  # number of columns

# the label to predict is the id of the person
y = lfw_people.target  # class labels
target_names = lfw_people.target_names  # names of the selected people
n_classes = target_names.shape[0]  # number of rows

###############################################################################
# Split into a training set and a test set (random 75% / 25% split)

# train_test_split divides the samples into a training set and a test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25)

###############################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_train.shape[0]))
t0 = time()
# Randomized PCA is used to reduce dimensionality, because the raw feature
# dimension is too high to work with directly.
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

###############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
# RBF kernel; GridSearchCV searches param_grid for the best parameter
# combination. class_weight='auto' was removed from scikit-learn and
# 'balanced' is its replacement.
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

###############################################################################
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))