KNN implementation workflow example
# -*- coding: utf-8 -*-
# @Time: 2020/5/15 11:24
# @Email: mgc5320@163.com
# @Author: Ma Gui Chang
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
# Load the dataset
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target
"""
萼片长度(sepal length (cm))、萼片宽度(sepal width (cm))、花瓣长度(petal length (cm))、花瓣宽度(petal width (cm))
Iris setosa(山鸢尾)Iris versicolor(杂色鸢尾)Iris virginica(维吉尼亚鸢尾)
"""
feature_names = iris.feature_names
target_names = iris.target_names
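# Optional sanity check: the iris dataset has 150 samples, 4 features, and 3 classes
print(X.shape, y.shape)  # (150, 4) (150,)
print(target_names)      # ['setosa' 'versicolor' 'virginica']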
# Split into training and test sets (1/3 held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
# Preprocess the data: min-max normalization, fitted on the training set only
# so that no information from the test set leaks into the scaler
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
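# Optional check: after scaling, each training feature spans exactly [0, 1]
print(X_train.min(axis=0), X_train.max(axis=0))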
# Train and evaluate a baseline model with default hyperparameters
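# Note: with no arguments, KNeighborsClassifier defaults to n_neighbors=5,
# weights='uniform', and Minkowski distance with p=2 (Euclidean distance)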
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train)
print(knn_clf.score(X_test, y_test))
# Optimization: grid search over n_neighbors (k), the weighting scheme, and the Minkowski power parameter p
param_grid = [
    {
        'n_neighbors': list(range(1, 10)),
        'weights': ['uniform', 'distance'],
        'p': list(range(1, 6)),
    }
]
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)  # 5-fold cross-validation
grid_search.fit(X_train, y_train)
print(grid_search.best_estimator_)
print(grid_search.best_score_)
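# The selected hyperparameter combination can also be printed directly
print(grid_search.best_params_)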
# Refit a model with the best hyperparameters found by the grid search above
# (the values below come from best_estimator_; all other arguments keep their defaults)
knn = KNeighborsClassifier(n_neighbors=3, p=1, weights='uniform')
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
params_best = knn.get_params()
print(params_best)
"""
查看分类模型评估报告
precision(准确率),recall(召回率),f1-score(f1分数),support(支持率)
"""
report = classification_report(y_test, y_pred)
print(report)
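# Optional: a confusion matrix gives a per-class breakdown of correct and incorrect predictions
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))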
# Model persistence (serialize the trained model to disk)
with open('knn.pkl', 'wb') as f:
    pickle.dump(knn, f)  # save the trained model
with open('knn.pkl', 'rb') as f:
    knn2 = pickle.load(f)  # load it back
y2_pred = knn2.predict(X_test)
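# Sanity check: the reloaded model should reproduce the original predictions exactly
print((y2_pred == y_pred).all())  # expected: True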