KNN

knn实现流程示例

# -*- coding: utf-8 -*-

# @Time: 2020/5/15 11:24
# @Email: mgc5320@163.com
# @Author: Ma Gui Chang

import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Load the iris dataset.
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Features: sepal length (cm), sepal width (cm), petal length (cm), petal width (cm).
# Classes: Iris setosa, Iris versicolor, Iris virginica.
feature_names = iris.feature_names
target_names = iris.target_names
# Split into train / test sets FIRST, on the raw features, so that no
# statistic of the held-out samples can leak into the preprocessing step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)
# Preprocessing (min-max normalisation): the per-feature min/max are learned
# from the training split only and the same transform is then applied to the
# test split. Test values may therefore fall slightly outside [0, 1].
# NOTE: X itself is left unscaled; call scaler.transform(X) if a scaled copy
# of the full matrix is needed.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Baseline: train a KNN classifier with default hyperparameters and print
# its score on the held-out test split (for scikit-learn classifiers this
# is the mean accuracy).
knn_clf = KNeighborsClassifier().fit(X_train, y_train)
baseline_accuracy = knn_clf.score(X_test, y_test)
print(baseline_accuracy)
# Tuning: exhaustive grid search over the number of neighbours (k), the
# neighbour weighting scheme and the Minkowski power parameter p.
param_grid = [
    {
        'n_neighbors': list(range(1, 10)),  # k = 1..9
        'weights': ['uniform', 'distance'],
        'p': list(range(1, 6)),             # p = 1..5
    }
]
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5)  # 5-fold cross-validation
grid_search.fit(X_train, y_train)
# Report the best estimator found and its mean cross-validated score.
print(grid_search.best_estimator_)
print(grid_search.best_score_)

# Fit and predict with the tuned model. The hyperparameters are taken from
# the grid search result instead of being hard-coded, so they stay in sync
# with the data, the split and the search grid. n_jobs=1 is kept from the
# original configuration; every other argument uses the library default.
knn = KNeighborsClassifier(n_jobs=1, **grid_search.best_params_)

knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
params_best = knn.get_params()
print(params_best)
# Classification report for the test split. Columns: precision, recall,
# f1-score and support (the number of true samples of each class - a count,
# not a rate).
report = classification_report(y_test, y_pred)
print(report)

# Model persistence: serialise the fitted classifier to disk, load it back
# and predict with the restored copy.
# NOTE: only `knn` is written; `scaler` is not persisted here, so a consumer
# of the file must apply the same feature scaling before calling predict.
# NOTE(security): pickle.load executes arbitrary code from the file - only
# load pickle files that come from a trusted source.
model_path = 'knn.pkl'

with open(model_path, 'wb') as out_file:
    pickle.dump(knn, out_file)    # save

with open(model_path, 'rb') as in_file:
    knn2 = pickle.load(in_file)  # load

y2_pred = knn2.predict(X_test)