1 Star 0 Fork 2

hzcc/Machine-Learning-with-Python

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
adspy_shared_utilities.py 9.75 KB
一键复制 编辑 原始数据 按行查看 历史
Susan Li 提交于 2017-08-05 12:02 . Add files
# version 1.0
import numpy
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap, BoundaryNorm
from sklearn import neighbors
import matplotlib.patches as mpatches
import graphviz
from sklearn.tree import export_graphviz
import matplotlib.patches as mpatches
def load_crime_dataset():
# Communities and Crime dataset for regression
# https://archive.ics.uci.edu/ml/datasets/Communities+and+Crime+Unnormalized
crime = pd.read_table('CommViolPredUnnormalizedData.txt', sep=',', na_values='?')
# remove features with poor coverage or lower relevance, and keep ViolentCrimesPerPop target column
columns_to_keep = [5, 6] + list(range(11,26)) + list(range(32, 103)) + [145]
crime = crime.ix[:,columns_to_keep].dropna()
X_crime = crime.ix[:,range(0,88)]
y_crime = crime['ViolentCrimesPerPop']
return (X_crime, y_crime)
def plot_decision_tree(clf, feature_names, class_names):
# This function requires the pydotplus module and assumes it's been installed.
# In some cases (typically under Windows) even after running conda install, there is a problem where the
# pydotplus module is not found when running from within the notebook environment. The following code
# may help to guarantee the module is installed in the current notebook environment directory.
#
# import sys; sys.executable
# !{sys.executable} -m pip install pydotplus
export_graphviz(clf, out_file="adspy_temp.dot", feature_names=feature_names, class_names=class_names, filled = True, impurity = False)
with open("adspy_temp.dot") as f:
dot_graph = f.read()
# Alternate method using pydotplus, if installed.
# graph = pydotplus.graphviz.graph_from_dot_data(dot_graph)
# return graph.create_png()
return graphviz.Source(dot_graph)
def plot_feature_importances(clf, feature_names):
c_features = len(feature_names)
plt.barh(range(c_features), clf.feature_importances_)
plt.xlabel("Feature importance")
plt.ylabel("Feature name")
plt.yticks(numpy.arange(c_features), feature_names)
def plot_labelled_scatter(X, y, class_labels):
num_labels = len(class_labels)
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
marker_array = ['o', '^', '*']
color_array = ['#FFFF00', '#00AAFF', '#000000', '#FF00AA']
cmap_bold = ListedColormap(color_array)
bnorm = BoundaryNorm(numpy.arange(0, num_labels + 1, 1), ncolors=num_labels)
plt.figure()
plt.scatter(X[:, 0], X[:, 1], s=65, c=y, cmap=cmap_bold, norm = bnorm, alpha = 0.40, edgecolor='black', lw = 1)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
h = []
for c in range(0, num_labels):
h.append(mpatches.Patch(color=color_array[c], label=class_labels[c]))
plt.legend(handles=h)
plt.show()
def plot_class_regions_for_classifier_subplot(clf, X, y, X_test, y_test, title, subplot, target_names = None, plot_decision_regions = True):
numClasses = numpy.amax(y) + 1
color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
cmap_light = ListedColormap(color_list_light[0:numClasses])
cmap_bold = ListedColormap(color_list_bold[0:numClasses])
h = 0.03
k = 0.5
x_plot_adjust = 0.1
y_plot_adjust = 0.1
plot_symbol_size = 50
x_min = X[:, 0].min()
x_max = X[:, 0].max()
y_min = X[:, 1].min()
y_max = X[:, 1].max()
x2, y2 = numpy.meshgrid(numpy.arange(x_min-k, x_max+k, h), numpy.arange(y_min-k, y_max+k, h))
P = clf.predict(numpy.c_[x2.ravel(), y2.ravel()])
P = P.reshape(x2.shape)
if plot_decision_regions:
subplot.contourf(x2, y2, P, cmap=cmap_light, alpha = 0.8)
subplot.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, s=plot_symbol_size, edgecolor = 'black')
subplot.set_xlim(x_min - x_plot_adjust, x_max + x_plot_adjust)
subplot.set_ylim(y_min - y_plot_adjust, y_max + y_plot_adjust)
if (X_test is not None):
subplot.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap_bold, s=plot_symbol_size, marker='^', edgecolor = 'black')
train_score = clf.score(X, y)
test_score = clf.score(X_test, y_test)
title = title + "\nTrain score = {:.2f}, Test score = {:.2f}".format(train_score, test_score)
subplot.set_title(title)
if (target_names is not None):
legend_handles = []
for i in range(0, len(target_names)):
patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
legend_handles.append(patch)
subplot.legend(loc=0, handles=legend_handles)
def plot_class_regions_for_classifier(clf, X, y, X_test=None, y_test=None, title=None, target_names = None, plot_decision_regions = True):
numClasses = numpy.amax(y) + 1
color_list_light = ['#FFFFAA', '#EFEFEF', '#AAFFAA', '#AAAAFF']
color_list_bold = ['#EEEE00', '#000000', '#00CC00', '#0000CC']
cmap_light = ListedColormap(color_list_light[0:numClasses])
cmap_bold = ListedColormap(color_list_bold[0:numClasses])
h = 0.03
k = 0.5
x_plot_adjust = 0.1
y_plot_adjust = 0.1
plot_symbol_size = 50
x_min = X[:, 0].min()
x_max = X[:, 0].max()
y_min = X[:, 1].min()
y_max = X[:, 1].max()
x2, y2 = numpy.meshgrid(numpy.arange(x_min-k, x_max+k, h), numpy.arange(y_min-k, y_max+k, h))
P = clf.predict(numpy.c_[x2.ravel(), y2.ravel()])
P = P.reshape(x2.shape)
plt.figure()
if plot_decision_regions:
plt.contourf(x2, y2, P, cmap=cmap_light, alpha = 0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, s=plot_symbol_size, edgecolor = 'black')
plt.xlim(x_min - x_plot_adjust, x_max + x_plot_adjust)
plt.ylim(y_min - y_plot_adjust, y_max + y_plot_adjust)
if (X_test is not None):
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap_bold, s=plot_symbol_size, marker='^', edgecolor = 'black')
train_score = clf.score(X, y)
test_score = clf.score(X_test, y_test)
title = title + "\nTrain score = {:.2f}, Test score = {:.2f}".format(train_score, test_score)
if (target_names is not None):
legend_handles = []
for i in range(0, len(target_names)):
patch = mpatches.Patch(color=color_list_bold[i], label=target_names[i])
legend_handles.append(patch)
plt.legend(loc=0, handles=legend_handles)
if (title is not None):
plt.title(title)
plt.show()
def plot_fruit_knn(X, y, n_neighbors, weights):
X_mat = X[['height', 'width']].as_matrix()
y_mat = y.as_matrix()
# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF','#AFAFAF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF','#AFAFAF'])
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf.fit(X_mat, y_mat)
# Plot the decision boundary by assigning a color in the color map
# to each mesh point.
mesh_step_size = .01 # step size in the mesh
plot_symbol_size = 50
x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
numpy.arange(y_min, y_max, mesh_step_size))
Z = clf.predict(numpy.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot training points
plt.scatter(X_mat[:, 0], X_mat[:, 1], s=plot_symbol_size, c=y, cmap=cmap_bold, edgecolor = 'black')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
patch0 = mpatches.Patch(color='#FF0000', label='apple')
patch1 = mpatches.Patch(color='#00FF00', label='mandarin')
patch2 = mpatches.Patch(color='#0000FF', label='orange')
patch3 = mpatches.Patch(color='#AFAFAF', label='lemon')
plt.legend(handles=[patch0, patch1, patch2, patch3])
plt.xlabel('height (cm)')
plt.ylabel('width (cm)')
plt.show()
def plot_two_class_knn(X, y, n_neighbors, weights, X_test, y_test):
X_mat = X
y_mat = y
# Create color maps
cmap_light = ListedColormap(['#FFFFAA', '#AAFFAA', '#AAAAFF','#EFEFEF'])
cmap_bold = ListedColormap(['#FFFF00', '#00FF00', '#0000FF','#000000'])
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
clf.fit(X_mat, y_mat)
# Plot the decision boundary by assigning a color in the color map
# to each mesh point.
mesh_step_size = .01 # step size in the mesh
plot_symbol_size = 50
x_min, x_max = X_mat[:, 0].min() - 1, X_mat[:, 0].max() + 1
y_min, y_max = X_mat[:, 1].min() - 1, X_mat[:, 1].max() + 1
xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, mesh_step_size),
numpy.arange(y_min, y_max, mesh_step_size))
Z = clf.predict(numpy.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot training points
plt.scatter(X_mat[:, 0], X_mat[:, 1], s=plot_symbol_size, c=y, cmap=cmap_bold, edgecolor = 'black')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
title = "Neighbors = {}".format(n_neighbors)
if (X_test is not None):
train_score = clf.score(X_mat, y_mat)
test_score = clf.score(X_test, y_test)
title = title + "\nTrain score = {:.2f}, Test score = {:.2f}".format(train_score, test_score)
patch0 = mpatches.Patch(color='#FFFF00', label='class 0')
patch1 = mpatches.Patch(color='#000000', label='class 1')
plt.legend(handles=[patch0, patch1])
plt.xlabel('Feature 0')
plt.ylabel('Feature 1')
plt.title(title)
plt.show()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/hwang_zc/Machine-Learning-with-Python.git
git@gitee.com:hwang_zc/Machine-Learning-with-Python.git
hwang_zc
Machine-Learning-with-Python
Machine-Learning-with-Python
master

搜索帮助