1 Star 1 Fork 0

林冰漫/sklearn

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
shiyan002.py 3.29 KB
一键复制 编辑 原始数据 按行查看 历史
林冰漫 提交于 2023-04-27 11:16 . Initial commit
# 导入所需模块
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Load the dataset (semicolon-separated CSV).
data = pd.read_csv('student-mat.csv', sep=';')

# Integer codes for every categorical column, applied in the order listed.
# NOTE(review): the multi-valued columns (Mjob, Fjob, reason, guardian) receive
# arbitrary ordinal codes; one-hot encoding may suit them better -- confirm.
_YES_NO_CODES = {'no': 0, 'yes': 1}
_JOB_CODES = {'teacher': 0, 'health': 1, 'services': 2, 'at_home': 3, 'other': 4}
_CATEGORY_CODES = {
    'school': {'GP': 0, 'MS': 1},
    'sex': {'F': 0, 'M': 1},
    'address': {'U': 0, 'R': 1},
    'famsize': {'LE3': 0, 'GT3': 1},
    'Pstatus': {'A': 0, 'T': 1},
    'Mjob': _JOB_CODES,
    'Fjob': _JOB_CODES,
    'reason': {'home': 0, 'reputation': 1, 'course': 2, 'other': 3},
    'guardian': {'mother': 0, 'father': 1, 'other': 2},
    'schoolsup': _YES_NO_CODES,
    'famsup': _YES_NO_CODES,
    'paid': _YES_NO_CODES,
    'activities': _YES_NO_CODES,
    'nursery': _YES_NO_CODES,
    'higher': _YES_NO_CODES,
    'internet': _YES_NO_CODES,
    'romantic': _YES_NO_CODES,
}

# Convert the categorical columns to numeric codes.
for _column, _codes in _CATEGORY_CODES.items():
    _encoded = data[_column].map(_codes)
    # Series.map turns any value absent from the mapping into NaN. Fail here,
    # naming the column, instead of letting NaN reach the estimators.
    _unmapped = data.loc[_encoded.isna() & data[_column].notna(), _column]
    if not _unmapped.empty:
        raise ValueError(
            f"Column {_column!r} contains values with no numeric code: "
            f"{_unmapped.unique().tolist()}"
        )
    data[_column] = _encoded

# Turn the G3 output column into a binary target: 0 where G3 < 10 ('fail'),
# 1 otherwise.
data['G3'] = (~(data['G3'] < 10)).astype(int)

# Split into training, validation and test sets, stratified on G3.
# 20% is held out for test; 25% of the remaining 80% (i.e. 20% of the total)
# becomes the validation set, leaving 60% for training.
train_validation, test = train_test_split(
    data, test_size=0.2, stratify=data['G3'], random_state=42
)
train, validation = train_test_split(
    train_validation,
    test_size=0.25,
    stratify=train_validation['G3'],
    random_state=42,
)
# Classification helpers: fit a model, then report on validation and test sets.
def _report_split(model, split, split_name, leading_newline=False):
    """Print accuracy, classification report and confusion matrix for one split.

    Args:
        model: A fitted estimator exposing ``predict``.
        split: DataFrame holding the feature columns plus the ``G3`` target.
        split_name: Label inserted into the accuracy line (e.g. "validation").
        leading_newline: When True, the accuracy line starts with a blank line.
    """
    labels = split['G3']
    predicted = model.predict(split.drop('G3', axis=1))

    accuracy_percent = accuracy_score(labels, predicted) * 100
    prefix = "\n" if leading_newline else ""
    print(f"{prefix}Accuracy on {split_name} set: {accuracy_percent:.2f}%")
    print(classification_report(labels, predicted))
    print("\nConfusion Matrix:\n", confusion_matrix(labels, predicted))


def classify(model):
    """Train ``model`` and print its metrics on the validation and test sets.

    The model is fitted on the module-level ``train`` frame (every column
    except ``G3`` as features, ``G3`` as target), then evaluated first on
    ``validation`` and afterwards on ``test``.

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.
    """
    # Train the model.
    model.fit(train.drop('G3', axis=1), train['G3'])

    # Report on the validation set, then on the test set.
    _report_split(model, validation, "validation")
    _report_split(model, test, "test", leading_newline=True)
# Decision tree model, trained on the features as-is.
tree = DecisionTreeClassifier(random_state=42)
print("Decision Tree Classifier:")
classify(tree)

# Neural network model. The features are standardized first (the scaler is
# fitted on the training data only, inside the pipeline) because the columns
# span very different ranges and scikit-learn recommends scaling for MLPs.
nn = make_pipeline(StandardScaler(), MLPClassifier(random_state=42))
print("\n3-Layer Neural Network Classifier:")
classify(nn)

# SVM model, likewise fed standardized features, since scikit-learn
# recommends scaling for SVMs as well.
svm = make_pipeline(StandardScaler(), SVC(random_state=42))
print("\nSupport Vector Machine Classifier:")
classify(svm)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/lin-bingman/sklearn.git
git@gitee.com:lin-bingman/sklearn.git
lin-bingman
sklearn
sklearn
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385