代码拉取完成,页面将自动刷新
同步操作将从 hyesc/machine-learning-stu 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
# __author__ = 'heyin'
# __date__ = '2018/11/22 9:22'
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
def logic(csv_path: str = './cancer.csv', c_values=(0.01, 0.1, 1.0, 10.0, 100.0)) -> None:
    """Fit logistic-regression classifiers on the cancer CSV and print their scores.

    The file is read without a header row, using the column names listed
    below. Rows containing a ``?`` placeholder (or any other missing value)
    are dropped, columns 1-9 are used as features and ``Class`` as the
    target. The data is split 75/25 with a fixed seed and standardised, then
    one L2-penalised ``lbfgs`` model is fitted per value in ``c_values``.
    For every model the train/test score and a classification report are
    printed.

    Args:
        csv_path: Location of the CSV file. Defaults to ``./cancer.csv``.
        c_values: Values of the ``C`` parameter to try, in order. The default
            is the same grid that was previously explored with
            ``GridSearchCV`` (solver ``lbfgs``, penalty ``l2``, ``cv=4``).

    Returns:
        None. All results are written to stdout.
    """
    column = [
        'Sample code number', 'Clump Thickness', 'Uniformity of Cell Size',
        'Uniformity of Cell Shape', 'Marginal Adhesion',
        'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',
        'Normal Nucleoli', 'Mitoses', 'Class',
    ]

    # Treat '?' as missing while parsing so the affected column is read as
    # numeric instead of being left as strings, then drop incomplete rows.
    data = pd.read_csv(csv_path, names=column, na_values='?')
    data = data.dropna(how='any')

    # Skip the sample id (column 0); columns 1-9 are features, 'Class' the target.
    x = data.iloc[:, 1:10]
    y = data['Class']

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

    # Fit the scaler on the training split only and reuse it for the test
    # split, so no test-set statistics are used during training.
    sd = StandardScaler()
    x_train = sd.fit_transform(x_train)
    x_test = sd.transform(x_test)

    for i, c in enumerate(c_values):
        print('*' * 20, i)
        lr = LogisticRegression(C=c, solver='lbfgs', penalty='l2')
        lr.fit(x_train, y_train)
        y_pred = lr.predict(x_test)
        print('训练集score:', lr.score(x_train, y_train))
        print('测试集score:', lr.score(x_test, y_test))
        # The report maps label 2 to '良性' (benign) and 4 to '恶性' (malignant).
        print(classification_report(y_test, y_pred, labels=[2, 4], target_names=['良性', '恶性']))
def l_stock(csv_path: str = './stockdata/sh.csv', c_values=(0.01, 0.1, 1.0, 10.0, 100.0)) -> None:
    """Fit logistic-regression classifiers on the stock CSV and print their scores.

    The ``up_down`` column is the target and the ``date`` column is
    discarded; every remaining column is used as a feature. The data is
    split 75/25 with a fixed seed and standardised, then one
    ``LogisticRegression`` model is fitted per value in ``c_values``. For
    every model the train/test score and a classification report are printed.

    Args:
        csv_path: Location of the CSV file. Defaults to ``./stockdata/sh.csv``.
        c_values: Values of the ``C`` parameter to try, in order.

    Returns:
        None. All results are written to stdout.
    """
    # Load the data from the CSV file.
    df = pd.read_csv(csv_path)
    y = df.pop('up_down')
    x = df.drop(columns='date')

    # Feature engineering has to happen after the train/test split.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

    # Standardise: fit on the training split, apply the same transform to the test split.
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)

    for i, c in enumerate(c_values):
        print('*' * 20, i)
        lr = LogisticRegression(C=c)
        lr.fit(x_train, y_train)
        y_pred = lr.predict(x_test)
        print("训练集score:", lr.score(x_train, y_train))
        print("测试集score:", lr.score(x_test, y_test))
        # The report maps label 0 to '跌' (down) and 1 to '涨' (up).
        print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['跌', '涨']))
if __name__ == "__main__":
    # Script entry point: only the stock experiment runs by default.
    # Call logic() here instead to run the cancer-data experiment.
    l_stock()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。