1 Star 0 Fork 3

spider/machine-learning-stu

forked from hyesc/machine-learning-stu 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
no8逻辑回归.py 2.89 KB
一键复制 编辑 原始数据 按行查看 历史
hyesc 提交于 2018-11-22 11:59 . 逻辑回归,岭回归代码提交
# __author__ = 'heyin'
# __date__ = '2018/11/22 9:22'
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
def logic():
    """Fit logistic-regression classifiers on the data in ``./cancer.csv``.

    The CSV is read with explicitly supplied column names. Every ``'?'``
    entry is turned into NaN and any row containing NaN is dropped.
    Columns 1 through 9 are used as features and the ``'Class'`` column as
    the label. The data is split 75/25 (``random_state=1``) and the
    features are standardized with statistics fitted on the training part
    only.

    One ``LogisticRegression`` model is then fitted per candidate ``C``
    value; for each, the train/test accuracy and a classification report
    (labels 2 and 4) are printed. Nothing is returned.
    """
    column = [
        'Sample code number',
        'Clump Thickness',
        'Uniformity of Cell Size',
        'Uniformity of Cell Shape',
        'Marginal Adhesion',
        'Single Epithelial Cell Size',
        'Bare Nuclei',
        'Bland Chromatin',
        'Normal Nucleoli',
        'Mitoses',
        'Class',
    ]

    # Load the data, naming the columns ourselves.
    data = pd.read_csv('./cancer.csv', names=column)

    # Question marks stand for missing values: convert them to NaN and
    # then discard every row that has at least one NaN.
    data = data.replace(to_replace='?', value=np.nan)
    data = data.dropna(how='any')

    features = data.iloc[:, 1:10]
    target = data['Class']

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=1
    )

    # Fit the scaler on the training split only, then reuse it for the
    # test split.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Grid search that was previously used to explore C (kept for reference):
    # lr = LogisticRegression()
    # params = {'solver': ['lbfgs'], 'penalty': ['l2'], 'C': [0.01, 0.1, 1.0, 10.0, 100.0]}
    # gv = GridSearchCV(lr, param_grid=params, cv=4)
    #
    # gv.fit(x_train, y_train)
    # y_pred = gv.predict(x_test)
    #
    # print(gv.best_score_)
    # print(gv.best_params_)
    #
    # print('训练集score:', gv.score(x_train, y_train))
    # print('测试集score:', gv.score(x_test, y_test))

    # Candidate C values, taken from the grid search above.
    c_values = [0.01, 0.1, 1.0, 10.0, 100.0]
    separator = '*' * 20
    for index, c_value in enumerate(c_values):
        print(separator, index)
        model = LogisticRegression(C=c_value, solver='lbfgs', penalty='l2')
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        print('训练集score:', model.score(x_train, y_train))
        print('测试集score:', model.score(x_test, y_test))
        report = classification_report(
            y_test, predictions, labels=[2, 4], target_names=['良性', '恶性']
        )
        print(report)
def l_stock():
    """Fit logistic-regression classifiers on ``./stockdata/sh.csv``.

    The ``'date'`` column is discarded, ``'up_down'`` is used as the label
    and all remaining columns are the features. The data is split 75/25
    (``random_state=1``) and standardized with statistics fitted on the
    training part only.

    One ``LogisticRegression`` model is fitted per candidate ``C`` value;
    for each, the train/test accuracy and a classification report
    (labels 0 and 1) are printed. Nothing is returned.
    """
    # Load the data from the CSV file.
    frame = pd.read_csv('./stockdata/sh.csv')

    # Drop the date column, then pull the label out of the frame; whatever
    # is left in the frame is the feature matrix.
    frame.pop('date')
    target = frame.pop('up_down')
    features = frame

    # Split before any feature engineering so the test split does not
    # influence the fitted scaler.
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.25, random_state=1
    )

    # Standardize the features.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    c_values = [0.01, 0.1, 1.0, 10.0, 100.0]
    separator = '*' * 20
    for index, c_value in enumerate(c_values):
        print(separator, index)
        model = LogisticRegression(C=c_value)
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        print("训练集score:", model.score(x_train, y_train))
        print("测试集score:", model.score(x_test, y_test))
        report = classification_report(
            y_test, predictions, labels=[0, 1], target_names=['跌', '涨']
        )
        print(report)
if __name__ == "__main__":
    # Script entry point: run the stock experiment. Call logic() here
    # instead to run the cancer-dataset experiment.
    l_stock()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/spiderking/machine-learning-stu.git
git@gitee.com:spiderking/machine-learning-stu.git
spiderking
machine-learning-stu
machine-learning-stu
master

搜索帮助