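# NOTE: repository web-page residue, not part of the script: "Code pull complete; the page will refresh automatically."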
#导入相关库
import tushare as ts
import numpy as np
import pandas as pd
import talib
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# 1. Fetch basic daily stock data
import os

import tushare as ts

# Initialise the Tushare Pro client.  The token is taken from the
# TUSHARE_TOKEN environment variable when it is set and non-empty; otherwise
# the value that used to be hard-coded here is used so existing runs keep
# working.
# NOTE(review): a credential committed to source control should be treated as
# leaked -- rotate it and drop this fallback once the environment variable is
# configured everywhere the script runs.
pro = ts.pro_api(
    os.environ.get('TUSHARE_TOKEN')
    or '1c7f85b9026518588c0d0cdac712c2d17344332c9c8cfe6bc83ee75c'
)

# Pull daily bars for the requested date window.
# NOTE(review): ts_code is empty, so the request is not restricted to one
# stock, yet everything computed from df afterwards treats it as a single
# price series -- confirm which ts_code was intended.
df = pro.daily(**{
    "ts_code": "",
    "trade_date": "",
    # The API documents start_date / end_date as YYYYMMDD strings.
    "start_date": "20230101",
    "end_date": "20240101",
    "offset": "",
    "limit": "",
}, fields=[
    "ts_code",
    "trade_date",
    "open",
    "high",
    "low",
    "close",
    "pre_close",
    "change",
    "pct_chg",
    "vol",
    "amount",
])
print(df)
# Put the rows in chronological order before any look-back calculation:
# shift(), rolling() and the TA-Lib indicators below all treat the previous
# row as the previous trading day.  Tushare's documented sample output for
# `daily` lists the newest date first, which would make every "previous"
# value come from a later day.  The sort leaves an already-ascending frame
# unchanged.
# NOTE(review): rows are grouped by ts_code first so one stock's history stays
# contiguous, but values still bleed across the boundary between two stocks if
# df holds more than one ts_code -- confirm a single stock is intended.
df = df.sort_values(['ts_code', 'trade_date']).reset_index(drop=True)

# 2. Simple derived variables
# Intraday return relative to the open, and the high/low range relative to
# the low.
df['close-open'] = (df['close'] - df['open']) / df['open']
df['high-low'] = (df['high'] - df['low']) / df['low']
# Replaces the pre_close column returned by the API with the previous row's
# close; the first row therefore becomes NaN and is dropped below.
df['pre_close'] = df['close'].shift(1)
df['price_change'] = df['close'] - df['pre_close']
df['p_change'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100

# 3. Moving averages of the close over 5 and 10 rows
df['MA5'] = df['close'].rolling(5).mean()
df['MA10'] = df['close'].rolling(10).mean()
# Remove the warm-up rows that do not yet have a full window.
df.dropna(inplace=True)

# 4. Derived variables built with TA-Lib
df['RSI'] = talib.RSI(df['close'], timeperiod=12)
df['MOM'] = talib.MOM(df['close'], timeperiod=5)
df['EMA12'] = talib.EMA(df['close'], timeperiod=12)  # 12-period exponential moving average
df['EMA26'] = talib.EMA(df['close'], timeperiod=26)  # 26-period exponential moving average
df['MACD'], df['MACDsignal'], df['MACDhist'] = talib.MACD(
    df['close'], fastperiod=6, slowperiod=12, signalperiod=9
)
# Remove the rows where the indicators above are still NaN.
df.dropna(inplace=True)
# Extract the feature variables and the target variable
feature_columns = ['close', 'vol', 'close-open', 'MA5', 'MA10', 'high-low',
                   'RSI', 'MOM', 'EMA12', 'MACD', 'MACDsignal', 'MACDhist']

# The target is the sign of the NEXT row's price change: 1 when it is
# positive, -1 otherwise.
next_day_change = df['price_change'].shift(-1)
# The last row has no next row, so its shifted value is NaN.  NaN > 0 is
# False, which would silently label that row -1; exclude it from both the
# features and the labels instead of training/testing on a made-up target.
has_next_day = next_day_change.notna()
X = df.loc[has_next_day, feature_columns]
y = np.where(next_day_change[has_next_day] > 0, 1, -1)

# Split into training and test sets by position (no shuffling): the first 90%
# of the rows are used for training and the remaining rows for testing.
X_length = X.shape[0]
split = int(X_length * 0.9)
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y[:split], y[split:]
# Build and fit the model
model = RandomForestClassifier(max_depth=3, n_estimators=10, min_samples_leaf=10, random_state=123)
model.fit(X_train, y_train)

# Predict the next-day direction for the test rows and show the predictions
# next to the actual labels.
y_pred = model.predict(X_test)
a = pd.DataFrame()
a['预测值'] = list(y_pred)
a['实际值'] = list(y_test)
print(a)

# Predict the probability of each class with predict_proba().  The column
# labels are derived from model.classes_, which is the order predict_proba
# uses for its columns, instead of assuming that order by hand.
y_pred_proba = model.predict_proba(X_test)
a = pd.DataFrame(y_pred_proba, columns=[f'分类为{label}的概率' for label in model.classes_])
print(a)

# Evaluate accuracy.  A bare expression prints nothing when the file is run as
# a script, so the values are printed explicitly.
accuracy = accuracy_score(y_test, y_pred)
print('accuracy_score:', accuracy)
print('model.score:', model.score(X_test, y_test))
# Rank the feature variables by the importance the fitted model assigns them
importances = model.feature_importances_
a = pd.DataFrame({'特征': X.columns, '特征重要性': importances})
a = a.sort_values('特征重要性', ascending=False)
# A bare `a` prints nothing when the file is run as a script; print the
# ranking explicitly.
print(a)
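# NOTE: repository web-page residue, not part of the script: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# "If you confirm the content contains no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, and nothing that violates national laws and regulations, you may click submit to appeal and we will process it as soon as possible."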