1 Star 3 Fork 2

疯狂星期(4)/金融数据分析小组作业(一)

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
lkn.py 2.72 KB
一键复制 编辑 原始数据 按行查看 历史
普尼托的矮行星 提交于 2024-04-25 15:44 . 大数据2102励卡诺
#导入相关库
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import talib
import tushare as ts
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# 1.股票基本数据获取
import tushare as ts
# Initialise the Tushare Pro client.
# The API token is a credential, so prefer the TUSHARE_TOKEN environment
# variable when it is set. The literal is kept only as a fallback so that
# existing runs keep working.
# NOTE(review): a token committed to a public repository should be rotated
# and this fallback removed.
pro = ts.pro_api(
    os.environ.get('TUSHARE_TOKEN')
    or '1c7f85b9026518588c0d0cdac712c2d17344332c9c8cfe6bc83ee75c'
)
# Pull daily bars for the date range. Dates are passed as YYYYMMDD strings,
# which is the form the Tushare documentation specifies.
# NOTE(review): ts_code is empty, so the request is not restricted to one
# instrument -- presumably rows for many stocks come back; confirm, because
# the later shift/rolling steps treat the frame as a single time series.
df = pro.daily(
    ts_code="",
    trade_date="",
    start_date="20230101",
    end_date="20240101",
    offset="",
    limit="",
    fields=[
        "ts_code",
        "trade_date",
        "open",
        "high",
        "low",
        "close",
        "pre_close",
        "change",
        "pct_chg",
        "vol",
        "amount",
    ],
)
print(df)
# 2. Simple derived features.
# shift(1) and rolling() below treat consecutive rows as consecutive trading
# days, oldest first. The frame is not guaranteed to arrive in that order
# (Tushare typically returns the newest trade_date first), so sort it
# chronologically before deriving anything. trade_date is a YYYYMMDD string,
# so lexical order is date order. Sorting by ts_code first only keeps
# instruments from interleaving; the calculations still assume one instrument.
df.sort_values(['ts_code', 'trade_date'], inplace=True)
df.reset_index(drop=True, inplace=True)
# Intraday return and intraday range, relative to the open / the low.
df['close-open'] = (df['close'] - df['open']) / df['open']
df['high-low'] = (df['high'] - df['low']) / df['low']
# Previous row's close (this overwrites the pre_close column that was fetched).
df['pre_close'] = df['close'].shift(1)
# Absolute and percentage change versus the previous close.
df['price_change'] = df['close'] - df['pre_close']
df['p_change'] = (df['close'] - df['pre_close']) / df['pre_close'] * 100
# 3. Moving averages of the close over 5 and 10 rows.
df['MA5'] = df['close'].rolling(5).mean()
df['MA10'] = df['close'].rolling(10).mean()
# Drop the warm-up rows whose shifted / rolling values are NaN.
df.dropna(inplace=True)
# 4. Technical indicators built with TA-Lib, all computed from the close.
# Each entry is (output column, TA-Lib function, timeperiod).
for column_name, indicator, period in (
    ('RSI', talib.RSI, 12),
    ('MOM', talib.MOM, 5),
    ('EMA12', talib.EMA, 12),  # 12-period exponential moving average
    ('EMA26', talib.EMA, 26),  # 26-period exponential moving average
):
    df[column_name] = indicator(df['close'], timeperiod=period)
# MACD returns three series: the MACD line, its signal line and the histogram.
macd_line, macd_signal, macd_hist = talib.MACD(
    df['close'], fastperiod=6, slowperiod=12, signalperiod=9
)
df['MACD'] = macd_line
df['MACDsignal'] = macd_signal
df['MACDhist'] = macd_hist
# Remove the rows where any indicator is still NaN.
df.dropna(inplace=True)
# Select the feature columns and build the target.
feature_columns = [
    'close', 'vol', 'close-open', 'MA5', 'MA10', 'high-low',
    'RSI', 'MOM', 'EMA12', 'MACD', 'MACDsignal', 'MACDhist',
]
# Target for a row is the sign of the FOLLOWING row's price_change:
# 1 when it is positive, -1 otherwise.
next_change = df['price_change'].shift(-1)
# The last row has no following row, so its target is unknown. Comparing NaN
# with 0 is False, which np.where would silently turn into a -1 label; drop
# that row from both X and y instead of training/testing on a made-up label.
has_target = next_change.notna()
X = df.loc[has_target, feature_columns]
y = np.where(next_change[has_target] > 0, 1, -1)
# Split by position, without shuffling: first 90% of rows train, the rest test.
X_length = X.shape[0]
split = int(X_length * 0.9)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
# Build and fit the classifier: a small, shallow random forest with a fixed
# seed so repeated runs give the same model.
model = RandomForestClassifier(
    n_estimators=10,
    max_depth=3,
    min_samples_leaf=10,
    random_state=123,
)
model.fit(X_train, y_train)
# Predict the up/down label for every test row and show it next to the
# actual label.
y_pred = model.predict(X_test)
a = pd.DataFrame({'预测值': list(y_pred), '实际值': list(y_test)})
a.head()  # result is discarded when run as a script
print(a)
# Per-class probabilities for every test row.
y_pred_proba = model.predict_proba(X_test)
# predict_proba has one column per entry of model.classes_, in that order.
# Name the columns from classes_ instead of hard-coding two labels, so the
# table still builds if the training slice happened to contain one class only.
# With classes [-1, 1] the names are exactly '分类为-1的概率', '分类为1的概率'.
proba_columns = ['分类为{}的概率'.format(label) for label in model.classes_]
a = pd.DataFrame(y_pred_proba, columns=proba_columns)
print(a)
# Evaluate accuracy on the test rows. accuracy_score takes (y_true, y_pred).
# For a classifier, model.score(X_test, y_test) reports the same mean
# accuracy, so it is not computed a second time.
accuracy = accuracy_score(y_test, y_pred)
# Print the value: a bare expression is only echoed in a notebook, and is
# silently discarded when this file runs as a script.
print('模型准确度:', accuracy)
# Rank the features by the importance the fitted forest assigns to them.
importances = model.feature_importances_
a = pd.DataFrame({'特征': X.columns, '特征重要性': importances})
a = a.sort_values('特征重要性', ascending=False)
# Print the table: a bare `a` shows nothing when this file runs as a script.
print(a)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/crazy-week-4/a1.git
git@gitee.com:crazy-week-4/a1.git
crazy-week-4
a1
金融数据分析小组作业(一)
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385