1 Star 0 Fork 3

spider/machine-learning-stu

forked from hyesc/machine-learning-stu 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
房价_predict.py 4.31 KB
一键复制 编辑 原始数据 按行查看 历史
hyesc 提交于 2019-05-10 15:28 . 柱图折线图叠加
import pandas as pd
import numpy as np
from pyecharts import Line
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
# Keep wide DataFrames on a single row of columns when printed instead of
# wrapping their repr across multiple lines.
pd.set_option('expand_frame_repr', False)
def bubble_sort(nums, nums1):
    """Sort ``nums`` ascending in place, mirroring every swap in ``nums1``.

    Both sequences are mutated: whenever two neighbouring items of ``nums``
    are exchanged, the items at the same positions in ``nums1`` are exchanged
    as well, so ``nums1[k]`` keeps travelling with ``nums[k]``. Only a strict
    ``>`` comparison triggers a swap, so equal keys keep their relative order.

    Args:
        nums: Mutable, indexable sequence of mutually comparable keys.
        nums1: Mutable, indexable companion sequence; it is indexed at the
            same positions as ``nums`` whenever a swap happens.

    Returns:
        The tuple ``(nums, nums1)`` — the same objects that were passed in.
    """
    for finished in range(len(nums) - 1):
        swapped = False
        # After each pass the largest remaining key has bubbled to the end,
        # so the last ``finished`` positions are already in place.
        for j in range(len(nums) - finished - 1):
            if nums[j] > nums[j + 1]:
                nums[j], nums[j + 1] = nums[j + 1], nums[j]
                nums1[j], nums1[j + 1] = nums1[j + 1], nums1[j]
                swapped = True
        # A pass without swaps means every adjacent pair is ordered; the
        # remaining passes would not change anything, so stop early.
        if not swapped:
            break
    return nums, nums1
def parse():
    """Fit an SGD linear regression on the listings in ``./bk_esf.csv``.

    Steps, in order:
      1. Read the CSV and turn the ``elevator``, ``property``,
         ``building_age``, ``floor_count`` and ``house_type`` columns into
         numbers.
      2. Drop the ``category``, ``title``, ``unit``, ``average``,
         ``community``, ``status`` and ``crawl_time`` columns and split off
         ``amount`` as the regression target.
      3. One-hot encode the remaining features with ``pd.get_dummies``.
      4. Split 75% / 25% into train / test sets (``random_state=1``),
         standardize features and target, and fit an ``SGDRegressor``.
      5. Print the learned coefficients, the test-set predictions and the
         prediction for the first row, the latter two mapped back to the
         original target scale.

    The function takes no arguments and returns ``None``; all results are
    printed.
    """
    # Load the raw listings.
    data = pd.read_csv('./bk_esf.csv')

    # Elevator: '有' (has an elevator) -> 2, missing value -> 1.
    # NOTE(review): any other value in this column is left unchanged and
    # would end up one-hot encoded below — confirm against the CSV.
    data['elevator'] = data['elevator'].replace('有', 2)
    data['elevator'] = data['elevator'].fillna(1)

    # Property: treat '未知' (unknown) as missing, keep only the digits,
    # convert to a number and fill the gaps with the column mean.
    data['property'] = data['property'].replace('未知', np.nan)
    data['property'] = data['property'].replace(r'[^\d]', '', regex=True)
    data['property'] = pd.to_numeric(data['property'])
    data['property'] = data['property'].fillna(data['property'].mean())

    # The numeric encodings for ``house_category`` and ``floor`` are
    # disabled; those columns are passed on as-is and handled by the one-hot
    # encoding step below.

    # Building age and total floor count: strip every non-digit character,
    # convert to a number and fill missing values with the column mean.
    for column in ('building_age', 'floor_count'):
        data[column] = data[column].replace(r'[^\d]+', '', regex=True)
        data[column] = pd.to_numeric(data[column])
        data[column] = data[column].fillna(data[column].mean())

    # Layout: drop the '室' / '厅' markers so that e.g. '2室1厅' becomes 21
    # and '3室1厅' becomes 31.
    data['house_type'] = data['house_type'].replace('室|厅', '', regex=True)
    data['house_type'] = pd.to_numeric(data['house_type'])

    # Remove the columns that are not used as features.
    data = data.drop(['category', 'title', 'unit', 'average', 'community', 'status', 'crawl_time'], axis=1)

    # Separate the regression target from the features.
    target = data['amount']
    data = data.drop(['amount'], axis=1)
    print(data, data.shape)
    print('---------------------')

    # One-hot encode the remaining categorical columns.
    data = pd.get_dummies(data)
    print(data, data.shape)

    x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=1)

    # Standardize features and target with separate scalers; each scaler is
    # fitted on the training split only and then applied to the test split.
    std_x = StandardScaler()
    std_y = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    # The scaler works on 2-D input, hence the reshape to a single column.
    y_train = std_y.fit_transform(y_train.values.reshape(-1, 1))
    y_test = std_y.transform(y_test.values.reshape(-1, 1))

    # Gradient-descent linear model. ``LinearRegression`` (normal equation)
    # is imported at file level as an alternative but is not used here.
    sgd = SGDRegressor(random_state=1)
    # The estimator expects a 1-D target, so flatten the (n, 1) column.
    sgd.fit(x_train, y_train.ravel())
    print('参数权重:', sgd.coef_)

    # ``predict`` returns a 1-D array while the scaler requires 2-D input:
    # reshape to a column for the inverse transform, then flatten again so
    # the printed result stays a flat array of prices.
    y_predict = std_y.inverse_transform(sgd.predict(x_test).reshape(-1, 1)).ravel()
    print('预测结果:', y_predict)

    # Predict the first row of the full feature table as a single example.
    xx = std_x.transform(data[:1])
    yy = std_y.inverse_transform(sgd.predict(xx).reshape(-1, 1)).ravel()
    print(yy)

    # Line-chart visualisation of true vs. predicted values (disabled):
    # x_axis = list(range(1, y_test.shape[0] + 1))
    # line = Line("线性回归测试集预测结果可视化", width=1200)
    # # y_test_s, y_pred_s = bubble_sort(y_test, y_pred)
    # line.add("真实值", x_axis, std_y.inverse_transform(y_test), is_smooth=True)
    # line.add("预测值", x_axis, y_predict, is_smooth=True)
    # line.render(path='./echart_html/房价回归测试集预测结果可视化.html')
# Script entry point: run the full load / train / predict pipeline only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    parse()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/spiderking/machine-learning-stu.git
git@gitee.com:spiderking/machine-learning-stu.git
spiderking
machine-learning-stu
machine-learning-stu
master

搜索帮助