1 Star 12 Fork 0

Truda/房价预测-线性回归

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
main.py 6.09 KB
一键复制 编辑 原始数据 按行查看 历史
Truda 提交于 2020-09-27 22:39 . update main.py.
# -*- coding: utf-8 -*-
# @Task : Linear Regression with One Variable
# @Author: 陈海坤
# @Time : 2019-09-27
import random
import numpy as np
import matplotlib.pyplot as plt
import json
# Visualization settings
fontsize = 20  # font size used for the axis labels of every plot below
plt.figure('Linear Regression')  # create the single figure window all plots are drawn into
plt.ion()  # interactive mode on, so the plot can be redrawn while training runs
class Linear_Regression:
    """Single-variable linear regression trained with mini-batch gradient descent.

    The model is ``y = a * x + b`` fitted in Z-score standardized space: inputs
    are scaled with ``x_mean``/``x_std`` and targets with ``y_mean``/``y_std``.
    Use :meth:`predict` to map raw inputs to raw outputs.

    Args:
        data_path: Text file with one ``x, y`` sample per line.
    """

    def __init__(self, data_path="dataset/data.txt"):
        # Hyper-parameters
        self.EPOCH = 50  # number of passes over the training set
        self.BATCH = 20  # samples per mini-batch
        self.learning_rate = 0.01
        self.data_path = data_path
        self.train_num = 0
        self.test_num = 0
        # Standardization statistics, filled in by read_dataset()
        self.x_mean = 0
        self.x_std = 0
        self.y_mean = 0
        self.y_std = 0
        # Load the dataset
        self.train_data, self.test_data = self.read_dataset()
        # Model parameters (slope and intercept), each an array of shape (1,)
        self.a, self.b = np.zeros((2, 1))

    @staticmethod
    def _scalar(value):
        """Return the first element of *value* as a plain Python float."""
        return float(np.asarray(value).reshape(-1)[0])

    def standardization(self, x):
        """Z-score standardize an input value (or array) with the x statistics."""
        x = (x - self.x_mean) / self.x_std
        return x

    def inverse_standardization(self, x):
        """Map a standardized input back to raw units using the x statistics."""
        x = x * self.x_std + self.x_mean
        return x

    def _standardize_y(self, y):
        """Z-score standardize a target value (or array) with the y statistics."""
        return (y - self.y_mean) / self.y_std

    def _inverse_standardize_y(self, y):
        """Map a standardized target back to raw units using the y statistics."""
        return y * self.y_std + self.y_mean

    def predict(self, x):
        """Predict raw targets for raw inputs *x* (scalar or array)."""
        return self._inverse_standardize_y(self.H_func(self.standardization(x)))

    def read_dataset(self):
        """Load, shuffle and split the dataset 70/30 into train and test lists.

        Also sets ``train_num``, ``test_num`` and the mean/std statistics.
        NOTE: the statistics are computed over the whole dataset (train and
        test together), as in the original implementation.

        Returns:
            ``(train_data, test_data)``, each a list of ``[x, y]`` float pairs.

        Raises:
            ValueError: if the file contains no samples.
        """
        with open(self.data_path, encoding="utf-8") as file:
            # Blank lines (e.g. a trailing newline) carry no sample.
            dataset = [line for line in file if line.strip()]
        if not dataset:
            raise ValueError("no samples found in %s" % self.data_path)
        random.shuffle(dataset)  # shuffle before splitting
        data_num = len(dataset)
        train_num = int(data_num * 0.7)
        self.train_num = train_num
        self.test_num = data_num - train_num
        dataset_list = []
        for item in dataset:
            # float() ignores surrounding whitespace and the newline, so both
            # "x, y" and "x,y" are accepted.
            fields = item.split(',')
            dataset_list.append([float(fields[0]), float(fields[1])])
        x = np.array([i[0] for i in dataset_list])
        y = np.array([i[1] for i in dataset_list])
        # Mean and standard deviation used for Z-score standardization
        self.x_mean = x.mean(axis=0)
        self.x_std = x.std(axis=0)
        self.y_mean = y.mean(axis=0)
        self.y_std = y.std(axis=0)
        return dataset_list[:train_num], dataset_list[train_num:]

    def _draw(self, title, data, ylim=None):
        """Redraw the current axes with *data* as scatter plus the fitted line."""
        plt.cla()  # clear the canvas
        if ylim is not None:
            plt.ylim(*ylim)
        plt.title(title)
        ax = plt.gca()
        ax.set_xlabel('area', fontsize=fontsize)
        ax.set_ylabel('price', fontsize=fontsize)
        ax.scatter([i[0] for i in data], [i[1] for i in data],
                   c='r', s=20, alpha=0.5)
        # The parameters live in standardized space, so the line is drawn via
        # predict() to land on the same raw axes as the scatter points.
        plt_x = np.arange(25, 200, 0.1)
        ax.plot(plt_x, self.predict(plt_x))

    def show(self):
        """Plot the training samples and the current fit."""
        # A fixed y-range keeps the axis from jittering between redraws.
        self._draw("Train", self.train_data, ylim=(0, 2000))
        plt.pause(0.01)  # brief pause so the update is visible

    def H_func(self, x):
        """Hypothesis: return ``a * x + b`` for standardized input *x*."""
        y_ = self.a * x + self.b
        return y_

    def J_func(self, y, y_):
        """Cost: half the mean squared error between targets *y* and predictions *y_*."""
        n = np.size(y)  # actual sample count, not the nominal batch size
        loss_sum = np.sum((y_ - y) ** 2)
        loss = 1 / (n * 2) * loss_sum
        return loss

    def backward(self, x, y, y_):
        """Take one gradient-descent step on ``a`` and ``b``."""
        n = np.size(x)
        residual = y_ - y
        a_gradient = 1 / n * np.sum(residual * x)
        b_gradient = 1 / n * np.sum(residual)
        self.a = self.a - self.learning_rate * a_gradient
        self.b = self.b - self.learning_rate * b_gradient

    def train(self):
        """Run ``EPOCH`` passes of mini-batch gradient descent over the training set.

        Only full batches are used; a trailing partial batch is skipped.
        """
        # Standardize once up front instead of once per batch.
        data = np.asarray(self.train_data, dtype=float).reshape(-1, 2)
        xs = self.standardization(data[:, 0])
        ys = self._standardize_y(data[:, 1])
        for epoch in range(self.EPOCH):
            for batch in range(self.train_num // self.BATCH):
                start = batch * self.BATCH
                x = xs[start:start + self.BATCH]
                y = ys[start:start + self.BATCH]
                y_ = self.H_func(x)  # predictions
                loss = self.J_func(y, y_)  # cost before the update
                self.backward(x, y, y_)  # parameter update
                if batch % 50 == 0:
                    self.show()
                    print("a:%.2f, b:%.2f" % (self._scalar(self.a), self._scalar(self.b)))
                    print("Epoch: %d, Batch: %d, Loss: %.4f\n" % (epoch + 1, batch + 1, loss))

    def test(self):
        """Print the average loss on the test set and plot it with the fit."""
        data = np.asarray(self.test_data, dtype=float).reshape(-1, 2)
        x = self.standardization(data[:, 0])
        y = self._standardize_y(data[:, 1])
        # J_func already averages over the samples it is given.
        print("Average Loss:%.4f\n" % self.J_func(y, self.H_func(x)))
        self._draw("Test", self.test_data)

    def save(self):
        """Write the parameters and standardization statistics to output.json."""
        model = {
            'param': {
                'a': self._scalar(self.a),
                'b': self._scalar(self.b),
            },
            'x_mean': float(self.x_mean),
            'x_std': float(self.x_std),
            'y_mean': float(self.y_mean),
            'y_std': float(self.y_std),
        }
        with open("output.json", "w", encoding="utf-8") as file:
            json.dump(model, file)
        print("output file: output.json")
if __name__ == '__main__':
    # Use a distinct name for the instance: rebinding `Linear_Regression`
    # would shadow the class and make it unusable afterwards.
    model = Linear_Regression()
    model.train()  # train
    model.test()  # evaluate on the held-out split
    model.save()  # write parameters to output.json
    plt.ioff()  # turn interactive mode off
    plt.pause(0)  # keep the final figure on screen
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/chk668/Linear-Regression.git
git@gitee.com:chk668/Linear-Regression.git
chk668
Linear-Regression
房价预测-线性回归
master

搜索帮助