1 Star 0 Fork 0

AIpi-machine/LinearRegression

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.py 7.46 KB
一键复制 编辑 原始数据 按行查看 历史
吴官骏 提交于 2021-12-21 01:47 . firstcommit
import os
import warnings

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import plot,savefig
# 用来绘图的,封装了matplot
# 要注意的是一旦导入了seaborn,
# matplotlib的默认作图风格就会被覆盖成seaborn的格式
import seaborn as sns

warnings.filterwarnings('ignore')
#OverallQual:总体评价
class solutions:
    """House-price linear regression pipeline.

    Loads the train/test/sample-submission CSV files, one-hot encodes them,
    keeps the features whose correlation with ``SalePrice`` is strong enough,
    fits a linear model by batch gradient descent and reports fit statistics.

    Typical call order: ``dataencoding()`` -> ``datasweep()`` ->
    ``olsregression_gradientdescent()`` -> ``test()``.
    """

    # Original hard-coded output location, kept as the default so existing
    # callers keep writing their figures to the same place.
    DEFAULT_OUTPUT_DIR = "C:\\Users\\LENOVO\\OneDrive\\桌面\\aip\\House_Price"

    def __init__(self, data_dir='.', output_dir=None):
        """Load the three CSV files and widen pandas' console display.

        Args:
            data_dir: Directory containing ``train.csv``, ``test.csv`` and
                ``sample_submission.csv`` (defaults to the working directory).
            output_dir: Directory that receives generated figures; defaults to
                ``DEFAULT_OUTPUT_DIR``.
        """
        self.output_dir = self.DEFAULT_OUTPUT_DIR if output_dir is None else output_dir
        self.data_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
        self.data_sample_submission = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))
        self.data_test = pd.read_csv(os.path.join(data_dir, 'test.csv'))
        pd.set_option('display.width', 10000)
        pd.set_option('display.max_columns', None)

    def _output_path(self, *parts):
        """Join *parts* under ``self.output_dir`` and make sure the folder exists."""
        path = os.path.join(self.output_dir, *parts)
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        return path

    def print(self):
        """Print the column names, the first rows and the shape of the training set."""
        print(self.data_train.keys())
        print(self.data_train.head())
        print(self.data_train.shape[:])

    def _boxplot_vs_price(self, var, ymax=None):
        """Draw a SalePrice box plot grouped by *var* and save it as ``temp.png``."""
        data = pd.concat([self.data_train['SalePrice'], self.data_train[var]], axis=1)
        ax = sns.boxplot(x=var, y="SalePrice", data=data)
        if ymax is not None:
            ax.axis(ymin=0, ymax=ymax)
        # Save before show(): on interactive backends show() may leave an
        # empty figure behind, so a later savefig() would write a blank image.
        plt.savefig('temp.png')
        plt.show()
        plt.close()

    def plotdatastest1(self):
        """Box plot of SalePrice per ``MSSubClass`` (y axis capped at 800000)."""
        self._boxplot_vs_price('MSSubClass', ymax=800000)

    def plotdatastest2(self):
        """Box plot of SalePrice per ``CentralAir``."""
        self._boxplot_vs_price('CentralAir')

    def plotdata(self):
        """Save one plot set per training column against SalePrice.

        Numeric columns get a scatter plot and a box plot; all other columns
        get a box plot only.  Files go to ``picture/scatter`` and
        ``picture/boxplot`` under ``self.output_dir``.  Box plots are always
        named ``<column>boxplot.jpg`` (the original saved categorical box
        plots under a ``...scatter.jpg`` name inside the boxplot folder).
        """
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for column, series in self.data_train.items():
            data = pd.concat([self.data_train['SalePrice'], series], axis=1)
            if pd.api.types.is_numeric_dtype(series):
                fig = plt.figure()
                plt.scatter(x=column, y="SalePrice", data=data)
                plt.savefig(self._output_path("picture", "scatter", "{}scatter.jpg".format(column)))
                plt.close(fig)
            fig = plt.figure()
            sns.boxplot(x=column, y="SalePrice", data=data)
            plt.savefig(self._output_path("picture", "boxplot", "{}boxplot.jpg".format(column)))
            # Close each figure once written; otherwise one figure per column
            # stays alive for the whole run.
            plt.close(fig)

    def dataencoding(self):
        """One-hot encode the train and test frames into ``newdata`` / ``newdatatest``."""
        self.newdata = pd.get_dummies(self.data_train)
        self.newdatatest = pd.get_dummies(self.data_test)

    def corrplot(self):
        """Print the correlation matrix of the numeric columns and save it as a heat map."""
        # Restrict to numeric columns explicitly: recent pandas versions raise
        # on object columns instead of silently skipping them.
        corrmat = self.data_train.select_dtypes(include=[np.number]).corr()
        print(corrmat)
        f, ax = plt.subplots(figsize=(20, 9))
        sns.heatmap(corrmat, vmax=0.8, square=True, ax=ax)
        plt.savefig(self._output_path("corr.jpg"))
        plt.show()

    def datasweep(self, threshold=0.3):
        """Select features and build the design matrices.

        Drops every encoded column whose correlation with ``SalePrice`` lies
        strictly inside ``(-threshold, threshold)``, fills missing values with
        column means, and stores:

        * ``self.SalePrice`` - target vector (numpy array),
        * ``self.newdata`` / ``self.martix`` - training features,
        * ``self.newdatatest`` / ``self.martixtest`` - test features.

        The column named ``SalePrice`` in the feature frames is a constant 1
        acting as the intercept term, not the price itself.

        Args:
            threshold: Minimum absolute correlation a feature must reach to be kept.
        """
        train = self.newdata
        price_corr = train.corr()['SalePrice']
        # NaN correlations compare False and are therefore kept, as before.
        weak_columns = price_corr[price_corr.abs() < threshold].index

        self.SalePrice = np.array(train['SalePrice'])

        features = train.drop(columns=weak_columns).drop(columns='SalePrice', errors='ignore')
        features['SalePrice'] = 1  # intercept column
        self.newdata = features.fillna(features.mean())

        test_features = self.newdatatest.drop(columns=weak_columns, errors='ignore')
        test_features = test_features.fillna(test_features.mean())
        test_features['SalePrice'] = 1  # intercept column
        # One-hot encoding train and test separately can produce different
        # dummy columns.  Align the test frame to the training columns (same
        # set, same order; categories unseen in test become 0) so the fitted
        # coefficients line up with the test matrix.
        self.newdatatest = test_features.reindex(columns=self.newdata.columns, fill_value=0)

        # Force a float matrix: mixing bool dummies with numeric columns would
        # otherwise produce an object-dtype array.
        self.martix = np.asarray(self.newdata, dtype=float)
        self.martixtest = np.asarray(self.newdatatest, dtype=float)

    def olsbysklearn(self):
        """Fit scikit-learn's LinearRegression on the training matrix and print its score."""
        from sklearn.linear_model import LinearRegression
        model = LinearRegression()
        model.fit(self.martix, self.SalePrice)
        score = model.score(self.martix, self.SalePrice)
        print("The train score is:{}".format(score))

    def olsregression_gradientdescent(self, alpha=0.0000001, max_iter=101, tol=1000):
        """Fit the linear model with batch gradient descent.

        Starts from an all-zero coefficient vector and repeats
        ``delta -= alpha * X^T (X delta - y) / m``.  The defaults reproduce
        the original run: step 1e-7 and 101 iterations.

        Results are stored in ``self.delta`` (coefficients) and
        ``self.err_list`` (mean squared error measured before each update,
        one entry per iteration).

        Args:
            alpha: Learning rate.
            max_iter: Maximum number of iterations.
            tol: Stop early once the mean squared error is at or below this value.

        Raises:
            ValueError: If the training matrix has no rows.
        """
        design = self.martix
        target = self.SalePrice
        n_samples, n_params = design.shape
        if n_samples == 0:
            raise ValueError("training matrix has no rows")

        print("shape is:{}".format(design.shape))
        delta = np.zeros(n_params)
        print("delta is:{}".format(delta))
        print(self.newdata.head(10))

        self.err_list = []
        for count in range(max_iter):
            # One residual vector serves both the error and the full gradient;
            # all coefficients are updated simultaneously from the same delta.
            residual = np.dot(design, delta) - target
            error = float(np.dot(residual, residual)) / n_samples
            self.err_list.append(error)
            gradient = np.dot(design.T, residual)
            delta = delta - alpha * gradient / n_samples
            print("count num is:{},error is:{}".format(count, error))
            if error <= tol:
                break
        # Always publish the result, whichever way the loop ended.
        self.delta = delta

    @staticmethod
    def _coefficient_std_errors(design, residual_ss):
        """Return OLS standard errors sqrt(sigma^2 * diag((X^T X)^-1)).

        ``sigma^2`` is ``residual_ss / (n - p)``.  Returns NaNs when there are
        no residual degrees of freedom.  The pseudo-inverse keeps the result
        finite when columns are collinear.
        """
        n_samples, n_params = design.shape
        dof = n_samples - n_params
        if dof <= 0:
            return np.full(n_params, np.nan)
        sigma2 = residual_ss / dof
        gram_inv = np.linalg.pinv(np.dot(design.T, design))
        return np.sqrt(sigma2 * np.clip(np.diag(gram_inv), 0, None))

    def test(self):
        """Evaluate the fitted model and write test-set predictions.

        Prints each training prediction next to its true price, saves the
        error trace to ``trace.jpg`` under ``self.output_dir``, writes the
        test predictions to ``dataoutput.csv``, then prints R-square and a
        t statistic per coefficient.  Requires ``datasweep()`` and
        ``olsregression_gradientdescent()`` to have run.
        """
        design = self.martix
        target = self.SalePrice

        predictions = np.dot(design, self.delta)
        for prediction, actual in zip(predictions, target):
            print("prediction is :{},initial is:{}".format(prediction, actual))
        residual_ss = float(np.sum((predictions - target) ** 2))
        total_ss = float(np.sum((target - target.mean()) ** 2))

        plt.plot(np.arange(0, len(self.err_list)), np.array(self.err_list))
        # Save before show() so the written image is never blank.
        plt.savefig(self._output_path("trace.jpg"))
        plt.show()

        # Write predictions into the existing second column of the submission
        # template (the original created a stray column labelled 1 instead).
        # NOTE(review): assumes the template has one row per test row - confirm.
        target_column = self.data_sample_submission.columns[1]
        self.data_sample_submission[target_column] = np.dot(self.martixtest, self.delta)
        # index=False keeps the file to the template's columns only.
        self.data_sample_submission.to_csv('dataoutput.csv', index=False)

        # R^2 = 1 - SS_res / SS_tot.  The explained/total decomposition used
        # before only holds for an exact least-squares fit with an intercept,
        # which a truncated gradient descent does not guarantee.
        R_square = 1 - residual_ss / total_ss if total_ss else float('nan')
        print("R-square is:{}".format(R_square))

        std_errors = self._coefficient_std_errors(design, residual_ss)
        with np.errstate(divide='ignore', invalid='ignore'):
            t_stats = self.delta / std_errors
        for coefficient, t_stat in zip(self.delta, t_stats):
            print("delta is:{},t is:{}".format(coefficient, t_stat))
def main():
    """Run the pipeline: load, encode, select features, fit, evaluate."""
    s = solutions()
    s.dataencoding()
    s.print()
    s.datasweep()
    s.olsregression_gradientdescent()
    s.test()


# Guarded so importing this module does not read the CSV files or start training.
if __name__ == "__main__":
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/aipi-machine/linear-regression.git
git@gitee.com:aipi-machine/linear-regression.git
aipi-machine
linear-regression
LinearRegression
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385