1 Star 0 Fork 1

平安/第一届泰迪杯数据分析职业技能大赛

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
task2.py 5.65 KB
一键复制 编辑 原始数据 按行查看 历史
Szwyz 提交于 2018-11-16 20:12 . Upload task4.py task1.py task3.py task2.py
# -*- coding: utf-8 -*-
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Load the GBK-encoded order records and discard the row labelled 70679,
# which the original author flagged as dirty data.
df1 = pd.read_csv(r'file1.csv', encoding='gbk').drop(index=70679)
def timefunc(time):
    """Extract the month number from a payment timestamp string.

    Args:
        time: Timestamp text whose second '/'-separated field is the month,
            e.g. ``'2017/6/1 12:30'`` (the first field is presumably the
            year).

    Returns:
        The month as an ``int``.

    Raises:
        IndexError: If ``time`` contains no '/' separator.
        ValueError: If the month field is not a decimal integer.
    """
    # int() replaces the former eval(): eval would execute arbitrary text
    # read from the CSV and raises SyntaxError on zero-padded months ('06').
    return int(time.split('/')[1])
# Derive each order's month from its payment timestamp. The column name
# 'mounth' is kept verbatim because later code looks it up by that key.
def f(x):
    return timefunc(x)


df1['mounth'] = df1['支付时间'].map(f)
# Task 2.1
# Configure matplotlib so Chinese labels and the minus sign render correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Count June orders per product and keep the five products with most orders.
six_m_data = df1[df1['mounth'] == 6]
june_order_counts = six_m_data.groupby('商品')['订单号'].count()
six_m_front_data = june_order_counts.sort_values(ascending=False).iloc[:5]
d = dict(six_m_front_data)

# Bar chart: product on the x axis, order count on the y axis.
plt.title('销量前5的商品')
sns.barplot(x=list(d), y=list(d.values()))
plt.savefig(r'1.png', dpi=800)
plt.show()
#任务2.2
#月交易额
def group1(data, key):
    """Sum the '实际金额' column of ``data`` for each distinct ``key`` value.

    Args:
        data: DataFrame containing the ``key`` column(s) and '实际金额'.
        key: Column label (or list of labels) to group by.

    Returns:
        A Series of sums indexed by the group key.
    """
    return data.groupby(key)['实际金额'].sum()
# Monthly revenue per location: within every '地点' group, aggregate by the
# 'mounth' column using the revenue version of group1 defined just above.
grouped = df1.groupby('地点')
m_p = grouped.apply(lambda part: group1(part, 'mounth'))
def group1(data, key):
    """Count the '订单号' entries of ``data`` for each distinct ``key`` value.

    Note: this deliberately rebinds the name ``group1``; from here on the
    name refers to the order-count aggregation.

    Args:
        data: DataFrame containing the ``key`` column(s) and '订单号'.
        key: Column label (or list of labels) to group by.

    Returns:
        A Series of non-null counts indexed by the group key.
    """
    return data.groupby(key)['订单号'].count()
# Monthly order count per location: same grouping as for revenue, but using
# the order-count version of group1 defined just above.
grouped = df1.groupby('地点')
m_d = grouped.apply(lambda part: group1(part, 'mounth'))
# Line chart of monthly revenue. m_p is transposed so that its column
# labels run along the x axis and each row of m_p becomes one line.
plt.rcParams.update({
    'figure.figsize': (10, 8),
    'figure.dpi': 60,  # resolution
})
m_p.T.plot.line(title='交易额折线图')
plt.savefig(r'2.png', dpi=800)
plt.show()
# Bar chart of month-over-month revenue growth.
# `m_d // 100000` is only used as a template frame shaped like m_d;
# columns 2..12 are overwritten below and column 1 is dropped afterwards.
temp_data = m_d // 100000
for month in range(2, 13):
    previous = m_p[month - 1]
    # Growth rate relative to the preceding month.
    temp_data[month] = (m_p[month] - previous) / previous
# Column 1 has no preceding month to compare against.
temp_data = temp_data.drop(columns=1)
temp_data.T.plot.bar(title='交易额月环比增长率')
plt.savefig(r'3.png', dpi=800)
plt.show()
# Task 2.3
# Product catalogue: encode the '大类' column as 1 for '饮料' and 0 for
# '非饮料'; any other value is passed through unchanged.
df2 = pd.read_csv('file2.csv', encoding='gbk')
mapp = {'饮料': 1, '非饮料': 0}
df2['大类'] = df2['大类'].map(lambda label: mapp.get(label, label))

# Build a product -> category-code lookup and tag every order with it.
# Products missing from the catalogue map to None.
kind_data = df2.drop(columns='二级类')
kind_dict = dict(zip(kind_data['商品'], kind_data['大类']))


def f(x):
    return kind_dict.get(x)


df1['大类'] = df1['商品'].map(f)
def Profit(price, kind):
    """Return the gross profit for a single sale.

    The margins follow the vectorised formula this script uses for the
    'profit' column (``大类 * 0.05 + 0.2``): 25% when ``kind`` is 1 and 20%
    otherwise. The previous implementation had the two branches swapped.

    Args:
        price: Sale amount, either a number or its decimal text form.
        kind: Category code; 1 and 0 are the codes produced by the '大类'
            mapping ('饮料' -> 1, '非饮料' -> 0).

    Returns:
        The profit as a ``float``.

    Raises:
        ValueError: If ``price`` is text that is not a valid number.
    """
    # float() replaces the former eval(): eval raised TypeError on values
    # that were already numeric and would execute arbitrary CSV text.
    price = float(price)
    rate = 0.25 if kind == 1 else 0.2
    return price * rate
# Add the gross-profit column: the margin is 0.2 plus 0.05 times the
# category code stored in '大类'.
df1['profit'] = df1['实际金额'] * (df1['大类'] * 0.05 + 0.2)

# Pie chart of each location's share of the total gross profit.
plt.axis('equal')
profit_by_location = df1.groupby('地点')['profit'].sum()
profit_by_location.plot.pie(
    title='售货机毛利润占总毛利润比例',
    autopct='%1.1f%%',
)
plt.savefig(r'4.png', dpi=800)
plt.show()
#任务2.4
def GroupFunc1(data):
    """Count the '订单号' entries of ``data`` per value of 'mounth'.

    Args:
        data: DataFrame with 'mounth' and '订单号' columns.

    Returns:
        A Series of non-null counts indexed by 'mounth'.
    """
    return data.groupby('mounth')['订单号'].count()
def GroupFunc2(data):
    """Sum the '实际金额' column of ``data`` per value of 'mounth'.

    Args:
        data: DataFrame with 'mounth' and '实际金额' columns.

    Returns:
        A Series of sums indexed by 'mounth'.
    """
    return data.groupby('mounth')['实际金额'].sum()
# Per-location monthly order counts (m_d) and revenue (m_p).
grouped = df1.groupby('地点')
m_d = grouped.apply(GroupFunc1)
m_p = grouped.apply(GroupFunc2)

# Select the entries labelled 4 (April) from both aggregates.
four_m_d = m_d[4]
four_m_p = m_p[4]

# Bubble-chart inputs: x = location labels, y = order counts,
# z = revenue (used later for bubble size and colour).
april_orders = dict(four_m_d)
labelx = list(april_orders)
labely = list(april_orders.values())
labelz = list(dict(four_m_p).values())
# Draw the bubble chart: one bubble per location, positioned by order
# count, with size and colour both driven by revenue (labelz).
# plt.get_cmap is used because matplotlib.cm.get_cmap (plt.cm.get_cmap)
# was deprecated in Matplotlib 3.7 and removed in 3.9.
cm = plt.get_cmap('RdYlBu')
fig, ax = plt.subplots(figsize=(10, 8))
bubble = ax.scatter(
    labelx,
    labely,
    s=[value / 2 for value in labelz],  # bubble area = revenue / 2
    c=labelz,
    cmap=cm,
    linewidth=0.5,
    alpha=0.5,
)
ax.grid()
fig.colorbar(bubble)
ax.set_xlabel('售货机编号', fontsize=15)
ax.set_ylabel('月订单量', fontsize=15)
# Set the title on the scatter axes explicitly rather than on whatever
# axes happens to be current after the colour bar was added.
ax.set_title('售货机交易额与订单量气泡图')
plt.savefig(r'5.png', dpi=800)
plt.show()
# Task 2.5
# Orders whose location is 'C'. The explicit copy makes the column
# assignments below write to an independent frame instead of a filtered
# slice of df1 (which triggers pandas' SettingWithCopyWarning).
data = df1[df1['地点'] == 'C'].copy()

# The third '/'-separated field of the timestamp has the form
# '<day> <hour>:...'; pull the day and the hour out of it as integers.
data['day'] = data['支付时间'].map(
    lambda x: int(x.split('/')[2].split(' ')[0])
)
data['hour'] = data['支付时间'].map(
    lambda x: int(x.split('/')[2].split(' ')[1].split(':')[0])
)

# Keep only the columns needed for the heat map and the months 6, 7 and 8,
# stacked month by month in that order.
data = data[['订单号', 'mounth', 'day', 'hour']]
data = pd.concat([data[data['mounth'] == month] for month in (6, 7, 8)])

# Round-trip through CSV; later code reads 'data_file.csv' again.
data.to_csv('data_file.csv')
data = pd.read_csv('data_file.csv')
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# 24 x 30 grid of zeros for June: one row per hour, one column per day.
temp_6 = pd.DataFrame(np.zeros((24, 30)))

fp = pd.read_csv('data_file.csv')
print(len(fp))

# Count the orders in every (month, day, hour) slot.
# This replaces a hand-written scan over adjacent rows that never emitted
# the final group (it was patched in as the literal [8, 31, 23, 2]),
# skipped index 0 when collecting results, and only worked when rows of
# the same slot were adjacent. groupby().size() counts every slot directly.
hourly_counts = fp.groupby(['mounth', 'day', 'hour']).size()

# Same shape as before: a list of [month, day, hour, order_count] lists.
list_end = [
    [int(month), int(day), int(hour), int(count)]
    for (month, day, hour), count in hourly_counts.items()
]
# 24 x 31 grids of zeros for July and August (rows = hour, columns = day).
temp_7 = pd.DataFrame(np.zeros((24, 31)))
temp_8 = pd.DataFrame(np.zeros((24, 31)))

# Write every [month, day, hour, count] entry into its month's grid at
# row `hour`, column `day - 1`. A single .iat assignment is used because
# chained indexing (frame[col][row] = value) assigns through a temporary
# and does not update the frame under pandas Copy-on-Write.
month_grids = {6: temp_6, 7: temp_7, 8: temp_8}
for month, day, hour, count in list_end:
    grid = month_grids.get(month)
    if grid is not None:
        grid.iat[hour, day - 1] = count
import matplotlib.pyplot as plt

# Heat map of the June grid (rows = hour, columns = day index).
f, ax2 = plt.subplots(figsize=(10, 8))
sns.heatmap(temp_6, linewidths=0.05, ax=ax2, vmax=13, vmin=0, cmap='rainbow')
plt.show()

# Heat map of June, July and August placed side by side.
end = pd.concat([temp_6, temp_7, temp_8], axis=1)
f, ax2 = plt.subplots(figsize=(20, 8))
sns.heatmap(end, linewidths=0.05, ax=ax2, vmax=13, vmin=0, cmap='rainbow')
# Show the combined figure as well; previously it was built but never
# displayed, unlike every other figure in this script.
plt.show()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/safetyh/Taidi.git
git@gitee.com:safetyh/Taidi.git
safetyh
Taidi
第一届泰迪杯数据分析职业技能大赛
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385