1 Star 0 Fork 1

平安/第一届泰迪杯数据分析职业技能大赛

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
task3.py 4.81 KB
一键复制 编辑 原始数据 按行查看 历史
Szwyz 提交于 2018-11-16 20:12 . Upload task4.py task1.py task3.py task2.py
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# Data preprocessing
# Row 70679 of file1.csv holds dirty data and is discarded right after loading.
df1 = pd.read_csv(r'file1.csv', encoding='gbk').drop(index=70679)
df2 = pd.read_csv('file2.csv', encoding='gbk')

# Encode the top-level category as a flag: '饮料' -> 1, '非饮料' -> 0.
mapp = {'饮料': 1, '非饮料': 0}


def _encode_category(label):
    """Return the numeric flag for a known category, else the label unchanged."""
    return mapp.get(label, label)


df2['大类'] = df2['大类'].map(_encode_category)

# Product -> category-flag lookup, built from file2 without its '二级类' column.
kind_data = df2.drop(columns='二级类')
kind_dict = dict(zip(kind_data['商品'], kind_data['大类']))

# Tag every order row with its product's category flag (no match -> missing).
f = kind_dict.get
df1['大类'] = df1['商品'].map(f)

# Keep only the rows whose category flag is 1.
temp_df1 = df1[df1['大类'].eq(1)]
def Func(num, a, b):
    """Classify a count as slow-moving, normal or hot-selling.

    Args:
        num: The count to classify.
        a: Lower threshold; counts strictly below it are labelled '滞销'.
        b: Upper threshold; counts in [a, b) are labelled '正常' and counts
            at or above it are labelled '热销'.

    Returns:
        One of the strings '滞销', '正常' or '热销'.
    """
    # Labels must be exact, with no stray whitespace: rows are later selected
    # by comparing the label column against '热销', '正常' and '滞销'.
    if num < a:
        return '滞销'
    if num < b:
        return '正常'
    return '热销'
def _label_location(location, low, high):
    """Count orders per product at one location, label them and save the result.

    Args:
        location: Value of the '地点' column to select (for example 'A').
        low: Lower threshold passed to ``Func``.
        high: Upper threshold passed to ``Func``.

    Returns:
        DataFrame with the columns '商品' and '标签', one row per product.
        The same table is written to ``task3-1<location>.csv``.
    """
    # Build the mask from temp_df1 itself rather than from df1, so the
    # selection does not depend on pandas re-aligning a longer boolean Series.
    rows = temp_df1.loc[temp_df1['地点'] == location, ['商品', '订单号']]

    # reset_index() turns the grouped product names back into a column,
    # which is all the former CSV round trip was used for.
    counts = rows.groupby('商品')['订单号'].count().reset_index()

    counts['标签'] = counts['订单号'].map(lambda n: Func(n, low, high))
    labelled = counts.drop('订单号', axis=1)

    # Written as GBK because the task 3.2 step reads this file back with
    # encoding='gbk' (the same encoding as the input CSV files).
    labelled.to_csv('task3-1{}.csv'.format(location), encoding='gbk')
    return labelled


# Thresholds (low, high) are specific to each location.
data_a = _label_location('A', 19, 67)
data_b = _label_location('B', 17, 94)
data_c = _label_location('C', 24, 79)
# NOTE(review): D uses the same thresholds as C (24, 79) - confirm this is
# intended and not a copy-paste leftover.
data_d = _label_location('D', 24, 79)
data_e = _label_location('E', 47, 169)
# Task 3.2
# NOTE: to build the word cloud for a different vending machine, change the
# input path below and re-run the code from here on.


def _read_label_table(csv_path):
    """Read a label table that may have been saved as UTF-8 or as GBK."""
    # Try the strict UTF-8 decoder first: GBK-encoded Chinese text is rejected
    # by it, whereas UTF-8 bytes read as GBK could decode into garbage.
    try:
        return pd.read_csv(csv_path, encoding='utf-8')
    except UnicodeDecodeError:
        return pd.read_csv(csv_path, encoding='gbk')


def _weighted_text(values, weight):
    """Return the values joined by spaces, repeated ``weight`` times.

    Writing the values themselves (instead of ``str(series)``) keeps index
    numbers and the ``Name:``/``dtype:`` footer out of the text and avoids
    the ``...`` truncation pandas applies when printing long Series.
    """
    line = ' '.join(str(value) for value in values.dropna())
    return (line + '\n') * weight


xx = _read_label_table(r'D:\taidi\task3-1E.csv')
cc = pd.read_csv(r'D:\taidi\file2.csv', encoding='gbk')

# Look the second-level category up by product name. Assigning cc['二级类']
# directly would pair the two tables by row number, not by product.
second_level = dict(zip(cc['商品'], cc['二级类']))
xx['rd_kind'] = xx['商品'].map(second_level)
xx.to_csv('task3-2E.csv')

# Compare on stripped labels so a label saved with stray whitespace
# (for example '热销 ') is still matched.
labels = xx['标签'].str.strip()

# Each label's text is repeated so that hot-selling items weigh more in the
# word cloud: hot-selling x7, normal x3, slow-moving x1.
label_weights = [('热销', 7), ('正常', 3), ('滞销', 1)]

# The text is written as GBK because it is read back with encoding='gbk'.
# NOTE(review): 'a+' appends, so text from earlier runs stays in the file -
# confirm that mixing several runs into one word cloud is intended.
with open(r'word_c.txt', 'a+', encoding='gbk') as f:
    for column in ('商品', 'rd_kind'):
        for label, weight in label_weights:
            f.write(_weighted_text(xx.loc[labels == label, column], weight))
from os import path

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread is no longer available in current SciPy releases;
    # matplotlib's reader also returns the image as an array.
    # NOTE(review): for PNG files matplotlib returns floats in [0, 1] rather
    # than 8-bit integers - fine for the JPEG used here, re-check the mask
    # if the background image format changes.
    from matplotlib.pyplot import imread

# Tokens excluded from the word cloud.
stop = ['500ml','250ml','600ml','330ml','Name','dtype','object','Series','480ml','rd_kind','rd','kind']
d = path.dirname('.')  # evaluates to '', so the paths below are relative to the working directory
isCN = 1  # Chinese word segmentation is enabled by default
back_coloring_path = "timg.jpg"  # background (mask) image
text_path = r"word_c.txt"  # text to analyse
png_path = "demo.jpg"  # output image
font_path = "SIMYOU.TTF"  # font file

with open(text_path, 'r', encoding='gbk') as f:
    txt = f.read()
if isCN:
    # Segment the text with jieba and join the tokens with spaces.
    txt = ' '.join(jieba.cut(txt))

# Load the background image once; it is used both as the mask and as the
# source for the colour generator.
back_coloring = imread(path.join(d, back_coloring_path))

wordcloud = WordCloud(
    font_path=font_path,
    background_color='white',
    max_words=500,
    stopwords=stop,
    mask=back_coloring,
    max_font_size=270,
    random_state=20,
).generate(txt)

# NOTE(review): image_colors is created but never applied to the cloud
# (there is no recolor call) - confirm whether recolouring was intended.
image_colors = ImageColorGenerator(back_coloring)

plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
wordcloud.to_file(path.join(d, png_path))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/safetyh/Taidi.git
git@gitee.com:safetyh/Taidi.git
safetyh
Taidi
第一届泰迪杯数据分析职业技能大赛
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385