1 Star 0 Fork 8

郭金朋/ 基于spark大数据的音乐推荐

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
main.py 3.39 KB
一键复制 编辑 原始数据 按行查看 历史
Huang_Daxian 提交于 2021-07-14 07:44 . 初始化提交所有代码
from pyspark.ml.recommendation import ALS
from pyspark.ml.recommendation import ALSModel
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
import json
#if __name__ == "main":
def GenreList(sc, spark, trainData, top_n=10):
    """Return the IDs of the artists that occur in the most rows of trainData.

    Args:
        sc: Unused; accepted so existing call sites keep working.
        spark: Unused; accepted so existing call sites keep working.
        trainData: DataFrame with an ``artist`` column.
        top_n: Maximum number of artist IDs to return. Defaults to 10, the
            limit that used to be hard-coded.

    Returns:
        A list of at most ``top_n`` ``artist`` values, ordered from the most
        to the least frequent.
    """
    ranked = trainData.groupBy('artist').count().orderBy('count', ascending=False)
    # take() already returns the rows to the driver, so the artist field can
    # be read locally instead of going through an extra RDD map.
    return [row.artist for row in ranked.take(top_n)]
# Top-ten artist IDs by row count.
# NOTE(review): sc, spark and trainData are only assigned inside the
# `if __name__ == "__main__":` block at the end of this file. If this
# statement really sits at module level (indentation was lost in this copy),
# it raises NameError before that block runs. Presumably it was pasted from
# an interactive session; confirm and move it into the entry point.
# NOTE(review): the name shadows the built-in `list`.
list = GenreList(sc,spark,trainData)
# Export the ten artists that occur in the most rows of the training data.
def GenYearSales(sc, spark, artistByID, list):
    """Write the ten most frequent artists, with their names, to a JSON file.

    Counts the rows of the module-level ``trainData`` DataFrame per artist,
    keeps the ten largest groups, replaces each artist ID by the matching
    ``name`` from ``artistByID`` and writes the result to
    ``/usr/local/spark/test/code/static/data/genre-year-sales.json`` as a JSON
    array of ``{"artist": ..., "count": ...}`` objects. Each record is also
    printed.

    Args:
        sc: Unused; accepted so existing call sites keep working.
        spark: Unused; accepted so existing call sites keep working.
        artistByID: DataFrame with ``artist`` (ID) and ``name`` columns.
        list: Unused. The name shadows the built-in but is kept because it is
            part of the existing signature.

    Returns:
        None.
    """
    top_rows = trainData.groupBy('artist').count().orderBy('count', ascending=False).take(10)
    records = []
    for row in top_rows:
        record = row.asDict()
        name_row = artistByID.filter(artistByID['artist'] == row.artist).select('name').first()
        # first() returns None when no row matches; in that case keep the raw
        # artist ID instead of failing the whole export with an IndexError.
        if name_row is not None:
            record['artist'] = name_row.name
        print(record)
        records.append(record)
    # The context manager closes the file even if serialisation fails.
    with open('/usr/local/spark/test/code/static/data/genre-year-sales.json', 'w') as f:
        f.write(json.dumps(records))
# Ten artists with the largest summed `count`, highest first.
# NOTE(review): trainData is only assigned inside the
# `if __name__ == "__main__":` block at the end of this file, and its `count`
# column is cast to int there. If this statement really sits at module level
# (indentation was lost in this copy), it runs too early and raises NameError.
# Confirm and move it after the cast in the entry point.
artist_list = trainData.groupBy('artist').sum("count").orderBy('sum(count)',ascending = False).take(10)
def genreSales(sc, spark, artistByID, artist_list):
    """Write the given artist rows, with IDs replaced by names, to a JSON file.

    Each row of ``artist_list`` is converted to a dict, its ``artist`` value is
    replaced by the matching ``name`` from ``artistByID``, and the resulting
    list is written as JSON to
    ``/usr/local/spark/test/code/static/data/genre-sales.json``. Each record
    is also printed.

    ``artist_list`` itself is left untouched; earlier versions overwrote its
    elements with the generated dicts.

    Args:
        sc: Unused; accepted so existing call sites keep working.
        spark: Unused; accepted so existing call sites keep working.
        artistByID: DataFrame with ``artist`` (ID) and ``name`` columns.
        artist_list: Sequence of Spark ``Row`` objects that have an ``artist``
            field; every other field is copied into the output unchanged.

    Returns:
        None.
    """
    records = []
    for row in artist_list:
        record = row.asDict()
        name_row = artistByID.filter(artistByID['artist'] == row.artist).select('name').first()
        # first() returns None when no row matches; in that case keep the raw
        # artist ID instead of failing the whole export with an IndexError.
        if name_row is not None:
            record['artist'] = name_row.name
        print(record)
        records.append(record)
    # The context manager closes the file even if serialisation fails.
    with open('/usr/local/spark/test/code/static/data/genre-sales.json', 'w') as f:
        f.write(json.dumps(records))
def makeRecommendations(model, userID, number):
    """Return the artists with the highest predicted score for one user.

    Pairs ``userID`` with every item ID found in ``model.itemFactors``, scores
    the pairs with ``model.transform`` and returns the best-scoring rows. The
    schema of the candidate DataFrame is printed before scoring.

    Args:
        model: Fitted ALS model used for scoring. Earlier versions ignored
            this argument and always read the module-level ``modelnew``.
            Assumes the model expects ``user`` and ``artist`` input columns,
            since those are the names fed to it here -- TODO confirm against
            the training code.
        userID: ID of the user to score; cast to an integer column.
        number: Maximum number of rows to return. Earlier versions ignored
            this argument and always returned at most 10 rows.

    Returns:
        A list of at most ``number`` Spark ``Row`` objects with the fields
        ``artist`` and ``prediction``, ordered by descending prediction.
    """
    candidates = model.itemFactors.selectExpr("id as artist").withColumn("user", lit(userID))
    # Both columns are cast to int before being handed to the model.
    typed = candidates.withColumn("artist", candidates['artist'].cast("Int")).withColumn("user", candidates['user'].cast('Int'))
    typed.printSchema()
    scored = model.transform(typed).select("artist", "prediction")
    return scored.orderBy('prediction', ascending=False).take(number)
def artistPredict(userID):
    """Write the top-ten recommended artists for ``userID`` to a JSON file.

    Requests ten recommendations from ``makeRecommendations`` using the
    module-level ``modelnew``, adds a ``name`` entry to each result by looking
    the artist ID up in the module-level ``artistByID`` DataFrame, and writes
    the list as JSON to
    ``/usr/local/spark/test/code/static/data/predict.json``.

    Args:
        userID: ID of the user to generate recommendations for.

    Returns:
        None.
    """
    records = []
    for row in makeRecommendations(modelnew, userID, 10):
        record = row.asDict()
        # The prediction rows carry integer artist IDs, while the lookup
        # compares against their string form.
        artist_key = str(row.artist)
        name_row = artistByID.filter(artistByID['artist'] == artist_key).select('name').first()
        # first() returns None when no row matches; fall back to the ID so one
        # unknown artist does not abort the export with an IndexError.
        record['name'] = name_row.name if name_row is not None else artist_key
        records.append(record)
    # The context manager closes the file even if serialisation fails.
    with open('/usr/local/spark/test/code/static/data/predict.json', 'w') as f:
        f.write(json.dumps(records))
if __name__ == "__main__":
    # Script entry point: load the saved model and data, then regenerate the
    # three JSON files.
    # modelnew, artistByID and trainData are read as module globals by the
    # functions above, so they must remain module-level assignments here.
    sc = SparkContext('local', 'test')
    sc.setLogLevel("WARN")
    spark = SparkSession.builder.getOrCreate()
    modelnew = ALSModel.load("/usr/local/spark/Model/modelnew")
    artistByID = spark.read.csv("/usr/local/spark/Model/artistByID").toDF("artist", "name")
    trainData = spark.read.csv("/usr/local/spark/Model/trainData").toDF("user", "artist", "count")
    trainData.cache()
    artistByID.cache()
    # No schema is supplied to the CSV reader, so `count` is made numeric
    # explicitly before it is summed.
    trainData = trainData.withColumn('count', trainData['count'].cast('int'))
    trainData.printSchema()
    # Both rankings depend on trainData, which only exists (with a numeric
    # `count`) from this point on, so they are computed here.
    artist_list = trainData.groupBy('artist').sum("count").orderBy('sum(count)', ascending=False).take(10)
    top_artists = GenreList(sc, spark, trainData)
    genreSales(sc, spark, artistByID, artist_list)
    GenYearSales(sc, spark, artistByID, top_artists)
    artistPredict(1000112)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Scala
1
https://gitee.com/groining/bigdata.git
git@gitee.com:groining/bigdata.git
groining
bigdata
基于spark大数据的音乐推荐
master

搜索帮助