1 Star 0 Fork 0

qinhanmin2005/woc-dashboard

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
get_expertise_field.py 2.49 KB
一键复制 编辑 原始数据 按行查看 历史
qinhanmin2005 提交于 2021-05-19 21:27 . 代码重构
import os
import bisect
import numpy as np
import pandas as pd
from collections import defaultdict
from sqlitedict import SqliteDict
# Denominator that turns a bisect rank into a percentile. The value is kept
# exactly as in the original implementation.
# NOTE(review): presumably tied to the number of breakpoints stored per field
# in developer_expertise_field_percentile.txt -- confirm against the data.
_PERCENTILE_DENOMINATOR = 1002.0


def _unique_emails(authors):
    """Return the distinct e-mail addresses in ``authors`` in first-seen order."""
    emails = {}
    for author in authors:
        # rsplit so that a stray "<" inside the display name does not break
        # the two-way unpack; an entry with no "<" still raises ValueError.
        _name, email = author.rsplit("<", 1)
        email = email.strip()
        # Drop the closing ">" only when it is really there, instead of
        # blindly chopping the last character.
        if email.endswith(">"):
            email = email[:-1]
        # A dict keeps insertion order and gives O(1) de-duplication.
        emails[email.strip()] = None
    return list(emails)


def _load_field_totals(path):
    """Parse tab-separated ``field, int, float`` lines into ``{field: (int, float)}``."""
    totals = {}
    with open(path) as input_file:
        for line in input_file:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            columns = line.split("\t")
            totals[columns[0]] = (int(columns[1]), float(columns[2]))
    return totals


def _load_field_percentiles(path):
    """Parse ``field;v1 v2 ...`` lines into ``{field: [float, ...]}``."""
    percentiles = {}
    with open(path) as input_file:
        for line in input_file:
            line = line.strip()
            if not line:
                continue  # tolerate blank / trailing empty lines
            parts = line.split(";")
            if len(parts) != 2:
                raise ValueError(
                    "malformed percentile line (expected 'field;values'): %r" % line
                )
            percentiles[parts[0]] = [float(value) for value in parts[1].split()]
    return percentiles


def get_expertise_field(authors, limit=None):
    """Aggregate per-field expertise for a developer known under several identities.

    Each entry of ``authors`` is a string of the form ``"Name <email>"``. The
    distinct e-mail addresses are looked up in
    ``data/developer_expertise_field.sqlite``; every stored record is a
    ``;``-separated sequence of ``field;int;float`` triples whose two numbers
    are summed per field across all addresses. Addresses with no (or an empty)
    record are ignored.

    The fields are ordered by the summed float value, largest first, and
    truncated to ``limit`` entries when ``limit`` is truthy (``None`` and ``0``
    both mean "no truncation"). Each remaining field is then enriched with the
    two values stored for it in ``data/developer_expertise_field_cnt.txt`` and
    with a percentile derived from the sorted breakpoints stored for it in
    ``data/developer_expertise_field_percentile.txt``.

    All data paths are relative to the current working directory.

    Args:
        authors: iterable of ``"Name <email>"`` strings.
        limit: optional maximum number of fields to return.

    Returns:
        A list of ``(field, (count, score, file_count, file_score, percentile))``
        tuples, where ``count``/``score`` are the developer's summed values,
        ``file_count``/``file_score`` come from the ``_cnt.txt`` file and
        ``percentile`` is rounded to two decimals.

    Raises:
        ValueError: if an author string contains no ``<``, or if a database
            record or a percentile line is malformed.
        KeyError: if a returned field is missing from either text file.
    """
    # Parse the identities before touching the database so that bad input
    # fails without opening any resource.
    emails = _unique_emails(authors)

    developer_field = defaultdict(lambda: [0, 0])
    with SqliteDict("data/developer_expertise_field.sqlite") as info:
        for email in emails:
            cur_info = info.get(email, None)
            if not cur_info:
                continue
            parts = cur_info.split(";")
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if len(parts) % 3 != 0:
                raise ValueError(
                    "malformed expertise record for %r: expected "
                    "'field;count;score' triples" % email
                )
            for j in range(0, len(parts), 3):
                totals = developer_field[parts[j]]
                totals[0] += int(parts[j + 1])
                totals[1] += float(parts[j + 2])

    ranked = sorted(developer_field.items(), key=lambda x: x[1][1], reverse=True)
    if limit:
        ranked = ranked[:limit]

    field_info_dict = _load_field_totals("data/developer_expertise_field_cnt.txt")
    field_percentile = _load_field_percentiles(
        "data/developer_expertise_field_percentile.txt"
    )

    return [
        (
            field,
            (
                int(count),
                float(score),
                field_info_dict[field][0],
                field_info_dict[field][1],
                # Rank of the developer's score among the field's sorted
                # breakpoints, scaled to a percentage.
                round(
                    bisect.bisect_left(field_percentile[field], score)
                    * 100
                    / _PERCENTILE_DENOMINATOR,
                    2,
                ),
            ),
        )
        for field, (count, score) in ranked
    ]
if __name__ == '__main__':
    # Manual smoke run: each entry lists the identities of one developer,
    # and the top ten fields are printed for each of them in turn.
    sample_author_groups = (
        ['qinhanmin2014 <qinhanmin2005@sina.com>', 'Hanmin Qin <qinhanmin2005@sina.com>'],
        ['Joel Nothman <joel.nothman@gmail.com>', 'jnothman <joel.nothman@gmail.com>'],
        ['Thomas Fan <thomasjpfan@gmail.com>', 'Thomas J Fan <thomasjpfan@gmail.com>'],
        ['adrin.jalali <adrin.jalali@gmail.com>', 'adrinjalali <adrin.jalali@gmail.com>', 'Adrin Jalali <adrin.jalali@gmail.com>', 'adrin jalali <adrin.jalali@gmail.com>', 'adrin <adrin.jalali@gmail.com>'],
    )
    for identities in sample_author_groups:
        print(get_expertise_field(identities, limit=10))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/qinhanmin/woc-dashboard.git
git@gitee.com:qinhanmin/woc-dashboard.git
qinhanmin
woc-dashboard
woc-dashboard
master

搜索帮助