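# NOTE(review): web-page notice captured with the file, not program code: "Code pull complete; the page will refresh automatically."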
import os
import bisect
import numpy as np
import pandas as pd
from collections import defaultdict
from sqlitedict import SqliteDict
def _extract_emails(authors):
    """Return the distinct e-mail addresses found in *authors*, first-seen order.

    Each entry is expected to look like ``"Name <email>"``.  The address is
    whatever follows the last ``"<"``, with one trailing ``">"`` and any
    surrounding whitespace removed.

    Raises:
        ValueError: if an entry contains no ``"<"`` at all.
    """
    seen = set()
    emails = []
    for author in authors:
        # rpartition tolerates a "<" inside the display name; the address is
        # taken from after the last one.
        _, bracket, tail = author.rpartition("<")
        if not bracket:
            raise ValueError("author entry has no '<email>' part: %r" % (author,))
        tail = tail.strip()
        # Drop the closing bracket only when it is really there, so a missing
        # ">" no longer costs the address its last character.
        if tail.endswith(">"):
            tail = tail[:-1]
        email = tail.strip()
        if email not in seen:
            seen.add(email)
            emails.append(email)
    return emails


def _load_field_counts(path):
    """Read the tab-separated ``field<TAB>int<TAB>float`` file at *path*.

    Returns a dict mapping each field name to an ``(int, float)`` tuple.
    Blank lines are skipped.
    """
    stats = {}
    with open(path) as handle:
        for raw in handle:
            line = raw.strip()
            if not line:
                continue
            columns = line.split("\t")
            stats[columns[0]] = (int(columns[1]), float(columns[2]))
    return stats


def _load_field_percentiles(path):
    """Read the ``field;v1 v2 v3 ...`` file at *path*.

    Returns a dict mapping each field name to its list of float values, in
    file order.  Blank lines are skipped.
    """
    breakpoints = {}
    with open(path) as handle:
        for raw in handle:
            line = raw.strip()
            if not line:
                continue
            parts = line.split(";")
            assert len(parts) == 2, "expected 'field;values' but got %r" % (line,)
            breakpoints[parts[0]] = [float(value) for value in parts[1].split()]
    return breakpoints


def get_expertise_field(authors, limit=None):
    """Aggregate per-field expertise figures for one developer's identities.

    Args:
        authors: iterable of ``"Name <email>"`` strings.  Entries sharing an
            e-mail address are looked up only once.
        limit: when truthy, keep only the first ``limit`` fields after sorting
            by the summed float value, largest first.  ``None`` (and ``0``)
            keep every field.

    Returns:
        A list of ``(field, (count, score, field_int, field_float, percentile))``
        tuples ordered by ``score`` descending, where

        * ``count`` / ``score`` are the integer and float values summed over
          all of the developer's e-mail addresses in
          ``data/developer_expertise_field.sqlite``;
        * ``field_int`` / ``field_float`` are the two numeric columns stored
          for that field in ``data/developer_expertise_field_cnt.txt``;
        * ``percentile`` is the insertion position of ``score`` in the field's
          value list from ``data/developer_expertise_field_percentile.txt``,
          multiplied by ``100 / 1002`` and rounded to two decimals.

    Raises:
        ValueError: if an author entry contains no ``"<"``.
        AssertionError: if a stored record or percentile line is malformed.
        KeyError: if a field found in the SQLite data is missing from either
            text file.
    """
    emails = _extract_emails(authors)

    # field -> [summed int value, summed float value]
    developer_field = defaultdict(lambda: [0, 0])
    with SqliteDict("data/developer_expertise_field.sqlite") as info:
        for email in emails:
            record = info.get(email, None)
            if not record:
                continue
            # A record is a flat ";"-joined sequence of (field, int, float) triples.
            parts = record.split(";")
            assert len(parts) % 3 == 0, "malformed expertise record for %r" % (email,)
            for j in range(0, len(parts), 3):
                entry = developer_field[parts[j]]
                entry[0] += int(parts[j + 1])
                entry[1] += float(parts[j + 2])

    ranked = sorted(developer_field.items(), key=lambda item: item[1][1], reverse=True)
    if limit:
        ranked = ranked[:limit]

    field_info = _load_field_counts("data/developer_expertise_field_cnt.txt")
    field_percentile = _load_field_percentiles("data/developer_expertise_field_percentile.txt")

    # NOTE(review): 1002 is presumably the number of breakpoints stored per
    # field in the percentile file -- confirm against whatever generates it.
    # bisect also requires each value list to be sorted ascending.
    breakpoint_count = 1002.0

    result = []
    for field, (count, score) in ranked:
        position = bisect.bisect_left(field_percentile[field], score)
        percentile = round(position * 100 / breakpoint_count, 2)
        field_int, field_float = field_info[field]
        result.append((field, (int(count), float(score), field_int, field_float, percentile)))
    return result
if __name__ == '__main__':
    # Manual smoke run: each entry lists the author strings passed together in
    # one call; the top-10 result of every call is printed in turn.
    sample_author_groups = (
        ['qinhanmin2014 <qinhanmin2005@sina.com>', 'Hanmin Qin <qinhanmin2005@sina.com>'],
        ['Joel Nothman <joel.nothman@gmail.com>', 'jnothman <joel.nothman@gmail.com>'],
        ['Thomas Fan <thomasjpfan@gmail.com>', 'Thomas J Fan <thomasjpfan@gmail.com>'],
        ['adrin.jalali <adrin.jalali@gmail.com>', 'adrinjalali <adrin.jalali@gmail.com>', 'Adrin Jalali <adrin.jalali@gmail.com>', 'adrin jalali <adrin.jalali@gmail.com>', 'adrin <adrin.jalali@gmail.com>'],
    )
    for author_aliases in sample_author_groups:
        print(get_expertise_field(author_aliases, limit=10))
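# NOTE(review): web-page notice captured with the file, not program code: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# "If you confirm the content does not involve inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates national laws and regulations, you may click submit to appeal and we will handle it as soon as possible."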