代码拉取完成,页面将自动刷新
import os
import numpy as np
import pandas as pd
from sqlitedict import SqliteDict
def get_author_projects(authors, personal_limit=None, community_limit=None):
    """Collect and rank the projects recorded for a set of author identities.

    Each entry of ``authors`` is a string of the form ``"Name <email>"``.
    Only the e-mail part is used; duplicate e-mails are looked up once.

    For every e-mail the matching shard database under
    ``data/author_projects_db/`` is read.  A stored value is a
    ``;``-separated string made of 6-field records: a project key followed
    by five integers.  Records for the same project coming from different
    e-mails are merged: integers 0, 1 and 3 must agree, integer 2 is summed
    and integer 4 keeps the maximum.
    (presumably integer 1 is the project's total contribution count and
    integer 2 the author's share of it -- TODO confirm against the code
    that builds the databases.)

    A project is "personal" when ``stats[2] / stats[1] >= 0.9`` and
    "community" otherwise.  Personal projects are ordered by
    ``(-stats[0], -stats[1])``; community projects by
    ``(-stats[0] * stats[2] / stats[1], -stats[1])``.

    Args:
        authors: iterable of ``"Name <email>"`` strings.
        personal_limit: maximum number of personal projects to return.
            ``None`` (and also ``0``, because the check is a truthiness
            test) means no truncation.
        community_limit: same as ``personal_limit`` for community projects.

    Returns:
        A 4-tuple ``(personal_projects, community_projects,
        num_personal_projects, num_community_projects)``.  The two lists
        hold ``(project_key, stats)`` pairs where ``stats`` is the list of
        five merged integers, with the project's entry from
        ``data/project_summary.sqlite`` appended as a sixth element when a
        truthy one exists.  The two counts are taken before truncation.

    Raises:
        ValueError: if an author string contains no ``"<"`` or a stored
            numeric field is not an integer.
        AssertionError: if a stored value is not a multiple of six fields
            or records for the same project disagree.
        ZeroDivisionError: if a project's ``stats[1]`` is zero.
    """
    # De-duplicate e-mails in first-seen order; the set keeps the membership
    # test O(1) instead of rescanning the list for every author.
    emails = []
    seen_emails = set()
    for author in authors:
        # rsplit on the last "<" so a display name that itself contains "<"
        # still parses; a string with no "<" still raises ValueError.
        _, email = author.rsplit("<", 1)
        # Drops the final character, assumed to be the closing ">".
        email = email[:-1].strip()
        if email not in seen_emails:
            seen_emails.add(email)
            emails.append(email)

    ret = {}
    for email in emails:
        # NOTE(review): hash() of a str is salted per process on Python 3
        # unless PYTHONHASHSEED is fixed, so this shard index is only stable
        # if the databases were written under the same hashing -- confirm.
        shard_path = 'data/author_projects_db/author_projects_{}.sqlite'.format(hash(email) % 32)
        with SqliteDict(shard_path) as info:
            cur_info = info.get(email, None)
        if not cur_info:
            continue
        fields = cur_info.split(";")
        assert len(fields) % 6 == 0
        for i in range(0, len(fields), 6):
            project = fields[i]
            stats = [int(value) for value in fields[i + 1:i + 6]]
            if project not in ret:
                ret[project] = stats
                continue
            merged = ret[project]
            assert merged[0] == stats[0]
            assert merged[1] == stats[1]
            merged[2] += stats[2]
            assert merged[3] == stats[3]
            merged[4] = max(merged[4], stats[4])

    personal_projects = {}
    community_projects = {}
    for project, stats in ret.items():
        if stats[2] / float(stats[1]) >= 0.9:
            personal_projects[project] = stats
        else:
            community_projects[project] = stats

    personal_projects = sorted(
        personal_projects.items(),
        key=lambda item: (-item[1][0], -item[1][1]))
    community_projects = sorted(
        community_projects.items(),
        key=lambda item: (-item[1][0] * (item[1][2] / float(item[1][1])), -item[1][1]))

    # Counts are reported before the optional truncation below.
    num_personal_projects = len(personal_projects)
    num_community_projects = len(community_projects)
    if personal_limit:
        personal_projects = personal_projects[:personal_limit]
    if community_limit:
        community_projects = community_projects[:community_limit]

    # Nothing to annotate: skip opening the summary database altogether.
    if not personal_projects and not community_projects:
        return personal_projects, community_projects, num_personal_projects, num_community_projects

    # Context manager guarantees the summary database is closed, matching
    # how the shard databases are handled above.
    with SqliteDict("data/project_summary.sqlite") as project_summary:
        for selected in (personal_projects, community_projects):
            for project, stats in selected:
                cur_summary = project_summary.get(project, None)
                if cur_summary:
                    stats.append(cur_summary)

    return personal_projects, community_projects, num_personal_projects, num_community_projects
if __name__ == '__main__':
    # Manual smoke test: each entry is one person's set of commit identities,
    # queried with both result lists capped at ten projects.
    demo_author_groups = [
        ['qinhanmin2014 <qinhanmin2005@sina.com>', 'Hanmin Qin <qinhanmin2005@sina.com>'],
        ['Joel Nothman <joel.nothman@gmail.com>', 'jnothman <joel.nothman@gmail.com>'],
        ['Thomas Fan <thomasjpfan@gmail.com>', 'Thomas J Fan <thomasjpfan@gmail.com>'],
        [
            'adrin.jalali <adrin.jalali@gmail.com>',
            'adrinjalali <adrin.jalali@gmail.com>',
            'Adrin Jalali <adrin.jalali@gmail.com>',
            'adrin jalali <adrin.jalali@gmail.com>',
            'adrin <adrin.jalali@gmail.com>',
        ],
        [
            "Joel Nothman <joel@vlan-2666-10-17-16-173.staff.wireless.sydney.edu.au>",
            "Joel Nothman <joel.nothman@gmail.com>",
            "jnothman <joel.nothman@gmail.com>",
            "jnothman <jnothman@student.usyd.edu.au>",
            "Joel Nothman <jnothman@student.usyd.edu.au>",
        ],
    ]
    for author_group in demo_author_groups:
        print(get_author_projects(author_group, personal_limit=10, community_limit=10))
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。