1 Star 0 Fork 0

qinhanmin2005/woc-dashboard

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
get_author_projects.py 3.53 KB
一键复制 编辑 原始数据 按行查看 历史
qinhanmin2005 提交于 2021-05-20 22:52 . small bug
import os
import numpy as np
import pandas as pd
from sqlitedict import SqliteDict
def get_author_projects(authors, personal_limit=None, community_limit=None):
    """Collect and rank the projects recorded for a set of author identities.

    Each entry of ``authors`` is a string of the form ``"Name <email>"``.
    The e-mail addresses are de-duplicated, each one is looked up in a
    sharded ``SqliteDict`` store, and the per-project records found there
    are merged across addresses.  The merged projects are split into
    "personal" and "community" groups, sorted, optionally truncated and
    finally annotated with an entry from ``data/project_summary.sqlite``
    when one exists.

    A stored record is a ``;``-separated string holding groups of six
    fields: a project key followed by five integers.  When the same project
    appears under several addresses, integer fields 0, 1 and 3 must agree,
    field 2 is summed and field 4 keeps the maximum.
    NOTE(review): the meaning of the five integers is not visible in this
    file; field 2 / field 1 looks like the author's share of the project's
    activity -- confirm against the code that builds the databases.

    Args:
        authors: iterable of ``"Name <email>"`` strings.
        personal_limit: maximum number of personal projects to return.
            ``None`` (or any falsy value such as ``0``) means no limit.
        community_limit: same as ``personal_limit`` for community projects.

    Returns:
        A 4-tuple ``(personal_projects, community_projects,
        num_personal_projects, num_community_projects)``.  The first two
        items are lists of ``(project, values)`` tuples where ``values`` is
        the list of five merged integers, with the project summary appended
        as a sixth element when one is found.  The two counts are taken
        before the limits are applied.

    Raises:
        ValueError: if an author string does not contain exactly one ``<``,
            or a stored integer field cannot be parsed.
        AssertionError: if a stored record is malformed or records for the
            same project disagree across addresses.
        ZeroDivisionError: if integer field 1 of a merged project is zero.
    """
    # De-duplicate addresses while keeping first-seen order (dicts preserve
    # insertion order).
    emails = {}
    for author in authors:
        # Unpacking enforces exactly one "<" in the author string.
        _, address = author.split("<")
        # Drop the last character (the closing ">") and surrounding blanks.
        emails[address[:-1].strip()] = None

    ret = {}
    for email in emails:
        # NOTE(review): hash() of a str is randomised per process on
        # Python 3 unless PYTHONHASHSEED is fixed, so this shard index is
        # only stable if the databases were written under the same hash
        # seed -- confirm how the shards were produced.
        shard_path = 'data/author_projects_db/author_projects_{}.sqlite'.format(hash(email) % 32)
        with SqliteDict(shard_path) as info:
            cur_info = info.get(email, None)
        if not cur_info:
            continue
        fields = cur_info.split(";")
        assert len(fields) % 6 == 0
        for start in range(0, len(fields), 6):
            project = fields[start]
            values = [int(field) for field in fields[start + 1:start + 6]]
            if project not in ret:
                ret[project] = values
                continue
            merged = ret[project]
            assert merged[0] == values[0]
            assert merged[1] == values[1]
            merged[2] += values[2]
            assert merged[3] == values[3]
            merged[4] = max(merged[4], values[4])

    # A project is "personal" when field 2 is at least 90% of field 1.
    personal_projects = {}
    community_projects = {}
    for project, values in ret.items():
        if values[2] / float(values[1]) >= 0.9:
            personal_projects[project] = values
        else:
            community_projects[project] = values

    personal_projects = sorted(
        personal_projects.items(),
        key=lambda item: (-item[1][0], -item[1][1]),
    )
    # Community projects are ranked by field 0 weighted by the author's
    # field-2 / field-1 ratio.
    community_projects = sorted(
        community_projects.items(),
        key=lambda item: (-item[1][0] * (item[1][2] / float(item[1][1])), -item[1][1]),
    )

    # Counts are reported before truncation.
    num_personal_projects = len(personal_projects)
    num_community_projects = len(community_projects)
    if personal_limit:
        personal_projects = personal_projects[:personal_limit]
    if community_limit:
        community_projects = community_projects[:community_limit]

    # Only open the summary database when there is something to look up,
    # and make sure it is closed again afterwards.
    selected = personal_projects + community_projects
    if selected:
        with SqliteDict("data/project_summary.sqlite") as project_summary:
            for project, values in selected:
                cur_summary = project_summary.get(project, None)
                if cur_summary:
                    # ``values`` is the same list object held in the result
                    # tuples, so appending here annotates the returned data.
                    values.append(cur_summary)

    return personal_projects, community_projects, num_personal_projects, num_community_projects
if __name__ == '__main__':
    # Manual smoke run: print the ranked projects for a few sample
    # contributors, each given as the list of identities they commit under.
    sample_author_groups = [
        ['qinhanmin2014 <qinhanmin2005@sina.com>', 'Hanmin Qin <qinhanmin2005@sina.com>'],
        ['Joel Nothman <joel.nothman@gmail.com>', 'jnothman <joel.nothman@gmail.com>'],
        ['Thomas Fan <thomasjpfan@gmail.com>', 'Thomas J Fan <thomasjpfan@gmail.com>'],
        [
            'adrin.jalali <adrin.jalali@gmail.com>',
            'adrinjalali <adrin.jalali@gmail.com>',
            'Adrin Jalali <adrin.jalali@gmail.com>',
            'adrin jalali <adrin.jalali@gmail.com>',
            'adrin <adrin.jalali@gmail.com>',
        ],
        [
            "Joel Nothman <joel@vlan-2666-10-17-16-173.staff.wireless.sydney.edu.au>",
            "Joel Nothman <joel.nothman@gmail.com>",
            "jnothman <joel.nothman@gmail.com>",
            "jnothman <jnothman@student.usyd.edu.au>",
            "Joel Nothman <jnothman@student.usyd.edu.au>",
        ],
    ]
    for author_group in sample_author_groups:
        print(get_author_projects(author_group, personal_limit=10, community_limit=10))
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/qinhanmin/woc-dashboard.git
git@gitee.com:qinhanmin/woc-dashboard.git
qinhanmin
woc-dashboard
woc-dashboard
master

搜索帮助