1 Star 0 Fork 18

Stephen.Gao/Community-document

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
translate.py 8.42 KB
一键复制 编辑 原始数据 按行查看 历史
chenchi 提交于 2023-04-19 21:37 . 恢复被github强制覆盖的内容
import requests
import random
from hashlib import md5
import time
import os
import sys
import logging
# Logging setup: the root logger writes INFO and above to a per-run log file.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Log file lives in a "logs" folder next to this script and is named after
# the start time of the run (minute resolution): logs/<YYYYmmddHHMM>.log
rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))
log_path = os.path.dirname(os.path.abspath(__file__)) + '/logs/'
log_name = log_path + rq + '.log'
logfile = log_name
# FileHandler does not create missing directories, so make sure logs/ exists
# before opening the file (otherwise the script dies with FileNotFoundError).
os.makedirs(log_path, exist_ok=True)
# Explicit UTF-8: the log messages contain Chinese text, which the platform
# default encoding may not be able to represent.
fh = logging.FileHandler(logfile, mode='w', encoding='utf-8')
formatter = logging.Formatter("%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
fh.setFormatter(formatter)
logger.addHandler(fh)
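# TODO "QuecPython" was translated as "Quecpthon" (resolved)
# TODO Image files must not be translated (resolved)
# TODO The first letter of code lines was capitalized (resolved)
# TODO Parentheses were converted to Chinese full-width ones (resolved)
# TODO An extra space appeared between "* *", so bold text did not render (resolved)
# TODO Table handling: "|" goes missing after translation
# TODO YAML files also need the same fix-ups (resolved)
# TODO An extra space appeared in comments "<!-- * -->" (resolved)
# TODO Only the first half of a table-of-contents entry was translated (resolved)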
# Baidu Translate credentials. Set BAIDU_TRANSLATE_APPID / BAIDU_TRANSLATE_APPKEY
# in the environment to use your own; the previous hard-coded values remain as
# the fallback so existing runs keep working.
# NOTE(review): the fallback values are credentials committed to the
# repository - consider rotating them and dropping the defaults.
appid = os.environ.get('BAIDU_TRANSLATE_APPID') or '20210722000894813'
appkey = os.environ.get('BAIDU_TRANSLATE_APPKEY') or 'QBqv9vS1CIgKSj_foFed'
# For list of language codes, please refer to `https://api.fanyi.baidu.com/doc/21`
# from_lang = 'auto'
from_lang = 'zh'
to_lang = 'en'
# Full request URL = endpoint + path
endpoint = 'http://api.fanyi.baidu.com'
path = '/api/trans/vip/translate'
url = endpoint + path
def delete_txt(file_path):
    """Remove the file at ``file_path`` if it exists.

    Returns:
        True when the path does not exist or was removed successfully;
        False when ``os.remove`` raised (the error is printed to stdout).
    """
    # Nothing to delete counts as success.
    if not os.path.exists(file_path):
        return True
    try:
        os.remove(file_path)
    except Exception as err:
        print("remove file error.", err)
        # Exception class followed by the exception instance.
        print(type(err), err)
        return False
    return True
def write_txt(file_path, content):
    """Append ``content`` to the UTF-8 text file at ``file_path``.

    The file is opened in ``'a+'`` mode, so it is created when missing and
    existing content is kept.
    """
    with open(file_path, mode='a+', encoding='utf-8') as out_file:
        out_file.write(content)
def check_contain_chinese(check_str):
    """Tell whether UTF-8 encoded bytes contain a Chinese character.

    Args:
        check_str: bytes holding UTF-8 text (it is decoded here).

    Returns:
        True if any decoded character lies in U+4E00..U+9FFF, else False.
    """
    decoded = check_str.decode('utf-8')
    return any(u'\u4e00' <= char <= u'\u9fff' for char in decoded)
def space_str_handle(space_str):
    """Split a code line at its first ``#`` so the original layout can be kept.

    Returns:
        ``[before, after]`` when the line contains ``#`` (the ``#`` itself is
        dropped), otherwise a one-element list holding the whole line.
    """
    head, hash_mark, tail = space_str.partition('#')
    if hash_mark:
        return [head, tail]
    return [head]
def space_yaml_handle(space_str):
    """Extract the leading-space indentation of a line (used for yaml/indented text).

    Args:
        space_str: the line to inspect.

    Returns:
        ``[indent, space_str]`` where ``indent`` is the run of leading space
        characters and the second element is the unmodified input line.
        Empty or all-space input also yields a two-element list (previously
        the function fell off the end and returned ``None`` for such input).
    """
    # Only plain spaces count as indentation, matching the original loop.
    indent_len = len(space_str) - len(space_str.lstrip(' '))
    return [space_str[:indent_len], space_str]
def space_path_handle(space_str: str) -> list:
    """Split a markdown line in front of its first link target.

    The link target must not be sent to the translator (translation inserts
    many extra spaces into paths), so the line is cut right after the ``]``
    of the first ``](`` sequence.

    Args:
        space_str: the markdown line.

    Returns:
        ``[text_up_to_and_including_bracket, rest]`` when the line contains
        ``](``; otherwise ``[space_str, '']``.
    """
    marker = space_str.find('](')
    if marker == -1:
        return [space_str, '']
    print(space_str)
    # Cut at the "]" that belongs to "](", not at the first "]" in the line:
    # an earlier bracket (e.g. "[note] see [doc](a.md)") used to cut too early.
    return [space_str[:marker + 1], space_str[marker + 1:]]
def get_english(words: str) -> str:
    """Translate ``words`` through the Baidu Translate HTTP API.

    The request uses the module-level ``appid``, ``appkey``, ``from_lang``,
    ``to_lang`` and ``url`` settings. The ``dst`` fields of all entries in
    the response's ``trans_result`` are concatenated into one string.

    Args:
        words: the text to translate.

    Returns:
        The translated text, or ``words`` unchanged when the request or the
        response handling fails for any reason (the failure is printed and
        written to the log).
    """
    query = words

    # Generate salt and sign
    def make_md5(s, encoding='utf-8'):
        return md5(s.encode(encoding)).hexdigest()

    salt = random.randint(32768, 65536)
    sign = make_md5(appid + query + str(salt) + appkey)

    # Build request
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    payload = {'appid': appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}

    # Pause before every request - presumably to stay under the API rate
    # limit; confirm against the account's quota before changing it.
    time.sleep(2)
    try:
        # A timeout keeps a stalled connection from hanging the whole run;
        # on timeout the except branch falls back to the untranslated text.
        r = requests.post(url, params=payload, headers=headers, timeout=30)
        result = r.json()
        print(result)
        if 'trans_result' not in result:
            # Put the whole response into the failure reason instead of a
            # bare KeyError('trans_result').
            raise ValueError('no trans_result in response: %s' % (result,))
        ret = ''.join(item['dst'] for item in result['trans_result'])
        # Fix the mistranslated product name
        ret = ret.replace('Quecpthon', 'QuecPython')
    except Exception as e:
        # Best effort: keep the source text so the output file stays complete.
        ret = query
        print(e)
        logger.info('翻译失败的大哥:' + ret)
        logger.info('翻译失败的原因:' + str(e))
    return ret
def main(src_path):
    """Translate the file at ``src_path`` line by line into its output file.

    The output path is ``src_path`` with ``'zh'`` replaced by ``'en'``; every
    processed line is appended to it via ``write_txt``. Per line:

    * empty lines and lines starting with ``![`` (images) are copied as-is;
    * lines without Chinese characters are copied as-is;
    * inside a ```` ```python ```` fence only the part after the first ``#``
      is translated (the whole line when there is no ``#``);
    * other lines starting with a space keep their indentation and get the
      translated text after it;
    * remaining lines are translated up to the first link target, which is
      appended untranslated.

    Args:
        src_path: path of the source file (read as UTF-8).
    """
    # NOTE(review): str.replace swaps every "zh" in the path, not only the
    # language directory - verify paths contain "zh" only there.
    out_path = src_path.replace('zh', 'en')
    code_flag = False  # True while inside a ```python fenced block
    # "with" closes the source file even if translation raises part-way.
    with open(src_path, 'r', encoding='utf-8') as src_file:
        for line in src_file:
            line = line.strip('\n')
            if len(line) < 1:  # the line is only a line break
                write_txt(out_path, "\n")
                continue
            # Image paths are not translated (the file would not be found afterwards)
            if line.startswith('!['):
                write_txt(out_path, line + "\n")
                continue
            if line == '```python':
                code_flag = True
            if line == '```':
                code_flag = False
            if not check_contain_chinese(line.encode('utf-8')):
                write_txt(out_path, line + "\n")
                continue
            if code_flag:
                parts = space_str_handle(line)
                if len(parts) == 2:
                    # Code before "#" is kept; only the comment is translated.
                    print('待翻译内容:', parts[1])
                    data = get_english(parts[1])
                    write_txt(out_path, parts[0] + '#' + data + "\n")  # Baidu API
                else:
                    print('待翻译内容:', parts[0])
                    data = get_english(parts[0])
                    write_txt(out_path, data + "\n")  # Baidu API
            elif line[0] == " ":  # indented line (original note: not a TOC entry)
                parts = space_yaml_handle(line)
                print('待翻译内容:', parts[1])
                data = get_english(parts[1])
                write_txt(out_path, parts[0] + tran_handler(data) + "\n")
            else:
                parts = space_path_handle(line)
                print('待翻译内容:', parts[0])
                data = get_english(parts[0])
                write_txt(out_path, tran_handler(data) + parts[1] + "\n")
def tran_handler(hanle_str):
    """Repair markdown markup that was damaged by translation.

    Applies fixed replacements (``* * *`` and ``* *`` to ``**``, ``- >`` to
    ``-->``, ``-Label`` to ``- label``) and makes sure a heading's leading
    run of ``#`` is followed by a space.

    Args:
        hanle_str: the translated line.

    Returns:
        The repaired line.
    """
    hanle_str = hanle_str.replace('* * *', '**')  # extra spaces added by translation (sometimes one more)
    hanle_str = hanle_str.replace('* *', '**')  # extra space added by translation
    hanle_str = hanle_str.replace('- >', '-->')  # restore what translation dropped
    hanle_str = hanle_str.replace('-Label', '- label')  # restore what translation dropped
    # hanle_str = hanle_str.replace('] (', '](')
    if not hanle_str.startswith('#'):
        return hanle_str
    # Number of leading "#" characters, i.e. the heading level.
    level = len(hanle_str) - len(hanle_str.lstrip('#'))
    # Insert the separating space only when it is missing, so a heading that
    # already reads "## Title" does not end up with two spaces.
    if hanle_str[level:level + 1] == ' ':
        return hanle_str
    return hanle_str[:level] + ' ' + hanle_str[level:]
if __name__ == '__main__':
    # Single file: the source path is the first command-line argument when
    # one is given, otherwise the original hard-coded default.
    default_src = 'E:\\teedoc_wiki\\新版文档中心\\Community-document\\docs\\sbs\\zh\\README.md'
    root_list = sys.argv[1] if len(sys.argv) > 1 else default_src
    # main() appends to the output file, so remove any previous output first.
    delete_txt(root_list.replace('zh', 'en'))
    main(root_list)
    # Alternative: one directory tree
    # root_list = 'E:\\teedoc_wiki\\新版文档中心\\Community-document\\docs\\FAQ\\zh'
    # for root, dirs, files in os.walk(root_list, topdown=False):
    #     for name in files:
    #         if name.split(".")[-1] in ('md', 'yaml'):
    #             # print(os.path.join(root, name))
    #             delete_txt(os.path.join(root, name).replace('zh', 'en'))  # remove previous output
    #             main(os.path.join(root, name))
    #             time.sleep(5)
    # Alternative: the whole project
    # root_path = 'E:\\teedoc_wiki\\新版文档中心\\Community-document\\docs\\'
    # root_list = os.listdir(root_path)
    # for i in root_list:
    #     print(os.path.join(root_path, i))
    #     for root, dirs, files in os.walk(os.path.join(root_path, i+'\\zh'), topdown=False):
    #         for name in files:
    #             if name.split(".")[-1] in ('md', 'yaml'):
    #                 delete_txt(os.path.join(root, name).replace('zh', 'en'))  # remove previous output
    #                 main(os.path.join(root, name))
    #                 time.sleep(5)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/stephen-gao/Community-document.git
git@gitee.com:stephen-gao/Community-document.git
stephen-gao
Community-document
Community-document
develop

搜索帮助