1 Star 0 Fork 0

木翊/LabelHandle

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
label_handle.py 9.11 KB
一键复制 编辑 原始数据 按行查看 历史
木翊 提交于 2021-12-20 17:27 . Add find files with target tag function.
import os
import re
import sys
# 预设标签
RULE = r"(?ms)(\n^\s<object>\n\s*?<name>)({})(</name>.*?</object>$)" # 标签字符串匹配规则
TYPES = {
"TYPE_ANY": ".*?", # 匹配所有标签
"TYPE_NUM": "\d*?", # 匹配所有数字
"TYPE_CN": "[\u4e00-\u9fa5]*?", # 匹配所有汉字
"TYPE_EN": "[a-zA-Z]*?", # 匹配所有英文
}
def get_rule(labelortype):
return RULE.format(TYPES.get(labelortype, labelortype))
def usage():
print("用于批量处理xml格式的图像标注标签,使用方式如下:\n"
"python label_handle.py <命令>\n"
" <命令>,不区分大小写:\n"
" 查看帮助 :-h 或 help\n"
" 交互模式 :-i 或 interact\n"
" 查看标签 :-l 或 list\n"
" 删除标签 :-d 或 del\n"
" 替换标签 :-c 或 change\n"
" 大小写转换:-s 或 swap\n"
" 获取含指定标签的文件: -f 或 find\n"
" \n"
" -l <标签目录>\n"
" \n"
" -d <标签目录> <输出目录> <标签1> [标签2] [...]\n"
" \n"
" -c <标签目录> <输出目录> <修改标签> <替换标签>\n"
" \n"
" -s <标签目录> <输出目录> <标签1> [标签2] [...]\n"
" \n"
" -f <标签目录> <标签1> [标签2] [...]\n"
" \n"
" <标签>输入`TYPE_NUM`可以表示所有纯数字标签\n"
" 输入`TYPE_CN`可以表示所有纯汉字标签\n"
" 输入`TYPE_EN`可以表示所有纯英文标签\n"
" \n"
" <输出目录>输入`-`表示直接修改源文件\n"
)
sys.exit()
def path_handle(data_path, output_path):
# 判断路径合法性和是否存在
# 输出路径不存在则创建
assert os.path.exists(data_path)
assert os.path.exists(output_path)
def interact():
# 提示信息
print("首先根据提示输入路径,之后可以运行一下命令:\n"
" list:查看所有标签\n"
" change <修改标签> <替换标签>:替换标签\n"
" del <标签1> [标签2] [...]:删除指定标签\n"
" swap <标签1> [标签2] [...]:反转指定标签大小写\n"
" exit:退出\n"
)
# 设置路径
data_path = input("输入标签文件目录:")
output_path = input("输入输出文件目录,留空或输入`-`表示直接修改源文件:")
if not output_path or output_path=="-": output_path = data_path
path_handle(data_path, output_path)
# 运行交互模式
while True:
cmd = input(">> ").split(" ")
if len(cmd) < 0:
continue
cmd_type = cmd[0].lower()
if cmd_type in ["-l", "list"]:
# 查看所有标签
list_label(data_path, output_path, *cmd[1:])
elif cmd_type in ["-c", "change"]:
# 替换标签
change_label(data_path, output_path)
elif cmd_type in ["-d", "del"]:
# 删除标签
del_label(data_path, output_path, *cmd[1:])
elif cmd_type in ["-s", "swap"]:
# 交换大小写
swap_label(data_path, output_path, *cmd[1:])
elif cmd_type in ["-f", "find"]:
# 获取含指定标签的文件
search_file_with_label(data_path, *cmd[1:])
elif cmd_type in ["-e", "exit"]:
# 推出
exit()
else:
print("命令无效!")
def list_label(data_path):
# 获取文件列表
file_list = os.listdir(data_path)
# 遍历文件
label_dict = {}
label_cnt = 0
file_cnt = 0
for file in file_list:
# 筛选文件
if file.split(".")[-1] != "xml": # 跳过非xml文件
continue
file_cnt += 1
file_path = os.path.join(data_path, file)
# 读文件
with open(file_path, 'r', encoding="utf-8") as f:
match = re.findall(get_rule("TYPE_ANY"), f.read())
if match: # 如果文件中匹配到标签
for lab in match:
label_cnt += 1
label_dict[lab[1]] = label_dict.get(lab[1], 0) + 1
else:
# print(f"{file_path}中没有标签")
pass
# # 输出
# print(f"共计文件{file_cnt}个,标签{label_cnt}个")
# print("标签有:", end="")
# for l in sorted(label_dict):
# print(f"{l}:{label_dict[l]}", end=" ")
# print("")
return label_dict
def change_label(data_path, output_path, change_from, change_to):
# 获取文件列表
file_list = os.listdir(data_path)
if output_path == "-":
output_path = data_path
# 遍历文件
for file in file_list:
if file.split(".")[-1] != "xml": # 跳过非xml文件
continue
file_path = os.path.join(data_path, file)
outfile_path = os.path.join(output_path, file)
# 读文件
with open(file_path, 'r', encoding="utf-8") as f:
rst = f.read()
fun = lambda matchobj: matchobj.group(1) + change_to + matchobj.group(3)
rst = re.sub(get_rule(change_from), fun, rst)
# 写文件
with open(outfile_path, 'w', encoding="utf-8") as f:
f.write(rst)
# 提示
print("修改完成")
def del_label(data_path, output_path, *labels):
# 获取文件列表
file_list = os.listdir(data_path)
if output_path == "-":
output_path = data_path
if not os.path.exists(output_path):
os.mkdir(output_path)
# 遍历文件
for file in file_list:
if file.split(".")[-1] != "xml": # 跳过非xml文件
continue
file_path = os.path.join(data_path, file)
outfile_path = os.path.join(output_path, file)
# 读文件
with open(file_path, 'r', encoding="utf-8") as f:
rst = f.read()
# 删除指定标签
for label in labels:
rst = re.sub(get_rule(label), "", rst)
# 判断是否还有标签
match = re.findall(get_rule("TYPE_ANY"), rst)
if match:
# 写文件
with open(outfile_path, 'w', encoding="utf-8") as f:
f.write(rst)
elif os.path.exists(outfile_path):
os.remove(outfile_path)
# 提示
print("修改完成")
def swap_label(data_path, output_path, *labels):
# 获取文件列表
file_list = os.listdir(data_path)
if output_path == "-":
output_path = data_path
# 遍历文件
for file in file_list:
# 筛选和处理文件路径
if file.split(".")[-1] != "xml": # 跳过非xml文件
continue
file_path = os.path.join(data_path, file)
outfile_path = os.path.join(output_path, file)
# 读文件
with open(file_path, 'r', encoding="utf-8") as f:
rst = f.read()
# 处理标签
for label in labels:
# 处理预设类型
fun = lambda matchobj: matchobj.group(1) + \
matchobj.group(2).swapcase() + matchobj.group(3)
rst = re.sub(get_rule(label), fun, rst)
# 写文件
with open(outfile_path, 'w', encoding="utf-8") as f:
f.write(rst)
# 提示
print("修改完成")
def search_file_with_label(data_path, *labels):
'''
获取含有指定标签的文件列表
'''
# 获取文件列表
file_list = os.listdir(data_path)
if not labels:
match_file_list = file_list
return
match_file_list = []
# 遍历文件
for file in file_list:
if file.split(".")[-1] != "xml": # 跳过非xml文件
continue
file_path = os.path.join(data_path, file)
# 读文件
with open(file_path, 'r', encoding="utf-8") as f:
rst = f.read()
for label in labels:
match = re.findall(get_rule(label), rst)
if match:
match_file_list.append(file)
break
# print(match_file_list)
return match_file_list
if __name__ == "__main__":
# 处理参数,进行处理
if len(sys.argv) < 2:
# 交互模式
interact()
cmd = sys.argv[1].lower()
if cmd in ["-h", "help"]:
usage()
elif cmd in ["-i", "interact"]:
# 交互模式
interact()
elif cmd in ["-l", "list"]:
# 查看所有标签
list_label(sys.argv[2])
elif cmd in ["-c", "change"]:
# 替换标签
change_label(*sys.argv[2:])
elif cmd in ["-d", "delete"]:
# 删除标签
del_label(*sys.argv[2:])
elif cmd in ["-s", "swap"]:
# 交换大小写
swap_label(*sys.argv[2:])
elif cmd in ["-f", "find"]:
# 获取含指定标签的文件
search_file_with_label(*sys.argv[2:])
else:
usage()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/MuYiii/LabelHandle.git
git@gitee.com:MuYiii/LabelHandle.git
MuYiii
LabelHandle
LabelHandle
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385