1 Star 5 Fork 1

kikouousya/txt_tag2billfish

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test2.py 7.11 KB
一键复制 编辑 原始数据 按行查看 历史
kikouousya 提交于 2023-03-30 01:20 . first commit
import os
import shutil
import threading
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
import sqlite3
class DbController:
    """Import tags from sidecar ``.txt`` files into a Billfish SQLite database.

    For every ``.txt`` file found under the library directory that has an
    image with the same base name next to it, the comma-separated tags in the
    text file are attached to that image's ``bf_file`` row through
    ``bf_tag_join_file``.  Tags that do not exist yet are created in
    ``bf_tag_v2`` as children of the "Auto-generated" tag.

    Args:
        db_path: Path of the SQLite database file.  Defaults to
            ``'billfish.db'`` in the current working directory.
    """

    # Extensions probed, in this order, when looking for the image that a
    # tag file belongs to.
    _IMAGE_EXTENSIONS = (
        'bmp', 'dib', 'rle', 'png', 'jpg', 'jpeg', 'jpe', 'jfif', 'gif',
        'tif', 'tiff', 'ico', 'icon', 'webp', 'svg', 'cdr', 'psd', 'psb',
        'ai', 'pdf',
    )

    def __init__(self, db_path='billfish.db'):
        self.db_path = db_path
        # Serialises the write phase of the worker threads; every worker uses
        # its own connection to the same database file.
        self.lock = threading.Lock()

    def get_file_id(self, conn, file_path):
        """Return the ``bf_file.id`` of the file at *file_path*.

        Args:
            conn: Open ``sqlite3`` connection to the database.
            file_path: Path of the file relative to the library root, using
                ``os.sep`` as separator.

        Returns:
            The id of the matching ``bf_file`` row.

        Raises:
            LookupError: If a folder on the path or the file itself has no
                row in the database.
        """
        folders = file_path.split(os.sep)
        cursor = conn.cursor()
        try:
            # Folders directly under the library root have pid = 0; walk the
            # folder chain down to the file's parent folder.
            folder_id = 0
            for folder_name in folders[:-1]:
                cursor.execute(
                    "SELECT id FROM bf_folder WHERE name = ? AND pid = ?",
                    (folder_name, folder_id))
                row = cursor.fetchone()
                if row is None:
                    raise LookupError(
                        f'folder {folder_name!r} of {file_path!r} not found in bf_folder')
                folder_id = row[0]
            cursor.execute(
                "SELECT id FROM bf_file WHERE name = ? AND pid = ?",
                (folders[-1], folder_id))
            row = cursor.fetchone()
            if row is None:
                raise LookupError(f'file {file_path!r} not found in bf_file')
            return row[0]
        finally:
            cursor.close()

    @staticmethod
    def _find_image(directory, stem):
        """Return the first existing ``<directory>/<stem>.<ext>`` path, or None."""
        for ext in DbController._IMAGE_EXTENSIONS:
            candidate = os.path.join(directory, stem + '.' + ext)
            if os.path.exists(candidate):
                return candidate
        return None

    def _attach_tags(self, conn, file_id, tag_list, auto_tag_id):
        """Link every tag in *tag_list* to *file_id*, creating missing tags."""
        cursor = conn.cursor()
        try:
            for tag_name in tag_list:
                cursor.execute('SELECT id FROM bf_tag_v2 WHERE name = ?', (tag_name,))
                res = cursor.fetchone()
                if res:
                    tag_id = res[0]
                else:
                    # New tags become children of the "Auto-generated" tag;
                    # seq 0.5 is only a placeholder that update_seq() rewrites.
                    cursor.execute(
                        'INSERT INTO bf_tag_v2 (name, born, seq, icon, color, pid) VALUES (?, ?, ?, ?, ?, ?)',
                        (tag_name, int(time.time()), 0.5, 0, 0, auto_tag_id))
                    tag_id = cursor.lastrowid
                # Only link the tag once, so re-running the import is harmless.
                cursor.execute('SELECT id FROM bf_tag_join_file WHERE file_id = ? and tag_id = ?',
                               (file_id, tag_id))
                if not cursor.fetchone():
                    cursor.execute('INSERT INTO bf_tag_join_file (file_id, tag_id, born) VALUES (?, ?, ?)',
                                   (file_id, tag_id, int(time.time())))
            conn.commit()
        finally:
            cursor.close()

    def process_file(self, file_path, auto_tag_id, base_dir):
        """Import the tags of one ``.txt`` file; ignore any other file.

        The text file is renamed with a leading ``.`` (if it has none), the
        image with the same base name is looked up in the database, and each
        non-empty comma-separated tag is attached to it.  Any error is
        printed with its traceback and swallowed so that one bad file does
        not abort the batch.

        Args:
            file_path: Path of the candidate file.
            auto_tag_id: ``bf_tag_v2.id`` of the parent tag for new tags.
            base_dir: Library root that database paths are relative to.
        """
        conn = None
        try:
            txt_dir, txt_file_name = os.path.split(file_path)
            base_name, txt_file_ext = os.path.splitext(txt_file_name)
            # Only .txt files are processed.
            if txt_file_ext.lower() != '.txt':
                return
            txt_file_path = file_path
            print(f'processing: {txt_file_path}')

            # An already processed tag file carries a '.' prefix that the
            # image name does not have.
            real_base_name = base_name[1:] if base_name.startswith('.') else base_name
            image_path = self._find_image(txt_dir, real_base_name)
            if image_path is None:
                print(f'Image file not found for {txt_file_path}')
                return

            # Connect only now, so skipped files never open the database.
            conn = sqlite3.connect(self.db_path)

            # If the file name doesn't start with a '.', rename it with a '.' prefix.
            if not txt_file_name.startswith('.'):
                hidden_path = os.path.join(txt_dir, '.' + txt_file_name)
                os.rename(txt_file_path, hidden_path)
                txt_file_path = hidden_path

            file_id = self.get_file_id(conn, os.path.relpath(image_path, base_dir))

            with open(txt_file_path, encoding='utf-8') as f:
                tag_str = f.read()
            # Drop empty entries (trailing comma, ',,', empty file) so that no
            # tag with an empty name is ever created.
            tag_list = [tag for tag in (raw.strip() for raw in tag_str.split(',')) if tag]

            with self.lock:
                self._attach_tags(conn, file_id, tag_list, auto_tag_id)
            print(f'Finished processing {txt_file_path}, with tags: {tag_list}')
        except Exception:
            # Worker boundary: report the failure and keep the batch running.
            traceback.print_exc()
        finally:
            if conn is not None:
                conn.close()

    def update_seq(self, conn, auto_tag_id):
        """Rewrite ``seq`` of the child tags of *auto_tag_id* by usage.

        The tag attached to the most files gets the smallest ``seq``; values
        are spread evenly over ``(0, 1]``.

        Args:
            conn: Open ``sqlite3`` connection to the database.
            auto_tag_id: ``bf_tag_v2.id`` of the parent tag.
        """
        cursor = conn.cursor()
        try:
            # Get the count of files associated with each tag.
            cursor.execute('''
                SELECT t.id, COUNT(f.id) AS file_count
                FROM bf_tag_v2 AS t
                LEFT JOIN bf_tag_join_file AS jf ON jf.tag_id = t.id
                LEFT JOIN bf_file AS f ON jf.file_id = f.id
                WHERE t.pid = ?
                GROUP BY t.id
            ''', (auto_tag_id,))
            rows = cursor.fetchall()
            # Most used tags first.
            rows_sorted = sorted(rows, key=lambda row: row[1], reverse=True)
            total = len(rows_sorted)
            cursor.executemany(
                'UPDATE bf_tag_v2 SET seq = ? WHERE id = ?',
                [(rank / total, row[0]) for rank, row in enumerate(rows_sorted, start=1)])
            conn.commit()
        finally:
            cursor.close()
        print('Seq field updated successfully.')

    def main(self):
        """Back up the database, import all tag files, then re-rank the tags.

        The library root is the parent of the current working directory;
        every file below it is handed to :meth:`process_file` on a pool of
        ten worker threads.
        """
        # Back up the database just in case.  NOTE: the copy overwrites the
        # backup left by a previous run.
        backup_path = os.path.join(os.path.dirname(self.db_path),
                                   'billfish备份(若要还原删除这些后缀).db')
        shutil.copy(self.db_path, backup_path)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            try:
                # Create the 'Auto-generated' parent tag if it does not exist yet.
                cursor.execute('SELECT id FROM bf_tag_v2 WHERE name = ?', ('Auto-generated',))
                res = cursor.fetchone()
                if not res:
                    cursor.execute(
                        "INSERT INTO bf_tag_v2 (name, born, seq, icon, color, pid) VALUES (?, ?, ?, ?, ?, ?)",
                        ('Auto-generated', int(time.time()), 0.5, 0, 0, 0))
                    auto_tag_id = cursor.lastrowid
                    print(f'tag "Auto-generated" Not exist, generated by id={auto_tag_id}')
                else:
                    auto_tag_id = res[0]
                    print(f'tag "Auto-generated" exist, id={auto_tag_id}')
            finally:
                cursor.close()
            # Commit before the workers start so this connection holds no
            # write lock while they use their own connections.
            conn.commit()

            base_dir = os.path.dirname(os.path.abspath('.'))
            file_paths = [os.path.join(dp, f) for dp, dn, filenames in os.walk(base_dir) for f in filenames]

            # Tag every file under the library root; leaving the ``with``
            # block waits for all workers to finish.
            with ThreadPoolExecutor(max_workers=10) as executor:
                for file_path in file_paths:
                    executor.submit(self.process_file, file_path, auto_tag_id, base_dir)

            # Order the tags from most used to least used.
            self.update_seq(conn, auto_tag_id)
        finally:
            conn.close()
        print('all done')
if __name__ == '__main__':
    # Script entry point: run the whole import against the database in the
    # current working directory.
    do = DbController()
    do.main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/kikouousya/txt_tag2billfish.git
git@gitee.com:kikouousya/txt_tag2billfish.git
kikouousya
txt_tag2billfish
txt_tag2billfish
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385