master

分支 (1)

标签 (2)

管理

管理

master

1

1.0.0

txt_tag2billfish
/
test2.py

import os
import shutil
import threading
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
import sqlite3


class DbController:
    """Import comma-separated tags from sidecar ``.txt`` files into ``billfish.db``.

    For every ``<name>.txt`` (or already hidden ``.<name>.txt``) found under the
    scanned directory, the image with the same base name is looked up in the
    ``bf_file`` / ``bf_folder`` tables and each tag listed in the text file is
    attached to it through ``bf_tag_v2`` / ``bf_tag_join_file``.  Tags created
    by this tool are stored as children of a tag named ``Auto-generated``.

    The database file ``billfish.db`` is always opened relative to the current
    working directory.
    """

    # Extensions tried, in this order, when looking for the image that belongs
    # to a tag file; the first existing file wins.
    IMAGE_EXTENSIONS = ('bmp', 'dib', 'rle', 'png', 'jpg', 'jpeg', 'jpe', 'jfif', 'gif', 'tif', 'tiff', 'ico',
                        'icon', 'webp', 'svg', 'cdr', 'psd', 'psb', 'ai', 'pdf')

    def __init__(self, ):
        # Serialises the write section of process_file across worker threads.
        self.lock = threading.Lock()

    def get_file_id(self, conn, file_path):
        """Return the ``bf_file.id`` of *file_path*.

        Args:
            conn: Open ``sqlite3`` connection to the Billfish database.
            file_path: Path of the file relative to the scanned root, using
                ``os.sep`` as separator.  Every leading component is resolved
                through ``bf_folder`` starting at parent id ``0``.

        Returns:
            The integer id of the matching ``bf_file`` row.

        Raises:
            LookupError: If a folder on the path or the file itself has no
                matching row in the database.
        """
        parts = file_path.split(os.sep)
        cursor = conn.cursor()
        try:
            # Walk the folder chain from the root (pid 0) down to the file's parent.
            folder_id = 0
            for folder_name in parts[:-1]:
                cursor.execute("SELECT id FROM bf_folder WHERE name = ? AND pid = ?", (folder_name, folder_id))
                row = cursor.fetchone()
                if row is None:
                    raise LookupError(
                        f'folder {folder_name!r} (parent id {folder_id}) not found in database for {file_path!r}')
                folder_id = row[0]

            cursor.execute("SELECT id FROM bf_file WHERE name = ? AND pid = ?", (parts[-1], folder_id))
            row = cursor.fetchone()
            if row is None:
                raise LookupError(f'file {file_path!r} not found in database (folder id {folder_id})')
            file_id = row[0]
        finally:
            cursor.close()
        print(file_id)
        return file_id

    def _find_image(self, directory, stem):
        """Return the first existing ``<directory>/<stem>.<ext>`` image path, or ``None``."""
        for ext in self.IMAGE_EXTENSIONS:
            candidate = os.path.join(directory, stem + '.' + ext)
            if os.path.exists(candidate):
                return candidate
        return None

    def _attach_tags(self, cursor, file_id, tag_list, auto_tag_id):
        """Link every tag in *tag_list* to *file_id*, creating missing tags under *auto_tag_id*."""
        for tag_name in tag_list:
            cursor.execute('SELECT id FROM bf_tag_v2 WHERE name = ?', (tag_name,))
            res = cursor.fetchone()
            if res:
                tag_id = res[0]
            else:
                # Unknown tag: create it as a child of the "Auto-generated" tag.
                cursor.execute(
                    'INSERT INTO bf_tag_v2 (name, born, seq, icon, color, pid) VALUES (?, ?, ?, ?, ?, ?)',
                    (tag_name, int(time.time()), 0.5, 0, 0, auto_tag_id))
                tag_id = cursor.lastrowid
            # Only link the tag when the file does not carry it yet.
            cursor.execute('SELECT id FROM bf_tag_join_file WHERE file_id = ? and tag_id = ?',
                           (file_id, tag_id))
            if not cursor.fetchone():
                cursor.execute('INSERT INTO bf_tag_join_file (file_id, tag_id, born) VALUES (?, ?, ?)',
                               (file_id, tag_id, int(time.time())))

    def process_file(self, file_path, auto_tag_id, base_dir):
        """Apply the tags stored in one ``.txt`` file to its sibling image.

        Files whose extension is not ``.txt`` are ignored.  The text file is
        renamed with a leading ``.`` if it does not have one yet, its content is
        split on commas, and each non-empty tag is attached to the image in the
        database.  Any exception is printed with its traceback and swallowed so
        that one bad file does not stop the remaining ones.

        Args:
            file_path: Path of the candidate file.
            auto_tag_id: ``bf_tag_v2.id`` used as parent for newly created tags.
            base_dir: Root directory; the image path relative to it is what
                gets resolved in the database.
        """
        try:
            txt_file_name = os.path.basename(file_path)
            base_name, txt_file_ext = os.path.splitext(txt_file_name)
            # Only .txt files are processed; check before touching the database.
            if txt_file_ext.lower() != '.txt':
                return

            txt_file_path = file_path
            txt_dir = os.path.dirname(txt_file_path)
            print(f'processing: {txt_file_path}')

            # A tag file that is already hidden (".name.txt") belongs to "name.<ext>".
            real_base_name = base_name[1:] if base_name.startswith('.') else base_name
            image_path = self._find_image(txt_dir, real_base_name)
            if image_path is None:
                print(f'Image file not found for {txt_file_path}')
                return

            # If the file name doesn't start with a '.', rename it with a '.' prefix
            if not txt_file_name.startswith('.'):
                hidden_path = os.path.join(txt_dir, '.' + txt_file_name)
                os.rename(txt_file_path, hidden_path)
                txt_file_path = hidden_path

            conn = sqlite3.connect('billfish.db')
            try:
                file_id = self.get_file_id(conn, os.path.relpath(image_path, base_dir))

                # 'utf-8-sig' also accepts plain UTF-8 but strips a leading BOM,
                # which would otherwise become part of the first tag name.
                with open(txt_file_path, encoding='utf-8-sig') as f:
                    tag_str = f.read()
                # Drop empty entries (empty file, trailing comma, "a,,b") so no
                # tag with an empty name is ever created.
                tag_list = [tag for tag in (raw.strip() for raw in tag_str.split(',')) if tag]

                # The lookup-then-insert sequence must not interleave between threads.
                with self.lock:
                    cursor = conn.cursor()
                    try:
                        self._attach_tags(cursor, file_id, tag_list, auto_tag_id)
                        conn.commit()
                    finally:
                        cursor.close()
            finally:
                conn.close()
            print(f'Finished processing {txt_file_path}, with tags: {tag_list}')
        except Exception:
            # Best effort: report the failure and let the other files proceed.
            traceback.print_exc()

    def update_seq(self, conn, auto_tag_id):
        """Rewrite ``seq`` of every child tag of *auto_tag_id* by usage rank.

        Tags are ordered by the number of files they are attached to, most used
        first, and the tag at 1-based rank ``i`` of ``n`` gets ``seq = i / n``.

        Args:
            conn: Open ``sqlite3`` connection; the changes are committed on it.
            auto_tag_id: ``bf_tag_v2.id`` whose direct children are re-ranked.
        """
        cursor = conn.cursor()
        try:
            # Get the count of files associated with each tag
            cursor.execute('''
                SELECT t.id, COUNT(f.id) AS file_count
                FROM bf_tag_v2 AS t
                LEFT JOIN bf_tag_join_file AS jf ON jf.tag_id = t.id
                LEFT JOIN bf_file AS f ON jf.file_id = f.id
                WHERE t.pid = ?
                GROUP BY t.id
            ''', (auto_tag_id,))
            rows = cursor.fetchall()

            # Sort rows by file_count in descending order
            rows_sorted = sorted(rows, key=lambda x: x[1], reverse=True)
            total = len(rows_sorted)

            # Update the seq field for each row (no-op when there are no tags)
            cursor.executemany(
                'UPDATE bf_tag_v2 SET seq = ? WHERE id = ?',
                [(rank / total, row[0]) for rank, row in enumerate(rows_sorted, start=1)])

            conn.commit()
        finally:
            cursor.close()
        print('Seq field updated successfully.')

    def main(self,):
        """Back up the database, tag every file under the scan root, then re-rank tags.

        The scan root is the parent of the current working directory
        (presumably the script is run from a folder inside the library root --
        verify against the deployment layout).
        """
        # Back up the database just in case.
        shutil.copy('billfish.db', 'billfish备份(若要还原删除这些后缀).db')
        # Connect to the database.
        conn = sqlite3.connect('billfish.db')
        try:
            cursor = conn.cursor()
            try:
                # Make sure the 'Auto-generated' parent tag exists; create it if missing.
                cursor.execute('SELECT id FROM bf_tag_v2 WHERE name = ?', ('Auto-generated',))
                res = cursor.fetchone()
                if not res:
                    cursor.execute(
                        "INSERT INTO bf_tag_v2 (name, born, seq, icon, color, pid) VALUES (?, ?, ?, ?, ?, ?)",
                        ('Auto-generated', int(time.time()), 0.5, 0, 0, 0))
                    auto_tag_id = cursor.lastrowid
                    print(f'tag "Auto-generated" Not exist, generated by id={auto_tag_id}')
                else:
                    auto_tag_id = res[0]
                    print(f'tag "Auto-generated" exist,  id={auto_tag_id}')
                # Commit before the workers start so this connection holds no open
                # write transaction while they write through their own connections.
                conn.commit()
            finally:
                cursor.close()

            base_dir = os.path.dirname(os.path.abspath('.'))
            file_paths = [os.path.join(dp, f) for dp, dn, filenames in os.walk(base_dir) for f in filenames]

            # Walk every file under the root and apply its tags.  Leaving the
            # ``with`` block waits for all submitted tasks to finish.
            with ThreadPoolExecutor(max_workers=10) as executor:
                for file_path in file_paths:
                    executor.submit(self.process_file, file_path, auto_tag_id, base_dir)

            # Order the tags from most used to least used.
            self.update_seq(conn, auto_tag_id)
        finally:
            conn.close()

        print('all done')

        print('all done')


if __name__ == '__main__':
    # Script entry point: run the full backup / tagging / re-ranking pass.
    # To re-rank tags only, call update_seq() with an open connection and the
    # id of the parent tag instead.
    DbController().main()