1 Star 2 Fork 2

clw/LaTeX_OCR_PRO

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
build.py 1.47 KB
一键复制 编辑 原始数据 按行查看 历史
ジ兮尘 提交于 2019-07-20 17:09 . init
import click
from model.utils.data_generator import DataGenerator
from model.utils.text import build_vocab, write_vocab
from model.utils.image import build_images
from model.utils.general import Config
# Command-line entry point: creates the train/test/val data generators from
# the data config, runs their build step, then builds a vocabulary from the
# training generator and writes it to the path given by the vocab config.
# NOTE: documented with comments instead of a docstring on purpose -- click
# would expose a docstring as the command's --help text.
@click.command()
@click.option(
    "--data",
    default="configs/data_small.json",
    help="Path to data json config",
)
@click.option(
    "--vocab",
    default="configs/vocab_small.json",
    help="Path to vocab json config",
)
def main(data, vocab):
    data_config = Config(data)

    # Instantiate all generators up front, in train/test/val order; the
    # config attribute names differ only by their split suffix.
    generators = {
        split: DataGenerator(
            path_formulas=getattr(data_config, "path_formulas_" + split),
            dir_images=getattr(data_config, "dir_images_" + split),
            path_matching=getattr(data_config, "path_matching_" + split),
        )
        for split in ("train", "test", "val")
    }

    # Produce images and matching files for every split.
    for generator in generators.values():
        generator.build(buckets=data_config.buckets)

    # Only the training generator is passed to the vocabulary builder.
    vocab_config = Config(vocab)
    tokens = build_vocab(
        [generators["train"]],
        min_count=vocab_config.min_count_tok,
    )
    write_vocab(tokens, vocab_config.path_vocab)
# Run the click command only when this file is executed as a script; click
# reads the --data/--vocab options from the command line itself.
if __name__ == "__main__":
    main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/sazass/LaTeX_OCR_PRO.git
git@gitee.com:sazass/LaTeX_OCR_PRO.git
sazass
LaTeX_OCR_PRO
LaTeX_OCR_PRO
master

搜索帮助