1 Star 0 Fork 0

yangxin/SubCharTokenization

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test.sh 3.15 KB
一键复制 编辑 原始数据 按行查看 历史
NoviScl 提交于 2021-12-22 21:11 . push
#!/bin/bash
# Fine-tuning launcher for GLUE-style Chinese tasks via run_glue.py.
# Every knob below is environment-overridable: `mode=eval batch_size=16 ./test.sh`.

# Run mode; may combine phases, e.g. "train test". Override via env.
mode=${mode:-"train train"}
echo "$mode"
# BUG FIX: a stray debug `exit` used to follow the echo above, which made
# the entire script a no-op (nothing past this line ever ran). Removed.
set -e
echo "Container nvidia build = " $NVIDIA_BUILD_ID
# ---------------- Model ----------------
init_checkpoint=${init_checkpoint:-"results/checkpoints_raw_zh/ckpt_8601.pt"}
config_file=${config_file:-"configs/bert_config_vocab30k.json"}
vocab_file=${vocab_file:-"tokenizers/sp_raw_zh_30k.vocab"}
vocab_model_file=${vocab_model_file:-"tokenizers/sp_raw_zh_30k.model"}
tokenizer_type=${tokenizer_type:-"RawZh"}
# Alternative BertZh checkpoint/config, kept for reference:
# init_checkpoint=${init_checkpoint:-"results/checkpoints_bert_zh_22675/ckpt_8601.pt"}
# config_file=${config_file:-"configs/bert_config_vocab22675.json"}
# vocab_file=${vocab_file:-"tokenizers/bert_chinese_uncased_22675.vocab"}
# vocab_model_file=${vocab_model_file:-"tokenizers/bert_chinese_uncased_22675.model"}
# tokenizer_type=${tokenizer_type:-"BertZh"}
# ---------------- Dataset ----------------
task_name=${task_name:-"tnews"}
data_dir=${data_dir:-"datasets/$task_name/split"}
train_dir=${train_dir:-"datasets/$task_name/split"}
dev_dir=${dev_dir:-"datasets/$task_name/split"}
test_dir=${test_dir:-"datasets/$task_name/split"}
seed=${seed:-"2"}
out_dir=${out_dir:-"logs/${task_name}/wubi_zh"}
# mode=${mode:-"prediction"}
# NOTE(review): no-op — `mode` is already defaulted near the top of the
# script, so this default never applies; kept only for env-override parity.
mode=${mode:-"test"}
num_gpu=${num_gpu:-"8"}
# ---------------- Hyperparameters ----------------
epochs=${epochs:-"6"}
# BUG FIX: max_steps / learning_rate / warmup_proportion previously expanded
# positional parameters ($13, $10, $11 — leftovers from an argument-driven
# template), so environment overrides for them silently never took effect.
max_steps=${max_steps:-"-1.0"}
batch_size=${batch_size:-"32"}
gradient_accumulation_steps=${gradient_accumulation_steps:-"2"}
learning_rate=${learning_rate:-"2e-5"}
warmup_proportion=${warmup_proportion:-"0.1"}
max_seq_length=${max_seq_length:-128}
fewshot=${fewshot:-1}
two_level_embeddings=${two_level_embeddings:-"0"}
# precision=${precision:-"fp16"}   # uncomment to enable mixed precision
echo "mode = $mode"

# Output directories (quoted so paths containing spaces don't word-split).
mkdir -p "$out_dir"
mkdir -p "$out_dir/$seed"

# Evaluation only needs a single GPU.
if [ "$mode" = "eval" ] ; then
  num_gpu=1
fi

# NOTE(review): `precision` is never defaulted anywhere in this script
# (its default line is commented out), so fp16 only activates when the
# caller exports precision=fp16 explicitly.
use_fp16=""
if [ "$precision" = "fp16" ] ; then
  echo "fp16 activated!"
  use_fp16="--fp16"
fi

if [ "$num_gpu" = "1" ] ; then
  export CUDA_VISIBLE_DEVICES=0
  mpi_command=""
else
  unset CUDA_VISIBLE_DEVICES
  # BUG FIX: master_port was 423333, which exceeds the maximum TCP port
  # (65535) and would make torch.distributed.launch fail to bind.
  mpi_command=" -m torch.distributed.launch --master_port=42333 --nproc_per_node=$num_gpu"
fi
# ---------------- Assemble the run_glue.py command line ----------------
# CMD="python $mpi_command run_glue.py "   # distributed variant (uses mpi_command above)
CMD="python3"
# CMD="python"
CMD+=" run_glue.py "
CMD+="--task_name ${task_name} "

# `mode` may contain several phases at once, e.g. "train test".
if [[ $mode == *"train"* ]] ; then
  CMD+="--do_train "
  CMD+="--train_batch_size=$batch_size "
fi
if [[ $mode == *"eval"* ]] || [[ $mode == *"test"* ]]; then
  if [[ $mode == *"eval"* ]] ; then
    CMD+="--do_eval "
  fi
  if [[ $mode == *"test"* ]] ; then
    CMD+="--do_test "
  fi
  CMD+="--eval_batch_size=$batch_size "
fi
if [[ $two_level_embeddings == "1" ]] ; then
  CMD+="--two_level_embeddings "
fi

# Model / tokenizer configuration.
CMD+="--tokenizer_type $tokenizer_type "
CMD+="--vocab_file=$vocab_file "
CMD+="--vocab_model_file $vocab_model_file "
CMD+="--init_checkpoint $init_checkpoint "
CMD+="--config_file=$config_file "
# Data locations and outputs.
CMD+="--output_dir $out_dir "
CMD+="--data_dir $data_dir "
CMD+="--train_dir $train_dir "
CMD+="--dev_dir $dev_dir "
CMD+="--test_dir $test_dir "
# Training hyperparameters.
CMD+="--bert_model bert-tiny "
CMD+="--seed $seed "
CMD+="--epochs $epochs "
CMD+="--warmup_proportion $warmup_proportion "
CMD+="--max_seq_length $max_seq_length "
CMD+="--learning_rate $learning_rate "
CMD+="--gradient_accumulation_steps=$gradient_accumulation_steps "
# CMD+="--max_steps $max_steps "
CMD+="--fewshot $fewshot "
CMD+="--do_lower_case "
# CMD+="$use_fp16"

# BUG FIX: this previously read `echo CMD`, printing the literal string
# "CMD" instead of the assembled command line.
echo "$CMD"
# NOTE(review): the command is only printed, never executed — presumably a
# final `$CMD` (or `$CMD |& tee "$out_dir/log.txt"`) belongs here; confirm
# the intended behavior before enabling execution.
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yx75/SubCharTokenization.git
git@gitee.com:yx75/SubCharTokenization.git
yx75
SubCharTokenization
SubCharTokenization
main

搜索帮助

23e8dbc6 1850385 7e0993f3 1850385