1 Star 2 Fork 1

pyq/DSSD

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
train.py 4.11 KB
一键复制 编辑 原始数据 按行查看 历史
爱笑的眼睛 提交于 2019-12-17 19:19 . dssd
import argparse
import logging
import os
import torch
import torch.distributed as dist
from dssd.engine.inference import do_evaluation
from dssd.config import cfg
from dssd.data.build import make_data_loader
from dssd.engine.trainer import do_train
from dssd.modeling.detector import build_detection_model
from dssd.solver.build import make_optimizer, make_lr_scheduler
from dssd.utils import dist_util, mkdir
from dssd.utils.checkpoint import CheckPointer
from dssd.utils.dist_util import synchronize
from dssd.utils.logger import setup_logger
from dssd.utils.misc import str2bool
def train(cfg, args):
logger = logging.getLogger('DSSD.trainer')
model = build_detection_model(cfg)
device = torch.device(cfg.MODEL.DEVICE)
model.to(device)
if args.distributed:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)
lr = cfg.SOLVER.LR * args.num_gpus # scale by num gpus
optimizer = make_optimizer(cfg, model, lr)
milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
scheduler = make_lr_scheduler(cfg, optimizer, milestones)
arguments = {"iteration": 0}
save_to_disk = dist_util.get_rank() == 0
checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger)
extra_checkpoint_data = checkpointer.load()
arguments.update(extra_checkpoint_data)
max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus
train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter, start_iter=arguments['iteration'])
model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer, device, arguments, args)
return model
def main():
parser = argparse.ArgumentParser(description='DSSD: Deconvolutional Single Shot MultiBox Detector Training With PyTorch')
parser.add_argument(
"--config-file",
default="",
metavar="FILE",
help="path to config file",
type=str,
)
parser.add_argument("--local_rank", type=int, default=0)
parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step')
parser.add_argument('--save_step', default=10000, type=int, help='Save checkpoint every save_step')
parser.add_argument('--eval_step', default=10000, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0')
parser.add_argument('--use_tensorboard', default=True, type=str2bool)
parser.add_argument(
"--skip-test",
dest="skip_test",
help="Do not test the final model",
action="store_true",
)
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
args = parser.parse_args()
num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
args.distributed = num_gpus > 1
args.num_gpus = num_gpus
if torch.cuda.is_available():
# This flag allows you to enable the inbuilt cudnn auto-tuner to
# find the best algorithm to use for your hardware.
torch.backends.cudnn.benchmark = True
if args.distributed:
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend="nccl", init_method="env://")
synchronize()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.freeze()
if cfg.OUTPUT_DIR:
mkdir(cfg.OUTPUT_DIR)
logger = setup_logger("DSSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
logger.info("Using {} GPUs".format(num_gpus))
logger.info(args)
logger.info("Loaded configuration file {}".format(args.config_file))
with open(args.config_file, "r") as cf:
config_str = "\n" + cf.read()
logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))
model = train(cfg, args)
if not args.skip_test:
logger.info('Start evaluating...')
torch.cuda.empty_cache() # speed up evaluating after training finished
do_evaluation(cfg, model, distributed=args.distributed)
if __name__ == '__main__':
main()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/yq_love_yg/DSSD.git
git@gitee.com:yq_love_yg/DSSD.git
yq_love_yg
DSSD
DSSD
master

搜索帮助