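# NOTE: web-page residue captured with this file, not part of the script:
# "Code pull complete; the page will refresh automatically."
"""
Split a YOLO-style dataset into a training set and a test set.

Expected input layout:
    $arg.root_dir
    ├── images    # all images
    └── labels    # the matching annotation files (.txt)

Output:
    $arg.root_dir
    ├── Main/train.txt
    └── Main/test.txt
"""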
import argparse
import glob
import json
import math
import os
import shutil

import cv2
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# Command-line interface: the only option is the dataset root directory.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--root_dir',
    type=str,
    default='./data',
    help="root path of images and labels, include ./images and ./labels and classes.txt",
)
# Parsed once at module level; the script entry point reads arg.root_dir.
arg = parser.parse_args()
def self_train_test_split(img_paths, ratio_train=0.9, ratio_test=0.1):
    """Split image paths into a training list and a test list.

    Args:
        img_paths: Sequence of image path strings to split.
        ratio_train: Fraction of the samples assigned to the training set.
            Change this (together with ``ratio_test``) to adjust the split.
        ratio_test: Fraction assigned to the test set. Only used to validate
            that ``ratio_train + ratio_test`` equals 1; the test size passed
            to scikit-learn is ``1 - ratio_train``.

    Returns:
        A ``(train_img, test_img)`` tuple of lists.

    Raises:
        AssertionError: If the two ratios do not sum to 1.
    """
    # Compare with a tolerance: truncating the sum with int() rejected valid
    # pairs such as 0.7 + 0.3 (== 0.9999999999999999) and accepted invalid
    # ones such as 0.5 + 1.0. Raised explicitly rather than via `assert` so
    # the check is not stripped under `python -O`.
    if not math.isclose(ratio_train + ratio_test, 1.0):
        raise AssertionError(
            "ratio_train + ratio_test must equal 1, got {} + {}".format(
                ratio_train, ratio_test))

    if len(img_paths) < 2:
        # Zero or one sample cannot be divided into two non-empty parts
        # (scikit-learn raises ValueError), so keep whatever exists in the
        # training list and leave the test list empty.
        train_img, test_img = list(img_paths), []
    else:
        # Fixed random_state keeps the split reproducible between runs.
        train_img, test_img = train_test_split(
            img_paths, test_size=1 - ratio_train, random_state=233)

    print("NUMS of train:test = {}:{}".format(len(train_img), len(test_img)))
    return train_img, test_img
def save_train_test_list(root):
    """Write the train/test image lists for the dataset under *root*.

    Collects ``root/images/*.jpg``, splits the paths with
    ``self_train_test_split`` and writes one path per line to
    ``root/Main/train.txt`` and ``root/Main/test.txt``. A list file is only
    written when its split is non-empty.

    Args:
        root: Dataset root directory containing the ``images`` folder.
    """
    # Without recursive=True the original '**.jpg' pattern matched exactly
    # what '*.jpg' matches. glob returns matches in arbitrary order, so sort
    # them to make the seeded split pick the same files on every machine.
    img_paths = sorted(glob.glob(os.path.join(root, 'images', '*.jpg')))

    # One path per line in the output files.
    lines = [path + '\n' for path in img_paths]
    train_img, test_img = self_train_test_split(lines)

    # Create the output directory on demand; open(..., 'w') does not create
    # missing parent directories.
    out_dir = os.path.join(root, 'Main')
    os.makedirs(out_dir, exist_ok=True)

    for file_name, split in (('train.txt', train_img), ('test.txt', test_img)):
        if split:
            with open(os.path.join(out_dir, file_name), 'w') as f:
                f.writelines(split)
if __name__ == "__main__":
    # Script entry point: split the dataset found under --root_dir.
    root_path = arg.root_dir
    # Fail with a readable message instead of a bare assert, which carries no
    # explanation and is skipped entirely under `python -O`. isdir() also
    # rejects a path that exists but is a regular file.
    if not os.path.isdir(root_path):
        raise SystemExit(
            "root_dir is not an existing directory: {}".format(root_path))
    save_train_test_list(root_path)
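# NOTE: web-page residue captured with this file, not part of the script:
# "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions."
# "If you confirm that the content involves no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, and nothing that violates applicable national laws and regulations, you can click Submit to appeal and we will handle it as soon as possible."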