From dfb5c52a2c6cabba506b12b9d1b531e25c498faf Mon Sep 17 00:00:00 2001
From: makai
Date: Sat, 14 Dec 2024 17:46:10 +0800
Subject: [PATCH] upload onnx pipeline

---
 inference_tools/msit/utils/__init__.py |  14 ++
 inference_tools/msit/utils/io.py       | 206 +++++++++++++++++++++++++
 inference_tools/msit/utils/path.py     | 201 ++++++++++++++++++++++++
 inference_tools/msit/utils/toolkits.py |  47 ++++++
 4 files changed, 468 insertions(+)
 create mode 100644 inference_tools/msit/utils/__init__.py
 create mode 100644 inference_tools/msit/utils/io.py
 create mode 100644 inference_tools/msit/utils/path.py
 create mode 100644 inference_tools/msit/utils/toolkits.py

diff --git a/inference_tools/msit/utils/__init__.py b/inference_tools/msit/utils/__init__.py
new file mode 100644
index 000000000..645430ff6
--- /dev/null
+++ b/inference_tools/msit/utils/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
diff --git a/inference_tools/msit/utils/io.py b/inference_tools/msit/utils/io.py
new file mode 100644
index 000000000..b8d91d2af
--- /dev/null
+++ b/inference_tools/msit/utils/io.py
@@ -0,0 +1,206 @@
+# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import csv
+import json
+from functools import wraps
+
+import yaml
+import numpy as np
+import pandas as pd
+from onnx import load_model, save_model
+from onnxruntime import SessionOptions, GraphOptimizationLevel, InferenceSession
+
+from msit.common.exceptions import MsitException
+from msit.common.dependencies import import_tensorflow
+from msit.common.constants import PathConst, MsgConst, DumpConst
+from msit.utils.path import MsitPath, change_permission
+
+tf = import_tensorflow()
+
+
+class SafelyOpen:
+    def __init__(self, file_path, mode, file_size_limitation=None, suffix=None, encoding="utf-8"):
+        self.file_path = MsitPath(file_path, PathConst.FILE, mode, file_size_limitation, suffix).check()
+        self.mode = mode
+        self.encoding = encoding
+        self._file = None
+
+    def __enter__(self):
+        if PathConst.BINARY_MODE not in self.mode:
+            self._file = open(self.file_path, self.mode, encoding=self.encoding)
+        else:
+            self._file = open(self.file_path, self.mode)
+        return self._file
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def close(self):
+        if self._file and not self._file.closed:
+            self._file.close()
+
+
+def _load_file(mode, file_size, file_suffix, use_safely_open: bool, encoding="utf-8"):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(path, *args, **kwargs):
+            try:
+                if use_safely_open:
+                    with SafelyOpen(path, mode, file_size, file_suffix, encoding) as f:
+                        return func(f)
+                else:
+                    path = MsitPath(path, PathConst.FILE, mode, file_size, file_suffix).check()
+                    return func(path, *args, **kwargs)
+            except Exception as e:
+                raise MsitException(MsgConst.IO_FAILURE, MsgConst.LOAD_ERROR.format(path, func.__name__)) from e
+        return wrapper
+    return decorator
+
+
+def _load_dir(dir_size):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(path, *args, **kwargs):
+            path = MsitPath(path, PathConst.DIR, "r", dir_size).check()
+            try:
+                return func(path, *args, **kwargs)
+            except Exception as e:
+                raise MsitException(MsgConst.IO_FAILURE, MsgConst.LOAD_ERROR.format(path, func.__name__)) from e
+        return wrapper
+    return decorator
+
+
+def _save_file(mode, file_size, file_suffix, use_safely_open: bool):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(data, path, *args, **kwargs):
+            try:
+                if use_safely_open:
+                    with SafelyOpen(path, mode, file_size, file_suffix) as f:
+                        func(data, f, *args, **kwargs)
+                else:
+                    path = MsitPath(path, PathConst.FILE, mode, file_size, file_suffix).check()
+                    func(data, path, *args, **kwargs)
+            except Exception as e:
+                raise MsitException(MsgConst.IO_FAILURE, \
+                    MsgConst.SAVE_ERROR.format(data.__class__.__name__, path, func.__name__)) from e
+            change_permission(path, PathConst.AUTHORITY_FILE)
+        return wrapper
+    return decorator
+
+
+def _save_dir(dir_size):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(data, path, *args, **kwargs):
+            path = MsitPath(path, PathConst.DIR, "w", dir_size).check()
+            try:
+                func(data, path, *args, **kwargs)
+            except Exception as e:
+                raise MsitException(MsgConst.IO_FAILURE, \
+                    MsgConst.SAVE_ERROR.format(data.__class__.__name__, path, func.__name__)) from e
+            change_permission(path, PathConst.AUTHORITY_DIR)
+        return wrapper
+    return decorator
+
+
+@_load_file("r", PathConst.SIZE_20G, PathConst.SUFFIX_ONNX, use_safely_open=False)
+def load_onnx_model(model_path):
+    return load_model(model_path)
+
+
+@_load_file("r", PathConst.SIZE_20G, PathConst.SUFFIX_ONNX, use_safely_open=False)
+def load_onnx_session(model_path, onnx_fusion_switch=True, provider=PathConst.CPUEXECUTE):
+    options = SessionOptions()
+    if not onnx_fusion_switch:
+        options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
+    return InferenceSession(model_path, sess_options=options, providers=[provider])
+
+
+@_save_file("w", None, PathConst.SUFFIX_ONNX, use_safely_open=False)
+def save_onnx_model(onnx_model, save_path):
+    model_size = onnx_model.ByteSize()
+    save_external_flag = model_size < 0 or model_size > DumpConst.MAX_PROTOBUF_2G
+    save_model(onnx_model, save_path, save_as_external_data=save_external_flag)
+
+
+@_load_file("r", PathConst.SIZE_10G, PathConst.SUFFIX_NPY, use_safely_open=False)
+def load_npy(npy_path):
+    return np.load(npy_path, allow_pickle=False)
+
+
+@_save_file("w", None, PathConst.SUFFIX_NPY, use_safely_open=False)
+def save_npy(npy_data, save_path):
+    np.save(save_path, npy_data)
+
+
+@_save_file("w", None, PathConst.SUFFIX_BIN, use_safely_open=False)
+def save_bin_from_ndarray(numpy_data: np.ndarray, save_path):
+    numpy_data.tofile(save_path)
+
+
+@_load_file("r", PathConst.SIZE_10G, PathConst.SUFFIX_BIN, use_safely_open=False)
+def load_bin_to_ndarray(bin_path, dtype=np.float16, shape=None):
+    if dtype == np.float32 and os.path.getsize(bin_path) == np.prod(shape) * 2:
+        return np.fromfile(bin_path, dtype=np.float16).astype(np.float32)
+    else:
+        return np.fromfile(bin_path, dtype=dtype)
+
+
+@_load_dir(PathConst.SIZE_50G)
+def load_saved_model():
+    pass
+
+
+@_save_dir(PathConst.SIZE_50G)
+def save_saved_model():
+    pass
+
+
+@_load_file("r", PathConst.SIZE_500M, PathConst.SUFFIX_YAML, use_safely_open=True)
+def load_yaml(f):
+    return yaml.safe_load(f)
+
+
+@_save_file("w", None, PathConst.SUFFIX_YAML, use_safely_open=True)
+def save_yaml(yaml_data, f):
+    yaml.dump(yaml_data, f)
+
+
+@_load_file("r", PathConst.SIZE_500M, PathConst.SUFFIX_JSON, use_safely_open=True)
+def load_json(f):
+    return json.load(f)
+
+
+@_save_file("w", None, PathConst.SUFFIX_JSON, use_safely_open=True)
+def save_json(json_data, f, indent=None):
+    json.dump(json_data, f, indent=indent, default=str)
+
+
+@_load_file("r", PathConst.SIZE_500M, PathConst.SUFFIX_CSV, use_safely_open=True, encoding="utf-8-sig")
+def load_csv_by_builtin(f, sep=","):
+    csv_reader = csv.reader(f, delimiter=sep)
+    return list(csv_reader)
+
+
+@_load_file("r", PathConst.SIZE_500M, PathConst.SUFFIX_CSV, use_safely_open=False)
+def load_csv_by_pandas(csv_path, sep=","):
+    return pd.read_csv(csv_path, sep=sep)
+
+
+@_save_file("w", None, PathConst.SUFFIX_CSV, use_safely_open=False)
+def save_csv_by_pandas(csv_data: pd.DataFrame, csv_path, sep=","):
+    csv_data.to_csv(csv_path, sep=sep)
diff --git a/inference_tools/msit/utils/path.py b/inference_tools/msit/utils/path.py
new file mode 100644
index 000000000..8035aa52c
--- /dev/null
+++ b/inference_tools/msit/utils/path.py
@@ -0,0 +1,201 @@
+# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from re import match
+from pathlib import Path
+from stat import S_IMODE, S_IRUSR, S_IWGRP, S_IWOTH, S_IWUSR, S_IXUSR
+
+from msit.common.log import logger
+from msit.common.exceptions import MsitException
+from msit.common.constants import PathConst, MsgConst
+from msit.utils.toolkits import convert_bytes
+
+
+class MsitPath:
+    def __init__(self, path: str, path_type, mode, size_limitation=None, suffix=None, max_dir_depth=5):
+        self.path = path
+        self.path_type = self._check_path_type(path_type)
+        self.mode = self._check_mode(mode)
+        self.size_limitation = self._check_positive_int(size_limitation) if size_limitation else None
+        self.suffix = suffix
+        self.max_dir_depth = self._check_positive_int(max_dir_depth)
+
+    @property
+    def is_file(self):
+        return os.path.isfile(self.path)
+
+    @property
+    def is_dir(self):
+        return os.path.isdir(self.path)
+
+    @staticmethod
+    def _check_path_type(path_type):
+        if path_type not in [PathConst.FILE, PathConst.DIR]:
+            raise MsitException(MsgConst.INVALID_ARGU, \
+                f"The path type must be one of {[PathConst.FILE, PathConst.DIR]}, "
+                f"currently: {path_type}.")
+        return path_type
+
+    @staticmethod
+    def _check_mode(mode):
+        if mode not in PathConst.MODE:
+            raise MsitException(MsgConst.INVALID_ARGU, f"Mode must be one of {PathConst.MODE}, currently: {mode}.")
+        return mode
+
+    @staticmethod
+    def _check_positive_int(value):
+        if not isinstance(value, int) or value <= 0:
+            raise MsitException(MsgConst.INVALID_ARGU, \
+                f"The value must be an integer greater than 0, currently: {value}.")
+        return value
+
+    @staticmethod
+    def _check_path_exist(path):
+        if not os.path.exists(path):
+            raise MsitException(MsgConst.INVALID_ARGU, f"Path not found: {path}.")
+
+    @staticmethod
+    def _check_write_permission_for_group_others(path, permission):
+        if bool(permission & (S_IWGRP | S_IWOTH)):
+            raise MsitException(MsgConst.RISK_ALERT, \
+                f"The path {path} is writable by group and others. "
+                f"Permissions for files (or directories) should not exceed 0o755 (rwxr-xr-x).")
+
+    @classmethod
+    def _check_permission(cls, path, mode):
+        path_stat = os.stat(path)
+        owner_id = path_stat.st_uid
+        group_owner_id = path_stat.st_gid
+        if owner_id != os.geteuid() or group_owner_id not in os.getgroups():
+            raise MsitException(MsgConst.RISK_ALERT, f"The current user isn't the owner or group owner of {path}.")
+        permission = S_IMODE(path_stat.st_mode)
+        cls._check_write_permission_for_group_others(path, permission)
+        if mode in PathConst.MODE_READ:
+            if not bool(permission & S_IRUSR):
+                raise MsitException(MsgConst.NO_PERMISSION, \
+                    f"The current user is not authorized to read the path: {path}.")
+        if mode in PathConst.MODE_WRITE:
+            if not bool(permission & S_IWUSR):
+                raise MsitException(MsgConst.NO_PERMISSION, \
+                    f"The current user is not authorized to write the path: {path}.")
+        if mode == PathConst.MODE_EXEC:
+            if not bool(permission & S_IXUSR):
+                raise MsitException(MsgConst.NO_PERMISSION, \
+                    f"The current user is not authorized to execute the path: {path}.")
+
+    def check(self):
+        self.path = os.path.abspath(os.path.normpath(self.path))
+
+        if self.mode in PathConst.MODE_WRITE:
+            parent_dir = os.path.dirname(self.path)
+            self._check_path_exist(parent_dir)
+            if not os.path.isdir(parent_dir):
+                raise MsitException(MsgConst.INVALID_ARGU, f"The parent directory {parent_dir} is not valid.")
+            self._check_permission(parent_dir, self.mode)
+        else:
+            self._check_path_exist(self.path)
+            if self.path_type == PathConst.FILE:
+                if not self.is_file:
+                    raise MsitException(MsgConst.INVALID_ARGU, f"The path {self.path} is not a file.")
+                self._check_file_size()
+            elif self.path_type == PathConst.DIR:
+                if not self.is_dir:
+                    raise MsitException(MsgConst.INVALID_ARGU, f"The path {self.path} is not a directory.")
+                self._check_dir_size()
+            self._check_permission(self.path, self.mode)
+
+        self.path = self._check_soft_link()
+        self._check_path_length()
+        self._check_special_chars()
+
+        if self.path_type == PathConst.FILE:
+            self._check_file_suffix()
+        elif self.path_type == PathConst.DIR:
+            self.path += "/"
+        return self.path
+
+    def _check_soft_link(self):
+        if os.path.islink(self.path):
+            real_path = os.path.realpath(self.path)
+            logger.info(f"Found a symlink, path {self.path} points to {real_path}.")
+            return real_path
+        return self.path
+
+    def _check_path_length(self):
+        if len(self.path) > PathConst.MAX_PATH_LENGTH:
+            raise MsitException(MsgConst.RISK_ALERT, \
+                f"Current path length ({len(self.path)}) exceeds "
+                f"the limit ({PathConst.MAX_PATH_LENGTH}).")
+        dir_depth = 0
+        for dir_name in self.path.split("/"):
+            dir_depth += 1
+            if dir_depth > PathConst.MAX_DIR_DEPTH:
+                raise MsitException(MsgConst.RISK_ALERT, f"Exceeded max directory depth ({PathConst.MAX_DIR_DEPTH}).")
+            if len(dir_name) > PathConst.MAX_LAST_NAME_LENGTH:
+                raise MsitException(MsgConst.RISK_ALERT, \
+                    f"Current {self.path_type} length ({len(dir_name)}) "
+                    f"exceeds the limit ({PathConst.MAX_LAST_NAME_LENGTH}).")
+
+    def _check_special_chars(self):
+        if not match(PathConst.VALID_PATH_PATTERN, self.path):
+            raise MsitException(MsgConst.INVALID_ARGU, "Path contains special characters.")
+
+    def _check_file_suffix(self):
+        if self.suffix and not self.path.endswith(self.suffix):
+            raise MsitException(MsgConst.INVALID_ARGU, f"{self.path} is not a {self.suffix} file.")
+
+    def _check_file_size(self):
+        if self.size_limitation and os.path.getsize(self.path) > self.size_limitation:
+            raise MsitException(MsgConst.RISK_ALERT, \
+                f"File size exceeds the limit ({convert_bytes(self.size_limitation)}).")
+
+    def _check_dir_size(self):
+        if self.size_limitation and get_dir_size(self.path, self.max_dir_depth) > self.size_limitation:
+            raise MsitException(MsgConst.RISK_ALERT, \
+                f"Directory size exceeds the limit ({convert_bytes(self.size_limitation)}).")
+
+
+def get_dir_size(dir_path, max_dir_depth=5):
+    total_size = 0
+    for root, _, files in os.walk(dir_path):
+        current_depth = root[len(dir_path):].count(os.sep)
+        if current_depth > max_dir_depth:
+            logger.warning(
+                f"Calculated size of {dir_path}, but exceeded max depth ({max_dir_depth}). Current size: {total_size}."
+            )
+            return total_size
+        for file_name in files:
+            total_size += os.path.getsize(os.path.join(root, file_name))
+    return total_size


+def make_dir(dir_path):
+    dir_path = MsitPath(dir_path, PathConst.DIR, "w").check()
+    new_dir = Path(dir_path)
+    try:
+        new_dir.mkdir(mode=PathConst.AUTHORITY_DIR, exist_ok=True, parents=False)
+    except OSError as e:
+        raise MsitException(MsgConst.IO_FAILURE, \
+            f"Failed to create {dir_path}, please Check if the parent directory of the current "
+            f"path exists, and verify permissions or disk space.") from e
+
+
+def change_permission(path, permission):
+    if not os.path.exists(path) or os.path.islink(path):
+        return
+    try:
+        os.chmod(path, permission)
+    except PermissionError as e:
+        raise MsitException(MsgConst.NO_PERMISSION, f"Failed to set permissions ({permission}) for {path}.") from e
diff --git a/inference_tools/msit/utils/toolkits.py b/inference_tools/msit/utils/toolkits.py
new file mode 100644
index 000000000..70f6d0720
--- /dev/null
+++ b/inference_tools/msit/utils/toolkits.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2025-2025 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from sys import stdout
+
+from time import strftime, localtime, time
+
+
+def get_pid():
+    return os.getpid()
+
+
+def get_current_time(used_for_log=True, microsecond=False):
+    if used_for_log:
+        return strftime("%Y-%m-%d %H:%M:%S", localtime())
+    else:
+        if microsecond:
+            return round(time() * 1e6) % 10**10
+        else:
+            return strftime("%Y%m%d_%H%M%S", localtime())
+
+
+def stdout_flush():
+    stdout.flush()
+
+
+def convert_bytes(bytes_size):
+    if bytes_size < 1024:
+        return f"{bytes_size} Bytes"
+    elif bytes_size < 1_048_576:  # 1024 * 1024
+        return f"{bytes_size / 1024:.2f} KB"
+    elif bytes_size < 1_073_741_824:  # 1024 * 1024 * 1024
+        return f"{bytes_size / (1_048_576):.2f} MB"
+    else:
+        return f"{bytes_size / (1_073_741_824):.2f} GB"
--
Gitee