1 Star 0 Fork 16

PshySimon/sysSentry_xalarm_service

forked from src-openEuler/sysSentry 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
fix-ai_block_io-some-issues.patch 43.01 KB
一键复制 编辑 原始数据 按行查看 历史
贺有志 提交于 2024-09-29 08:26 . add fix-ai-block-io-issues.patch.
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832
From 35ba8fe8e241c5e3508c5dadc82a777065a5cc4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
Date: Mon, 30 Sep 2024 00:15:29 +0800
Subject: [PATCH] fix ai_block_io some issues
---
..._slow_io_detection.ini => ai_block_io.ini} | 6 +-
config/tasks/ai_block_io.mod | 5 +
.../tasks/ai_threshold_slow_io_detection.mod | 5 -
...ow_io_detection.py => test_ai_block_io.py} | 0
.../README.md | 0
.../__init__.py | 0
.../ai_block_io.py} | 57 ++--
.../alarm_report.py | 2 +-
.../ai_block_io/config_parser.py | 256 ++++++++++++++++++
.../data_access.py | 3 +
.../detector.py | 17 +-
.../io_data.py | 0
.../sliding_window.py | 0
.../threshold.py | 13 +-
.../utils.py | 15 +-
.../config_parser.py | 141 ----------
src/python/setup.py | 2 +-
17 files changed, 336 insertions(+), 186 deletions(-)
rename config/plugins/{ai_threshold_slow_io_detection.ini => ai_block_io.ini} (66%)
create mode 100644 config/tasks/ai_block_io.mod
delete mode 100644 config/tasks/ai_threshold_slow_io_detection.mod
rename selftest/test/{test_ai_threshold_slow_io_detection.py => test_ai_block_io.py} (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/README.md (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/__init__.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection/slow_io_detection.py => ai_block_io/ai_block_io.py} (66%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/alarm_report.py (98%)
create mode 100644 src/python/sentryPlugins/ai_block_io/config_parser.py
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/data_access.py (99%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/detector.py (77%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/io_data.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/sliding_window.py (100%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/threshold.py (92%)
rename src/python/sentryPlugins/{ai_threshold_slow_io_detection => ai_block_io}/utils.py (86%)
delete mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
diff --git a/config/plugins/ai_threshold_slow_io_detection.ini b/config/plugins/ai_block_io.ini
similarity index 66%
rename from config/plugins/ai_threshold_slow_io_detection.ini
rename to config/plugins/ai_block_io.ini
index 44eb928..01ce266 100644
--- a/config/plugins/ai_threshold_slow_io_detection.ini
+++ b/config/plugins/ai_block_io.ini
@@ -4,9 +4,9 @@ slow_io_detect_frequency=1
log_level=info
[algorithm]
-train_data_duration=0.1
-train_update_duration=0.02
-algorithm_type=n_sigma
+train_data_duration=24
+train_update_duration=2
+algorithm_type=boxplot
boxplot_parameter=1.5
n_sigma_parameter=3
diff --git a/config/tasks/ai_block_io.mod b/config/tasks/ai_block_io.mod
new file mode 100644
index 0000000..1971d7d
--- /dev/null
+++ b/config/tasks/ai_block_io.mod
@@ -0,0 +1,5 @@
+[common]
+enabled=yes
+task_start=/usr/bin/python3 /usr/bin/ai_block_io
+task_stop=pkill -f /usr/bin/ai_block_io
+type=oneshot
\ No newline at end of file
diff --git a/config/tasks/ai_threshold_slow_io_detection.mod b/config/tasks/ai_threshold_slow_io_detection.mod
deleted file mode 100644
index 2729f72..0000000
--- a/config/tasks/ai_threshold_slow_io_detection.mod
+++ /dev/null
@@ -1,5 +0,0 @@
-[common]
-enabled=yes
-task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection
-task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection
-type=oneshot
\ No newline at end of file
diff --git a/selftest/test/test_ai_threshold_slow_io_detection.py b/selftest/test/test_ai_block_io.py
similarity index 100%
rename from selftest/test/test_ai_threshold_slow_io_detection.py
rename to selftest/test/test_ai_block_io.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/src/python/sentryPlugins/ai_block_io/README.md
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
rename to src/python/sentryPlugins/ai_block_io/README.md
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/src/python/sentryPlugins/ai_block_io/__init__.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
rename to src/python/sentryPlugins/ai_block_io/__init__.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
similarity index 66%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
rename to src/python/sentryPlugins/ai_block_io/ai_block_io.py
index 43cf770..31b8a97 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
+++ b/src/python/sentryPlugins/ai_block_io/ai_block_io.py
@@ -23,7 +23,7 @@ from .data_access import get_io_data_from_collect_plug, check_collect_valid
from .io_data import MetricName
from .alarm_report import AlarmReport
-CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini"
+CONFIG_FILE = "/etc/sysSentry/plugins/ai_block_io.ini"
def sig_handler(signum, frame):
@@ -40,34 +40,48 @@ class SlowIODetection:
def __init__(self, config_parser: ConfigParser):
self._config_parser = config_parser
- self.__set_log_format()
self.__init_detector_name_list()
self.__init_detector()
- def __set_log_format(self):
- log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
- log_level = get_log_level(self._config_parser.get_log_level())
- logging.basicConfig(level=log_level, format=log_format)
-
def __init_detector_name_list(self):
self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency())
- for disk in self._disk_list:
- self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
- self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ disks_to_detection: list = self._config_parser.get_disks_to_detection()
+ # 情况1:None,则启用所有磁盘检测
+ # 情况2:is not None and len = 0,则不启动任何磁盘检测
+ # 情况3:len != 0,则取交集
+ if disks_to_detection is None:
+ for disk in self._disk_list:
+ self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
+ self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ elif len(disks_to_detection) == 0:
+ logging.warning('please attention: conf file not specify any disk to detection, '
+ 'so it will not start ai block io.')
+ else:
+ disks_name_to_detection = []
+ for disk_name_to_detection in disks_to_detection:
+ disks_name_to_detection.append(disk_name_to_detection.get_disk_name())
+ disk_intersection = [disk for disk in self._disk_list if disk in disks_name_to_detection]
+ for disk in disk_intersection:
+ self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
+ self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))
+ logging.info(f'start to detection follow disk and it\'s metric: {self._detector_name_list}')
def __init_detector(self):
train_data_duration, train_update_duration = (self._config_parser.
get_train_data_duration_and_train_update_duration())
slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency()
- threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type())
+ threshold_type = self._config_parser.get_algorithm_type()
data_queue_size, update_size = get_data_queue_size_and_update_size(train_data_duration,
train_update_duration,
slow_io_detection_frequency)
- sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type())
+ sliding_window_type = self._config_parser.get_sliding_window_type()
window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold()
for detector_name in self._detector_name_list:
- threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size,
+ threshold = ThresholdFactory().get_threshold(threshold_type,
+ boxplot_parameter=self._config_parser.get_boxplot_parameter(),
+ n_sigma_parameter=self._config_parser.get_n_sigma_parameter(),  # must match NSigmaThreshold's keyword; a misspelt name is swallowed by **kwargs
+ data_queue_size=data_queue_size,
data_queue_update_size=update_size)
sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type, queue_length=window_size,
threshold=window_threshold)
@@ -89,6 +103,7 @@ class SlowIODetection:
logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}')
if io_data_dict_with_disk_name is None:
continue
+
# Step2:慢IO检测
logging.debug('step2. Start to detection slow io event.')
slow_io_event_list = []
@@ -103,13 +118,14 @@ class SlowIODetection:
for slow_io_event in slow_io_event_list:
metric_name: MetricName = slow_io_event[0]
result = slow_io_event[1]
- AlarmReport.report_major_alm(f"disk {metric_name.get_disk_name()} has slow io event."
- f"stage: {metric_name.get_metric_name()},"
- f"type: {metric_name.get_io_access_type_name()},"
- f"metric: {metric_name.get_metric_name()},"
- f"current window: {result[1]},"
- f"threshold: {result[2]}")
- logging.error(f"slow io event happen: {str(slow_io_event)}")
+ alarm_content = (f"disk {metric_name.get_disk_name()} has slow io event. "
+ f"stage is: {metric_name.get_stage_name()}, "
+ f"io access type is: {metric_name.get_io_access_type_name()}, "
+ f"metric is: {metric_name.get_metric_name()}, "
+ f"current window is: {result[1]}, "
+ f"threshold is: {result[2]}")
+ AlarmReport.report_major_alm(alarm_content)
+ logging.warning(alarm_content)
# Step4:等待检测时间
logging.debug('step4. Wait to start next slow io event detection loop.')
@@ -120,6 +136,7 @@ def main():
# Step1:注册消息处理函数
signal.signal(signal.SIGINT, sig_handler)
signal.signal(signal.SIGTERM, sig_handler)
+
# Step2:断点恢复
# todo:
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/src/python/sentryPlugins/ai_block_io/alarm_report.py
similarity index 98%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
rename to src/python/sentryPlugins/ai_block_io/alarm_report.py
index 3f4f34e..230c8cd 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
+++ b/src/python/sentryPlugins/ai_block_io/alarm_report.py
@@ -15,7 +15,7 @@ import json
class AlarmReport:
- TASK_NAME = "SLOW_IO_DETECTION"
+ TASK_NAME = "ai_block_io"
@staticmethod
def report_pass(info: str):
diff --git a/src/python/sentryPlugins/ai_block_io/config_parser.py b/src/python/sentryPlugins/ai_block_io/config_parser.py
new file mode 100644
index 0000000..632391d
--- /dev/null
+++ b/src/python/sentryPlugins/ai_block_io/config_parser.py
@@ -0,0 +1,256 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import configparser
+import json
+import logging
+
+from .io_data import MetricName
+from .threshold import ThresholdType
+from .utils import get_threshold_type_enum, get_sliding_window_type_enum, get_log_level
+
+LOG_FORMAT = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
+
+
+def init_log_format(log_level: str):
+ logging.basicConfig(level=get_log_level(log_level), format=LOG_FORMAT)
+
+
+class ConfigParser:
+ DEFAULT_ABSOLUTE_THRESHOLD = 40
+ DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
+ DEFAULT_LOG_LEVEL = 'info'
+
+ DEFAULT_ALGORITHM_TYPE = 'boxplot'
+ DEFAULT_TRAIN_DATA_DURATION = 24
+ DEFAULT_TRAIN_UPDATE_DURATION = 2
+ DEFAULT_BOXPLOT_PARAMETER = 1.5
+ DEFAULT_N_SIGMA_PARAMETER = 3
+
+ DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
+ DEFAULT_WINDOW_SIZE = 30
+ DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
+
+    def __init__(self, config_file_name):  # every setting starts at its default; read_config_from_file() overrides them
+        self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+        self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+        self.__log_level = ConfigParser.DEFAULT_LOG_LEVEL
+        self.__disks_to_detection: list = []
+
+        self.__algorithm_type = get_threshold_type_enum(ConfigParser.DEFAULT_ALGORITHM_TYPE)  # store the enum: get_algorithm_type() is passed to the factory unconverted
+        self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION  # previously initialised from the *update* duration constant
+        self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
+        self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
+        self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+
+        self.__sliding_window_type = get_sliding_window_type_enum(ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)  # store the enum, same reason as algorithm_type
+        self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+        self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
+
+        self.__config_file_name = config_file_name
+
+ def __read_absolute_threshold(self, items_common: dict):
+ try:
+ self.__absolute_threshold = float(items_common.get('absolute_threshold',
+ ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD))
+ if self.__absolute_threshold <= 0:
+ logging.warning(
+ f'the_absolute_threshold: {self.__absolute_threshold} you set is invalid, use default value: {ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD}.')
+ self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+ except ValueError:
+ self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
+ logging.warning(
+ f'the_absolute_threshold type conversion has error, use default value: {self.__absolute_threshold}.')
+
+ def __read__slow_io_detect_frequency(self, items_common: dict):
+ try:
+ self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency',
+ ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY))
+ if self.__slow_io_detect_frequency < 1 or self.__slow_io_detect_frequency > 10:
+ logging.warning(
+ f'the slow_io_detect_frequency: {self.__slow_io_detect_frequency} you set is invalid, use default value: {ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY}.')
+ self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+ except ValueError:
+ self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
+ logging.warning(f'slow_io_detect_frequency type conversion has error, use default value: {self.__slow_io_detect_frequency}.')
+
+    def __read__disks_to_detect(self, items_common: dict):  # parse the JSON list of {disk_name, stage_name, io_access_type_name, metric_name}
+        disks_to_detection = items_common.get('disks_to_detect')
+        if disks_to_detection is None:
+            logging.warning(f'config of disks_to_detect not found, the default value be used.')
+            self.__disks_to_detection = None  # None means "no restriction configured"
+            return
+        try:
+            disks_to_detection_list = json.loads(disks_to_detection)
+            for disk_to_detection in disks_to_detection_list:
+                disk_name = disk_to_detection.get('disk_name', None)
+                stage_name = disk_to_detection.get('stage_name', None)
+                io_access_type_name = disk_to_detection.get('io_access_type_name', None)
+                metric_name = disk_to_detection.get('metric_name', None)
+                if not (disk_name is None or stage_name is None or io_access_type_name is None or metric_name is None):
+                    metric_name_object = MetricName(disk_name, stage_name, io_access_type_name, metric_name)
+                    self.__disks_to_detection.append(metric_name_object)
+                else:
+                    logging.warning(f'config of disks_to_detect\'s some part has some error: {disk_to_detection}, it will be ignored.')
+        except (json.decoder.JSONDecodeError, AttributeError, TypeError) as e:  # also valid JSON of the wrong shape (non-list, or entries that are not objects)
+            logging.warning(f'config of disks_to_detect is error: {e}, it will be ignored and default value be used.')
+            self.__disks_to_detection = None
+
+ def __read__train_data_duration(self, items_algorithm: dict):
+ try:
+ self.__train_data_duration = float(items_algorithm.get('train_data_duration',
+ ConfigParser.DEFAULT_TRAIN_DATA_DURATION))
+ if self.__train_data_duration <= 0 or self.__train_data_duration > 720:
+ logging.warning(
+ f'the train_data_duration: {self.__train_data_duration} you set is invalid, use default value: {ConfigParser.DEFAULT_TRAIN_DATA_DURATION}.')
+ self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
+ except ValueError:
+ self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
+ logging.warning(f'the train_data_duration type conversion has error, use default value: {self.__train_data_duration}.')
+
+ def __read__train_update_duration(self, items_algorithm: dict):
+ default_train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
+ if default_train_update_duration > self.__train_data_duration:
+ default_train_update_duration = self.__train_data_duration / 2
+
+ try:
+ self.__train_update_duration = float(items_algorithm.get('train_update_duration',
+ ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION))
+ if self.__train_update_duration <= 0 or self.__train_update_duration > self.__train_data_duration:
+ logging.warning(
+ f'the train_update_duration: {self.__train_update_duration} you set is invalid, use default value: {default_train_update_duration}.')
+ self.__train_update_duration = default_train_update_duration
+ except ValueError:
+ self.__train_update_duration = default_train_update_duration
+ logging.warning(f'the train_update_duration type conversion has error, use default value: {self.__train_update_duration}.')
+
+    def __read__algorithm_type_and_parameter(self, items_algorithm: dict):  # read algorithm_type, then only the parameter that algorithm uses
+        algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
+        self.__algorithm_type = get_threshold_type_enum(algorithm_type)
+
+        if self.__algorithm_type == ThresholdType.NSigmaThreshold:
+            try:
+                self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter',
+                                                                     ConfigParser.DEFAULT_N_SIGMA_PARAMETER))
+                if self.__n_sigma_parameter <= 0 or self.__n_sigma_parameter > 10:
+                    logging.warning(
+                        f'the n_sigma_parameter: {self.__n_sigma_parameter} you set is invalid, use default value: {ConfigParser.DEFAULT_N_SIGMA_PARAMETER}.')
+                    self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+            except ValueError:
+                self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
+                logging.warning(f'the n_sigma_parameter type conversion has error, use default value: {self.__n_sigma_parameter}.')
+        elif self.__algorithm_type == ThresholdType.BoxplotThreshold:
+            try:
+                self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter',
+                                                                     ConfigParser.DEFAULT_BOXPLOT_PARAMETER))
+                if self.__boxplot_parameter <= 0 or self.__boxplot_parameter > 10:
+                    logging.warning(
+                        f'the boxplot_parameter: {self.__boxplot_parameter} you set is invalid, use default value: {ConfigParser.DEFAULT_BOXPLOT_PARAMETER}.')
+                    self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER  # reset the boxplot value itself; this used to overwrite n_sigma instead
+            except ValueError:
+                self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
+                logging.warning(f'the boxplot_parameter type conversion has error, use default value: {self.__boxplot_parameter}.')
+
+ def __read__window_size(self, items_sliding_window: dict):
+ try:
+ self.__window_size = int(items_sliding_window.get('window_size',
+ ConfigParser.DEFAULT_WINDOW_SIZE))
+ if self.__window_size < 1 or self.__window_size > 3600:
+ logging.warning(
+ f'the window_size: {self.__window_size} you set is invalid, use default value: {ConfigParser.DEFAULT_WINDOW_SIZE}.')
+ self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+ except ValueError:
+ self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
+ logging.warning(f'window_size type conversion has error, use default value: {self.__window_size}.')
+
+    def __read__window_minimum_threshold(self, items_sliding_window: dict):  # must run after __read__window_size: the valid range depends on it
+        default_window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
+        if default_window_minimum_threshold > self.__window_size:
+            default_window_minimum_threshold = max(1, self.__window_size // 2)  # keep the fallback an int inside the valid range [1, window_size]
+        try:
+            self.__window_minimum_threshold = (
+                int(items_sliding_window.get('window_minimum_threshold',
+                                             ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD)))
+            if self.__window_minimum_threshold < 1 or self.__window_minimum_threshold > self.__window_size:
+                logging.warning(
+                    f'the window_minimum_threshold: {self.__window_minimum_threshold} you set is invalid, use default value: {default_window_minimum_threshold}.')
+                self.__window_minimum_threshold = default_window_minimum_threshold
+        except ValueError:
+            self.__window_minimum_threshold = default_window_minimum_threshold
+            logging.warning(f'window_minimum_threshold type conversion has error, use default value: {self.__window_minimum_threshold}.')
+
+ def read_config_from_file(self):
+ con = configparser.ConfigParser()
+ con.read(self.__config_file_name, encoding='utf-8')
+
+ if con.has_section('common'):
+ items_common = dict(con.items('common'))
+ self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
+ init_log_format(self.__log_level)
+ self.__read_absolute_threshold(items_common)
+ self.__read__slow_io_detect_frequency(items_common)
+ self.__read__disks_to_detect(items_common)
+ else:
+ init_log_format(self.__log_level)
+ logging.warning("common section parameter not found, it will be set to default value.")
+
+ if con.has_section('algorithm'):
+ items_algorithm = dict(con.items('algorithm'))
+ self.__read__train_data_duration(items_algorithm)
+ self.__read__train_update_duration(items_algorithm)
+ self.__read__algorithm_type_and_parameter(items_algorithm)
+ else:
+ logging.warning("algorithm section parameter not found, it will be set to default value.")
+
+ if con.has_section('sliding_window'):
+ items_sliding_window = dict(con.items('sliding_window'))
+ sliding_window_type = items_sliding_window.get('sliding_window_type',
+ ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
+ self.__sliding_window_type = get_sliding_window_type_enum(sliding_window_type)
+ self.__read__window_size(items_sliding_window)
+ self.__read__window_minimum_threshold(items_sliding_window)
+ else:
+ logging.warning("sliding_window section parameter not found, it will be set to default value.")
+
+ self.__print_all_config_value()
+
+ def __print_all_config_value(self):
+ pass
+
+ def get_slow_io_detect_frequency(self):
+ return self.__slow_io_detect_frequency
+
+ def get_algorithm_type(self):
+ return self.__algorithm_type
+
+ def get_sliding_window_type(self):
+ return self.__sliding_window_type
+
+ def get_train_data_duration_and_train_update_duration(self):
+ return self.__train_data_duration, self.__train_update_duration
+
+ def get_window_size_and_window_minimum_threshold(self):
+ return self.__window_size, self.__window_minimum_threshold
+
+ def get_absolute_threshold(self):
+ return self.__absolute_threshold
+
+ def get_log_level(self):
+ return self.__log_level
+
+ def get_disks_to_detection(self):
+ return self.__disks_to_detection
+
+ def get_boxplot_parameter(self):
+ return self.__boxplot_parameter
+
+ def get_n_sigma_parameter(self):
+ return self.__n_sigma_parameter
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py b/src/python/sentryPlugins/ai_block_io/data_access.py
similarity index 99%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
rename to src/python/sentryPlugins/ai_block_io/data_access.py
index d9f3460..01c5315 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
+++ b/src/python/sentryPlugins/ai_block_io/data_access.py
@@ -17,6 +17,8 @@ from sentryCollector.collect_plugin import (
get_io_data,
is_iocollect_valid,
)
+
+
from .io_data import IOStageData, IOData
COLLECT_STAGES = [
@@ -32,6 +34,7 @@ COLLECT_STAGES = [
"iocost",
]
+
def check_collect_valid(period):
data_raw = is_iocollect_valid(period)
if data_raw["ret"] == 0:
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py b/src/python/sentryPlugins/ai_block_io/detector.py
similarity index 77%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
rename to src/python/sentryPlugins/ai_block_io/detector.py
index eda9825..bcf62cb 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
+++ b/src/python/sentryPlugins/ai_block_io/detector.py
@@ -26,19 +26,26 @@ class Detector:
self._threshold = threshold
self._slidingWindow = sliding_window
self._threshold.attach_observer(self._slidingWindow)
+ self._count = 0
def get_metric_name(self):
return self._metric_name
def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
- logging.debug(f'Enter Detector: {self}')
+ self._count += 1
+ if self._count % 15 == 0:
+ self._count = 0
+ logging.info(f"({self._metric_name}) 's latest threshold is: {self._threshold.get_threshold()}.")
+ logging.debug(f'enter Detector: {self}')
metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
- if metric_value > 1e-6:
- logging.debug(f'Input metric value: {str(metric_value)}')
- self._threshold.push_latest_data_to_queue(metric_value)
+ if metric_value is None:
+ logging.debug('not found metric value, so return None.')
+ return False, None, None
+ logging.debug(f'input metric value: {str(metric_value)}')
+ self._threshold.push_latest_data_to_queue(metric_value)
detection_result = self._slidingWindow.is_slow_io_event(metric_value)
logging.debug(f'Detection result: {str(detection_result)}')
- logging.debug(f'Exit Detector: {self}')
+ logging.debug(f'exit Detector: {self}')
return detection_result
def __repr__(self):
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/src/python/sentryPlugins/ai_block_io/io_data.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
rename to src/python/sentryPlugins/ai_block_io/io_data.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/src/python/sentryPlugins/ai_block_io/sliding_window.py
similarity index 100%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
rename to src/python/sentryPlugins/ai_block_io/sliding_window.py
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/src/python/sentryPlugins/ai_block_io/threshold.py
similarity index 92%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
rename to src/python/sentryPlugins/ai_block_io/threshold.py
index 9e1ca7b..ff85d85 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
+++ b/src/python/sentryPlugins/ai_block_io/threshold.py
@@ -79,9 +79,9 @@ class AbsoluteThreshold(Threshold):
class BoxplotThreshold(Threshold):
- def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
+ def __init__(self, boxplot_parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000, **kwargs):
super().__init__(data_queue_size, data_queue_update_size)
- self.parameter = parameter
+ self.parameter = boxplot_parameter
def _update_threshold(self):
data = list(self.data_queue.queue)
@@ -94,6 +94,8 @@ class BoxplotThreshold(Threshold):
self.notify_observer()
def push_latest_data_to_queue(self, data):
+ if data < 1e-6:
+ return
try:
self.data_queue.put(data, block=False)
except queue.Full:
@@ -111,9 +113,9 @@ class BoxplotThreshold(Threshold):
class NSigmaThreshold(Threshold):
- def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
+ def __init__(self, n_sigma_parameter: float = 3.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000, **kwargs):
super().__init__(data_queue_size, data_queue_update_size)
- self.parameter = parameter
+ self.parameter = n_sigma_parameter
def _update_threshold(self):
data = list(self.data_queue.queue)
@@ -125,6 +127,8 @@ class NSigmaThreshold(Threshold):
self.notify_observer()
def push_latest_data_to_queue(self, data):
+ if data < 1e-6:
+ return
try:
self.data_queue.put(data, block=False)
except queue.Full:
@@ -157,4 +161,3 @@ class ThresholdFactory:
return NSigmaThreshold(*args, **kwargs)
else:
raise ValueError(f"Invalid threshold type: {threshold_type}")
-
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/src/python/sentryPlugins/ai_block_io/utils.py
similarity index 86%
rename from src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
rename to src/python/sentryPlugins/ai_block_io/utils.py
index f66e5ed..8dbba06 100644
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
+++ b/src/python/sentryPlugins/ai_block_io/utils.py
@@ -8,13 +8,16 @@
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
+
import logging
from dataclasses import asdict
+
from .threshold import ThresholdType
from .sliding_window import SlidingWindowType
from .io_data import MetricName, IOData
+
def get_threshold_type_enum(algorithm_type: str):
if algorithm_type.lower() == 'absolute':
return ThresholdType.AbsoluteThreshold
@@ -22,7 +25,7 @@ def get_threshold_type_enum(algorithm_type: str):
return ThresholdType.BoxplotThreshold
if algorithm_type.lower() == 'n_sigma':
return ThresholdType.NSigmaThreshold
- logging.info('not found correct algorithm type, use default: boxplot.')
+ logging.warning(f"the algorithm type: {algorithm_type} you set is invalid, use default value: boxplot")
return ThresholdType.BoxplotThreshold
@@ -33,7 +36,7 @@ def get_sliding_window_type_enum(sliding_window_type: str):
return SlidingWindowType.ContinuousSlidingWindow
if sliding_window_type.lower() == 'median':
return SlidingWindowType.MedianSlidingWindow
- logging.info('not found correct sliding window type, use default: not_continuous.')
+ logging.warning(f"the sliding window type: {sliding_window_type} you set is invalid, use default value: not_continuous")
return SlidingWindowType.NotContinuousSlidingWindow
@@ -62,6 +65,8 @@ def get_log_level(log_level: str):
return logging.INFO
elif log_level.lower() == 'warning':
return logging.WARNING
- elif log_level.lower() == 'fatal':
- return logging.FATAL
- return None
+ elif log_level.lower() == 'error':
+ return logging.ERROR
+ elif log_level.lower() == 'critical':
+ return logging.CRITICAL
+ return logging.INFO
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
deleted file mode 100644
index cd4e6f1..0000000
--- a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# coding: utf-8
-# Copyright (c) 2024 Huawei Technologies Co., Ltd.
-# sysSentry is licensed under the Mulan PSL v2.
-# You can use this software according to the terms and conditions of the Mulan PSL v2.
-# You may obtain a copy of Mulan PSL v2 at:
-# http://license.coscl.org.cn/MulanPSL2
-# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
-# PURPOSE.
-# See the Mulan PSL v2 for more details.
-
-import configparser
-import logging
-
-
-class ConfigParser:
-
- DEFAULT_ABSOLUTE_THRESHOLD = 40
- DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
- DEFAULT_LOG_LEVEL = 'info'
- DEFAULT_TRAIN_DATA_DURATION = 24
- DEFAULT_TRAIN_UPDATE_DURATION = 2
- DEFAULT_ALGORITHM_TYPE = 'boxplot'
- DEFAULT_N_SIGMA_PARAMETER = 3
- DEFAULT_BOXPLOT_PARAMETER = 1.5
- DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
- DEFAULT_WINDOW_SIZE = 30
- DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
-
- def __init__(self, config_file_name):
- self.__boxplot_parameter = None
- self.__window_minimum_threshold = None
- self.__window_size = None
- self.__sliding_window_type = None
- self.__n_sigma_parameter = None
- self.__algorithm_type = None
- self.__train_update_duration = None
- self.__log_level = None
- self.__slow_io_detect_frequency = None
- self.__absolute_threshold = None
- self.__train_data_duration = None
- self.__config_file_name = config_file_name
-
- def read_config_from_file(self):
-
- con = configparser.ConfigParser()
- con.read(self.__config_file_name, encoding='utf-8')
-
- items_common = dict(con.items('common'))
- items_algorithm = dict(con.items('algorithm'))
- items_sliding_window = dict(con.items('sliding_window'))
-
- try:
- self.__absolute_threshold = int(items_common.get('absolute_threshold',
- ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD))
- except ValueError:
- self.__absolute_threshold = ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD
- logging.warning('absolute threshold type conversion has error, use default value.')
-
- try:
- self.__slow_io_detect_frequency = int(items_common.get('slow_io_detect_frequency',
- ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY))
- except ValueError:
- self.__slow_io_detect_frequency = ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY
- logging.warning('slow_io_detect_frequency type conversion has error, use default value.')
-
- self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
-
- try:
- self.__train_data_duration = float(items_algorithm.get('train_data_duration',
- ConfigParser.DEFAULT_TRAIN_DATA_DURATION))
- except ValueError:
- self.__train_data_duration = ConfigParser.DEFAULT_TRAIN_DATA_DURATION
- logging.warning('train_data_duration type conversion has error, use default value.')
-
- try:
- self.__train_update_duration = float(items_algorithm.get('train_update_duration',
- ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION))
- except ValueError:
- self.__train_update_duration = ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION
- logging.warning('train_update_duration type conversion has error, use default value.')
-
- try:
- self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
- except ValueError:
- self.__algorithm_type = ConfigParser.DEFAULT_ALGORITHM_TYPE
- logging.warning('algorithmType type conversion has error, use default value.')
-
- if self.__algorithm_type == 'n_sigma':
- try:
- self.__n_sigma_parameter = float(items_algorithm.get('n_sigma_parameter',
- ConfigParser.DEFAULT_N_SIGMA_PARAMETER))
- except ValueError:
- self.__n_sigma_parameter = ConfigParser.DEFAULT_N_SIGMA_PARAMETER
- logging.warning('n_sigma_parameter type conversion has error, use default value.')
- elif self.__algorithm_type == 'boxplot':
- try:
- self.__boxplot_parameter = float(items_algorithm.get('boxplot_parameter',
- ConfigParser.DEFAULT_BOXPLOT_PARAMETER))
- except ValueError:
- self.__boxplot_parameter = ConfigParser.DEFAULT_BOXPLOT_PARAMETER
- logging.warning('boxplot_parameter type conversion has error, use default value.')
-
- self.__sliding_window_type = items_sliding_window.get('sliding_window_type',
- ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
-
- try:
- self.__window_size = int(items_sliding_window.get('window_size',
- ConfigParser.DEFAULT_WINDOW_SIZE))
- except ValueError:
- self.__window_size = ConfigParser.DEFAULT_WINDOW_SIZE
- logging.warning('window_size type conversion has error, use default value.')
-
- try:
- self.__window_minimum_threshold = (
- int(items_sliding_window.get('window_minimum_threshold',
- ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD)))
- except ValueError:
- self.__window_minimum_threshold = ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD
- logging.warning('window_minimum_threshold type conversion has error, use default value.')
-
- def get_slow_io_detect_frequency(self):
- return self.__slow_io_detect_frequency
-
- def get_algorithm_type(self):
- return self.__algorithm_type
-
- def get_sliding_window_type(self):
- return self.__sliding_window_type
-
- def get_train_data_duration_and_train_update_duration(self):
- return self.__train_data_duration, self.__train_update_duration
-
- def get_window_size_and_window_minimum_threshold(self):
- return self.__window_size, self.__window_minimum_threshold
-
- def get_absolute_threshold(self):
- return self.__absolute_threshold
-
- def get_log_level(self):
- return self.__log_level
diff --git a/src/python/setup.py b/src/python/setup.py
index dac6481..9e26a10 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -34,7 +34,7 @@ setup(
'xalarmd=xalarm.xalarm_daemon:alarm_process_create',
'sentryCollector=sentryCollector.collectd:main',
'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main',
- 'ai_threshold_slow_io_detection=sentryPlugins.ai_threshold_slow_io_detection.slow_io_detection:main'
+ 'ai_block_io=sentryPlugins.ai_block_io.ai_block_io:main'
]
},
)
--
2.23.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/pshysimon/sysSentry_xalarm_service.git
git@gitee.com:pshysimon/sysSentry_xalarm_service.git
pshysimon
sysSentry_xalarm_service
sysSentry_xalarm_service
master

搜索帮助