代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/sysSentry 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From 3d72fa7f517e6e99af1205e965c3775dc23461f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BA=E6=9C=89=E5=BF=97?= <1037617413@qq.com>
Date: Mon, 23 Sep 2024 11:03:26 +0800
Subject: [PATCH] add ai threshold slow io detection to sysSentry
---
.../ai_threshold_slow_io_detection.ini | 16 ++
.../tasks/ai_threshold_slow_io_detection.mod | 5 +
.../test_ai_threshold_slow_io_detection.py | 165 ++++++++++++++++++
.../ai_threshold_slow_io_detection/README.md | 2 +
.../__init__.py | 0
.../alarm_report.py | 49 ++++++
.../config_parser.py | 141 +++++++++++++++
.../data_access.py | 91 ++++++++++
.../detector.py | 48 +++++
.../ai_threshold_slow_io_detection/io_data.py | 74 ++++++++
.../sliding_window.py | 113 ++++++++++++
.../slow_io_detection.py | 133 ++++++++++++++
.../threshold.py | 160 +++++++++++++++++
.../ai_threshold_slow_io_detection/utils.py | 67 +++++++
src/python/setup.py | 3 +-
15 files changed, 1066 insertions(+), 1 deletion(-)
create mode 100644 config/plugins/ai_threshold_slow_io_detection.ini
create mode 100644 config/tasks/ai_threshold_slow_io_detection.mod
create mode 100644 selftest/test/test_ai_threshold_slow_io_detection.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
create mode 100644 src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
diff --git a/config/plugins/ai_threshold_slow_io_detection.ini b/config/plugins/ai_threshold_slow_io_detection.ini
new file mode 100644
index 0000000..44eb928
--- /dev/null
+++ b/config/plugins/ai_threshold_slow_io_detection.ini
@@ -0,0 +1,16 @@
+[common]
+absolute_threshold=40
+slow_io_detect_frequency=1
+log_level=info
+
+[algorithm]
+train_data_duration=0.1
+train_update_duration=0.02
+algorithm_type=n_sigma
+boxplot_parameter=1.5
+n_sigma_parameter=3
+
+[sliding_window]
+sliding_window_type=not_continuous
+window_size=30
+window_minimum_threshold=6
\ No newline at end of file
diff --git a/config/tasks/ai_threshold_slow_io_detection.mod b/config/tasks/ai_threshold_slow_io_detection.mod
new file mode 100644
index 0000000..2729f72
--- /dev/null
+++ b/config/tasks/ai_threshold_slow_io_detection.mod
@@ -0,0 +1,5 @@
+[common]
+enabled=yes
+task_start=/usr/bin/python3 /usr/bin/ai_threshold_slow_io_detection
+task_stop=pkill -f /usr/bin/ai_threshold_slow_io_detection
+type=oneshot
\ No newline at end of file
diff --git a/selftest/test/test_ai_threshold_slow_io_detection.py b/selftest/test/test_ai_threshold_slow_io_detection.py
new file mode 100644
index 0000000..c36fef5
--- /dev/null
+++ b/selftest/test/test_ai_threshold_slow_io_detection.py
@@ -0,0 +1,165 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import unittest
+import numpy as np
+
+from sentryPlugins.ai_threshold_slow_io_detection.threshold import AbsoluteThreshold, BoxplotThreshold, NSigmaThreshold
+from sentryPlugins.ai_threshold_slow_io_detection.sliding_window import (NotContinuousSlidingWindow,
+ ContinuousSlidingWindow, MedianSlidingWindow)
+
+
+def _get_boxplot_threshold(data_list: list, parameter):
+    """Reference boxplot upper fence: Q3 + parameter * (Q3 - Q1)."""
+    lower_quartile = np.percentile(data_list, 25)
+    upper_quartile = np.percentile(data_list, 75)
+    return upper_quartile + parameter * (upper_quartile - lower_quartile)
+
+
+def _get_n_sigma_threshold(data_list: list, parameter):
+    """Reference n-sigma threshold: mean + parameter * population standard deviation."""
+    center = np.mean(data_list)
+    spread = np.std(data_list)
+    return center + parameter * spread
+
+
+class Test(unittest.TestCase):
+    """Unit tests for the threshold algorithms, the sliding windows and collector-data parsing."""
+
+    @classmethod
+    def setUpClass(cls):
+        print("UnitTest Begin...")
+
+    @classmethod
+    def tearDownClass(cls):
+        print("UnitTest End...")
+
+    def setUp(self):
+        print("Begin...")
+
+    def tearDown(self):
+        print("End...")
+
+    def test_absolute_threshold(self):
+        absolute = AbsoluteThreshold()
+        # Without a configured threshold nothing is reported as abnormal.
+        self.assertIsNone(absolute.get_threshold())
+        self.assertFalse(absolute.is_abnormal(5000))
+        absolute.set_threshold(40)
+        self.assertEqual(40, absolute.get_threshold())
+        self.assertTrue(absolute.is_abnormal(50))
+
+    def test_boxplot_threshold(self):
+        boxplot = BoxplotThreshold(1.5, 5, 1)
+        # Stage 1: not initialised yet.
+        self.assertIsNone(boxplot.get_threshold())
+        self.assertFalse(boxplot.is_abnormal(5000))
+        # A threshold is generated once 5 samples have been pushed.
+        data_list = [20, 20, 20, 30, 10]
+        for data in data_list:
+            boxplot.push_latest_data_to_queue(data)
+        # Stage 2: initialised.
+        boxplot_threshold = boxplot.get_threshold()
+        self.assertEqual(_get_boxplot_threshold(data_list, 1.5), boxplot_threshold)
+        self.assertTrue(boxplot.is_abnormal(5000))
+        data_list.pop(0)
+        data_list.append(100)
+        boxplot.push_latest_data_to_queue(100)
+        # Stage 3: the threshold is updated.
+        boxplot_threshold = boxplot.get_threshold()
+        self.assertEqual(_get_boxplot_threshold(data_list, 1.5), boxplot_threshold)
+
+    def test_n_sigma_threshold(self):
+        n_sigma = NSigmaThreshold(3, 5, 1)
+        # Stage 1: not initialised yet.
+        self.assertIsNone(n_sigma.get_threshold())
+        self.assertFalse(n_sigma.is_abnormal(5000))
+        data_list = [20, 20, 20, 30, 10]
+        for data in data_list:
+            n_sigma.push_latest_data_to_queue(data)
+        # Stage 2: initialised.
+        n_sigma_threshold = n_sigma.get_threshold()
+        self.assertEqual(_get_n_sigma_threshold(data_list, 3), n_sigma_threshold)
+        self.assertTrue(n_sigma.is_abnormal(5000))
+        data_list.pop(0)
+        data_list.append(100)
+        n_sigma.push_latest_data_to_queue(100)
+        # Stage 3: the threshold is updated.
+        n_sigma_threshold = n_sigma.get_threshold()
+        self.assertEqual(_get_n_sigma_threshold(data_list, 3), n_sigma_threshold)
+
+    def test_not_continuous_sliding_window(self):
+        not_continuous = NotContinuousSlidingWindow(5, 3)
+        boxplot_threshold = BoxplotThreshold(1.5, 10, 8)
+        boxplot_threshold.attach_observer(not_continuous)
+        data_list1 = [19, 20, 20, 20, 20, 20, 22, 24, 23, 20]
+        for data in data_list1:
+            boxplot_threshold.push_latest_data_to_queue(data)
+            result = not_continuous.is_slow_io_event(data)
+            self.assertFalse(result[0])
+        self.assertEqual(23.75, boxplot_threshold.get_threshold())
+        boxplot_threshold.push_latest_data_to_queue(24)
+        result = not_continuous.is_slow_io_event(24)
+        self.assertFalse(result[0])
+        boxplot_threshold.push_latest_data_to_queue(25)
+        result = not_continuous.is_slow_io_event(25)
+        self.assertTrue(result[0])
+        data_list2 = [20, 20, 20, 20, 20, 20]
+        for data in data_list2:
+            boxplot_threshold.push_latest_data_to_queue(data)
+            result = not_continuous.is_slow_io_event(data)
+            self.assertFalse(result[0])
+        self.assertEqual(25.625, boxplot_threshold.get_threshold())
+
+    def test_continuous_sliding_window(self):
+        continuous = ContinuousSlidingWindow(5, 3)
+        boxplot_threshold = BoxplotThreshold(1.5, 10, 8)
+        boxplot_threshold.attach_observer(continuous)
+        data_list = [19, 20, 20, 20, 20, 20, 22, 24, 23, 20]
+        for data in data_list:
+            boxplot_threshold.push_latest_data_to_queue(data)
+            result = continuous.is_slow_io_event(data)
+            self.assertFalse(result[0])
+        self.assertEqual(23.75, boxplot_threshold.get_threshold())
+        # Fewer than three abnormal points in the window.
+        self.assertFalse(continuous.is_slow_io_event(25)[0])
+        # Three abnormal points, but not consecutive.
+        self.assertFalse(continuous.is_slow_io_event(25)[0])
+        # Three consecutive abnormal points.
+        self.assertTrue(continuous.is_slow_io_event(25)[0])
+
+    def test_median_sliding_window(self):
+        median = MedianSlidingWindow(5, 3)
+        absolute_threshold = AbsoluteThreshold(10, 8)
+        absolute_threshold.attach_observer(median)
+        absolute_threshold.set_threshold(24.5)
+        data_list = [24, 24, 24, 25, 25]
+        for data in data_list:
+            self.assertFalse(median.is_slow_io_event(data)[0])
+        self.assertTrue(median.is_slow_io_event(25)[0])
+
+    def test_parse_collect_data(self):
+        collect = {
+            "read": [1.0, 2.0, 3.0, 4.0],
+            "write": [5.0, 6.0, 7.0, 8.0],
+            "flush": [9.0, 10.0, 11.0, 12.0],
+            "discard": [13.0, 14.0, 15.0, 16.0],
+        }
+        # Import through the package: data_access uses a relative import of
+        # io_data, so loading it as a top-level module would fail. The imports
+        # stay local so that an unavailable collector only breaks this test.
+        from sentryPlugins.ai_threshold_slow_io_detection.io_data import BaseData
+        from sentryPlugins.ai_threshold_slow_io_detection.data_access import _get_io_stage_data
+
+        io_data = _get_io_stage_data(collect)
+        self.assertEqual(
+            io_data.read, BaseData(latency=1.0, io_dump=2.0, io_length=3.0, iops=4.0)
+        )
+        self.assertEqual(
+            io_data.write, BaseData(latency=5.0, io_dump=6.0, io_length=7.0, iops=8.0)
+        )
+        self.assertEqual(
+            io_data.flush, BaseData(latency=9.0, io_dump=10.0, io_length=11.0, iops=12.0)
+        )
+        self.assertEqual(
+            io_data.discard, BaseData(latency=13.0, io_dump=14.0, io_length=15.0, iops=16.0)
+        )
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md b/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
new file mode 100644
index 0000000..f9b8388
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/README.md
@@ -0,0 +1,2 @@
+# slow_io_detection
+
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
new file mode 100644
index 0000000..3f4f34e
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/alarm_report.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+from syssentry.result import ResultLevel, report_result
+import logging
+import json
+
+
+class AlarmReport:
+    """Report results of the slow IO detection task, one static method per result level.
+
+    Every method forwards ``{"msg": info}`` (JSON-encoded) to ``report_result``
+    under ``TASK_NAME`` and then writes an info-level log line.
+    """
+    TASK_NAME = "SLOW_IO_DETECTION"
+
+    @staticmethod
+    def _report(level, level_name: str, info: str):
+        # Shared body of all report_* methods: report first, then log.
+        payload = json.dumps({"msg": info})
+        report_result(AlarmReport.TASK_NAME, level, payload)
+        logging.info(f'Report {AlarmReport.TASK_NAME} {level_name}: {info}')
+
+    @staticmethod
+    def report_pass(info: str):
+        AlarmReport._report(ResultLevel.PASS, 'PASS', info)
+
+    @staticmethod
+    def report_fail(info: str):
+        AlarmReport._report(ResultLevel.FAIL, 'FAIL', info)
+
+    @staticmethod
+    def report_skip(info: str):
+        AlarmReport._report(ResultLevel.SKIP, 'SKIP', info)
+
+    @staticmethod
+    def report_minor_alm(info: str):
+        AlarmReport._report(ResultLevel.MINOR_ALM, 'MINOR_ALM', info)
+
+    @staticmethod
+    def report_major_alm(info: str):
+        AlarmReport._report(ResultLevel.MAJOR_ALM, 'MAJOR_ALM', info)
+
+    @staticmethod
+    def report_critical_alm(info: str):
+        AlarmReport._report(ResultLevel.CRITICAL_ALM, 'CRITICAL_ALM', info)
+
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
new file mode 100644
index 0000000..cd4e6f1
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/config_parser.py
@@ -0,0 +1,141 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import configparser
+import logging
+
+
+class ConfigParser:
+    """Read the plugin INI file and expose its settings with safe defaults.
+
+    Numeric options fall back to the matching ``DEFAULT_*`` constant when the
+    option is absent or cannot be converted. A missing section is treated as
+    empty, so a missing file (which ``configparser`` ignores on ``read``)
+    yields the defaults instead of raising ``NoSectionError``.
+    """
+
+    DEFAULT_ABSOLUTE_THRESHOLD = 40
+    DEFAULT_SLOW_IO_DETECTION_FREQUENCY = 1
+    DEFAULT_LOG_LEVEL = 'info'
+    DEFAULT_TRAIN_DATA_DURATION = 24
+    DEFAULT_TRAIN_UPDATE_DURATION = 2
+    DEFAULT_ALGORITHM_TYPE = 'boxplot'
+    DEFAULT_N_SIGMA_PARAMETER = 3
+    DEFAULT_BOXPLOT_PARAMETER = 1.5
+    DEFAULT_SLIDING_WINDOW_TYPE = 'not_continuous'
+    DEFAULT_WINDOW_SIZE = 30
+    DEFAULT_WINDOW_MINIMUM_THRESHOLD = 6
+
+    def __init__(self, config_file_name):
+        """Store the path; every setting stays None until read_config_from_file() runs."""
+        self.__boxplot_parameter = None
+        self.__window_minimum_threshold = None
+        self.__window_size = None
+        self.__sliding_window_type = None
+        self.__n_sigma_parameter = None
+        self.__algorithm_type = None
+        self.__train_update_duration = None
+        self.__log_level = None
+        self.__slow_io_detect_frequency = None
+        self.__absolute_threshold = None
+        self.__train_data_duration = None
+        self.__config_file_name = config_file_name
+
+    @staticmethod
+    def _section_items(con, section):
+        """Return the options of ``section`` as a dict, or {} when the section is missing."""
+        if not con.has_section(section):
+            logging.warning(f'section [{section}] not found in config file, use default values.')
+            return {}
+        return dict(con.items(section))
+
+    @staticmethod
+    def _convert(items, key, cast, default, label=None):
+        """Return ``cast(items[key])``; use ``default`` when the key is absent or malformed.
+
+        ``label`` overrides the name used in the warning message.
+        """
+        try:
+            return cast(items.get(key, default))
+        except ValueError:
+            logging.warning(f'{label or key} type conversion has error, use default value.')
+            return default
+
+    def read_config_from_file(self):
+        """Parse the INI file and populate every setting.
+
+        Syntax errors in the file itself still propagate from ``configparser``.
+        """
+        con = configparser.ConfigParser()
+        con.read(self.__config_file_name, encoding='utf-8')
+
+        items_common = self._section_items(con, 'common')
+        items_algorithm = self._section_items(con, 'algorithm')
+        items_sliding_window = self._section_items(con, 'sliding_window')
+
+        self.__absolute_threshold = self._convert(
+            items_common, 'absolute_threshold', int,
+            ConfigParser.DEFAULT_ABSOLUTE_THRESHOLD, label='absolute threshold')
+        self.__slow_io_detect_frequency = self._convert(
+            items_common, 'slow_io_detect_frequency', int,
+            ConfigParser.DEFAULT_SLOW_IO_DETECTION_FREQUENCY)
+        self.__log_level = items_common.get('log_level', ConfigParser.DEFAULT_LOG_LEVEL)
+
+        self.__train_data_duration = self._convert(
+            items_algorithm, 'train_data_duration', float,
+            ConfigParser.DEFAULT_TRAIN_DATA_DURATION)
+        self.__train_update_duration = self._convert(
+            items_algorithm, 'train_update_duration', float,
+            ConfigParser.DEFAULT_TRAIN_UPDATE_DURATION)
+        # Plain string lookup: nothing to convert, so nothing can raise here.
+        self.__algorithm_type = items_algorithm.get('algorithm_type', ConfigParser.DEFAULT_ALGORITHM_TYPE)
+
+        # Only the parameter of the selected algorithm is parsed; the other stays None.
+        if self.__algorithm_type == 'n_sigma':
+            self.__n_sigma_parameter = self._convert(
+                items_algorithm, 'n_sigma_parameter', float,
+                ConfigParser.DEFAULT_N_SIGMA_PARAMETER)
+        elif self.__algorithm_type == 'boxplot':
+            self.__boxplot_parameter = self._convert(
+                items_algorithm, 'boxplot_parameter', float,
+                ConfigParser.DEFAULT_BOXPLOT_PARAMETER)
+
+        self.__sliding_window_type = items_sliding_window.get('sliding_window_type',
+                                                              ConfigParser.DEFAULT_SLIDING_WINDOW_TYPE)
+        self.__window_size = self._convert(
+            items_sliding_window, 'window_size', int,
+            ConfigParser.DEFAULT_WINDOW_SIZE)
+        self.__window_minimum_threshold = self._convert(
+            items_sliding_window, 'window_minimum_threshold', int,
+            ConfigParser.DEFAULT_WINDOW_MINIMUM_THRESHOLD)
+
+    def get_slow_io_detect_frequency(self):
+        return self.__slow_io_detect_frequency
+
+    def get_algorithm_type(self):
+        return self.__algorithm_type
+
+    def get_sliding_window_type(self):
+        return self.__sliding_window_type
+
+    def get_train_data_duration_and_train_update_duration(self):
+        return self.__train_data_duration, self.__train_update_duration
+
+    def get_window_size_and_window_minimum_threshold(self):
+        return self.__window_size, self.__window_minimum_threshold
+
+    def get_absolute_threshold(self):
+        return self.__absolute_threshold
+
+    def get_log_level(self):
+        return self.__log_level
+
+    def get_n_sigma_parameter(self):
+        """Return the parsed n_sigma parameter, or None unless algorithm_type is 'n_sigma'."""
+        return self.__n_sigma_parameter
+
+    def get_boxplot_parameter(self):
+        """Return the parsed boxplot parameter, or None unless algorithm_type is 'boxplot'."""
+        return self.__boxplot_parameter
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
new file mode 100644
index 0000000..d9f3460
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/data_access.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import json
+import logging
+
+from sentryCollector.collect_plugin import (
+ Result_Messages,
+ get_io_data,
+ is_iocollect_valid,
+)
+from .io_data import IOStageData, IOData
+
+# Stage names that _get_raw_data() passes to get_io_data(); each one has a
+# same-named IOStageData field on IOData in io_data.py.
+COLLECT_STAGES = [
+ "throtl",
+ "wbt",
+ "gettag",
+ "plug",
+ "bfq",
+ "hctx",
+ "requeue",
+ "rq_driver",
+ "bio",
+ "iocost",
+]
+
+def check_collect_valid(period):
+    """Return the top-level keys of the ``is_iocollect_valid`` reply as a list.
+
+    The caller uses the result as its disk list. An empty list is returned,
+    with a warning, when the collector reports a non-zero ``ret``, when the
+    message is missing or is not valid JSON, or when the decoded payload is
+    not a JSON object.
+    """
+    data_raw = is_iocollect_valid(period)
+    if data_raw["ret"] != 0:
+        logging.warning(f'check collect valid failed, ret: {data_raw["ret"]}')
+        return []
+    try:
+        data = json.loads(data_raw["message"])
+    except (KeyError, TypeError, ValueError) as e:
+        # json.JSONDecodeError is a ValueError; KeyError/TypeError cover a
+        # missing or non-string message.
+        logging.warning(f"get io data failed, {e}")
+        return []
+    if not isinstance(data, dict):
+        logging.warning(f"get io data failed, unexpected payload type: {type(data).__name__}")
+        return []
+    return list(data)
+
+
+def _get_raw_data(period, disk_list):
+    """Query get_io_data for every stage in COLLECT_STAGES and all four IO access types."""
+    io_access_types = ["read", "write", "flush", "discard"]
+    return get_io_data(period, disk_list, COLLECT_STAGES, io_access_types)
+
+
+def _get_io_stage_data(data):
+    """Build an IOStageData from one stage's mapping of access type -> sample.
+
+    Each sample is read positionally as [latency, io_dump, io_length, iops].
+    Access types missing from ``data`` keep their default BaseData. A sample
+    that is not a sequence of at least four values is skipped with a warning
+    rather than raising out of the detection loop.
+    """
+    io_stage_data = IOStageData()
+    for data_type in ('read', 'write', 'flush', 'discard'):
+        if data_type not in data:
+            continue
+        try:
+            latency, io_dump, io_length, iops = data[data_type][:4]
+        except (TypeError, ValueError, KeyError):
+            logging.warning(f'invalid io data for {data_type}: {data[data_type]}')
+            continue
+        base_data = getattr(io_stage_data, data_type)
+        base_data.latency = latency
+        base_data.io_dump = io_dump
+        base_data.io_length = io_length
+        base_data.iops = iops
+    return io_stage_data
+
+
+def get_io_data_from_collect_plug(period, disk_list):
+    """Fetch collector data and convert it to a ``{disk: IOData}`` dict.
+
+    Returns None (after a warning) when the collector reports a non-zero
+    ``ret`` or when its message is not valid JSON. Stage names that are not
+    IOData attributes are ignored with a debug log.
+    """
+    raw = _get_raw_data(period, disk_list)
+    if raw["ret"] != 0:
+        logging.warning(f'get io data failed with message: {raw["message"]}')
+        return None
+
+    try:
+        decoded = json.loads(raw["message"])
+    except json.decoder.JSONDecodeError as e:
+        logging.warning(f"get io data failed, {e}")
+        return None
+
+    result = {}
+    for disk in decoded:
+        io_data = IOData()
+        for stage_name, stage_values in decoded[disk].items():
+            try:
+                # Probe first so that unknown stage names are never added to IOData.
+                getattr(io_data, stage_name)
+                setattr(io_data, stage_name, _get_io_stage_data(stage_values))
+            except AttributeError:
+                logging.debug(f'no attr {stage_name}')
+                continue
+        result[disk] = io_data
+    return result
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
new file mode 100644
index 0000000..eda9825
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/detector.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+import logging
+
+from .io_data import MetricName
+from .threshold import Threshold
+from .sliding_window import SlidingWindow
+from .utils import get_metric_value_from_io_data_dict_by_metric_name
+
+
+class Detector:
+    """Binds one metric to a threshold and a sliding window.
+
+    The sliding window is attached to the threshold as an observer at
+    construction time.
+    """
+    _metric_name: MetricName = None
+    _threshold: Threshold = None
+    _slidingWindow: SlidingWindow = None
+
+    def __init__(self, metric_name: MetricName, threshold: Threshold, sliding_window: SlidingWindow):
+        self._metric_name = metric_name
+        self._threshold = threshold
+        self._slidingWindow = sliding_window
+        self._threshold.attach_observer(self._slidingWindow)
+
+    def get_metric_name(self):
+        return self._metric_name
+
+    def is_slow_io_event(self, io_data_dict_with_disk_name: dict):
+        """Feed the latest value of this detector's metric and return the window's verdict.
+
+        Returns the tuple produced by the sliding window's is_slow_io_event,
+        or ``(False, None, None)`` when no value is available for the metric.
+        """
+        logging.debug(f'Enter Detector: {self}')
+        metric_value = get_metric_value_from_io_data_dict_by_metric_name(io_data_dict_with_disk_name, self._metric_name)
+        if metric_value is None:
+            # BaseData fields default to None, so a metric can be absent from a
+            # sample; comparing None with a float would raise TypeError.
+            # NOTE(review): the helper lives in utils.py - confirm it returns None here.
+            logging.debug('No metric value found, skip this sample.')
+            logging.debug(f'Exit Detector: {self}')
+            return False, None, None
+        if metric_value > 1e-6:
+            # Only values above 1e-6 are pushed to the threshold's data queue.
+            logging.debug(f'Input metric value: {str(metric_value)}')
+            self._threshold.push_latest_data_to_queue(metric_value)
+        detection_result = self._slidingWindow.is_slow_io_event(metric_value)
+        logging.debug(f'Detection result: {str(detection_result)}')
+        logging.debug(f'Exit Detector: {self}')
+        return detection_result
+
+    def __repr__(self):
+        return (f'disk_name: {self._metric_name.get_disk_name()}, stage_name: {self._metric_name.get_stage_name()},'
+                f' access_type_name: {self._metric_name.get_io_access_type_name()},'
+                f' metric_name: {self._metric_name.get_metric_name()}, threshold_type: {self._threshold},'
+                f' sliding_window_type: {self._slidingWindow}')
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
new file mode 100644
index 0000000..0e17051
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/io_data.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Optional
+
+
+@dataclass
+class BaseData:
+    """Metrics of one IO access type within one stage; every field is None until filled in."""
+    latency: Optional[float] = None
+    io_dump: Optional[int] = None
+    io_length: Optional[int] = None
+    iops: Optional[int] = None
+
+
+@dataclass
+class IOStageData:
+    """Per-stage container holding one independent BaseData per IO access type."""
+    read: BaseData = field(default_factory=BaseData)
+    write: BaseData = field(default_factory=BaseData)
+    flush: BaseData = field(default_factory=BaseData)
+    discard: BaseData = field(default_factory=BaseData)
+
+
+@dataclass
+class IOData:
+    """Per-disk container: one IOStageData per stage plus the creation timestamp."""
+    throtl: IOStageData = field(default_factory=IOStageData)
+    wbt: IOStageData = field(default_factory=IOStageData)
+    gettag: IOStageData = field(default_factory=IOStageData)
+    iocost: IOStageData = field(default_factory=IOStageData)
+    plug: IOStageData = field(default_factory=IOStageData)
+    bfq: IOStageData = field(default_factory=IOStageData)
+    hctx: IOStageData = field(default_factory=IOStageData)
+    requeue: IOStageData = field(default_factory=IOStageData)
+    rq_driver: IOStageData = field(default_factory=IOStageData)
+    bio: IOStageData = field(default_factory=IOStageData)
+    # Evaluated for each new instance, not once at class definition.
+    time_stamp: float = field(default_factory=lambda: datetime.now().timestamp())
+
+
+class MetricName:
+    """Identifies one metric by disk, stage, IO access type and metric name."""
+    _disk_name: str = None
+    _stage_name: str = None
+    _io_access_type_name: str = None
+    _metric_name: str = None
+
+    def __init__(self, disk_name: str, stage_name: str, io_access_type_name: str, metric_name: str):
+        self._disk_name = disk_name
+        self._stage_name = stage_name
+        self._io_access_type_name = io_access_type_name
+        self._metric_name = metric_name
+
+    def get_disk_name(self):
+        return self._disk_name
+
+    def get_stage_name(self):
+        return self._stage_name
+
+    def get_io_access_type_name(self):
+        return self._io_access_type_name
+
+    def get_metric_name(self):
+        return self._metric_name
+
+    def __repr__(self):
+        # The second fragment starts with a space so that all four parts are
+        # separated by ", " (previously rendered as "...,metric: ...").
+        return (f'disk: {self._disk_name}, stage: {self._stage_name}, io_access_type: {self._io_access_type_name},'
+                f' metric: {self._metric_name}')
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
new file mode 100644
index 0000000..d395d48
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/sliding_window.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+from enum import Enum, unique
+import numpy as np
+
+
+@unique
+class SlidingWindowType(Enum):
+ # Selector consumed by SlidingWindowFactory.get_sliding_window(); the member
+ # names mirror the window classes defined in this module.
+ NotContinuousSlidingWindow = 0
+ ContinuousSlidingWindow = 1
+ MedianSlidingWindow = 2
+
+
+class SlidingWindow:
+    """Fixed-length window of recent samples with a parallel list of abnormal flags.
+
+    A sample is flagged abnormal when it is >= the current threshold; while no
+    threshold is set every flag is False. ``update`` installs a new threshold
+    and re-flags the stored samples. Subclasses implement the actual decision
+    in ``is_slow_io_event``.
+    """
+    _ai_threshold = None
+    _queue_length = None
+    _queue_threshold = None
+    _io_data_queue: list = None
+    _io_data_queue_abnormal_tag: list = None
+
+    def __init__(self, queue_length: int, threshold: int):
+        """queue_length: window capacity; threshold: abnormal count used by subclasses."""
+        self._queue_length = queue_length
+        self._queue_threshold = threshold
+        self._io_data_queue = []
+        self._io_data_queue_abnormal_tag = []
+
+    def _is_abnormal(self, data: float):
+        # Single definition of "abnormal", shared by push() and update(), so a
+        # missing threshold is handled the same way in both.
+        if self._ai_threshold is None:
+            return False
+        return data >= self._ai_threshold
+
+    def push(self, data: float):
+        """Append a sample, evicting the oldest one once the window is full."""
+        if len(self._io_data_queue) == self._queue_length:
+            self._io_data_queue.pop(0)
+            self._io_data_queue_abnormal_tag.pop(0)
+        self._io_data_queue.append(data)
+        self._io_data_queue_abnormal_tag.append(self._is_abnormal(data))
+
+    def update(self, threshold):
+        """Install a new threshold (None clears it) and recompute every flag in place."""
+        if self._ai_threshold == threshold:
+            return
+        self._ai_threshold = threshold
+        self._io_data_queue_abnormal_tag[:] = [self._is_abnormal(data) for data in self._io_data_queue]
+
+    def is_slow_io_event(self, data):
+        """Base implementation: never reports an event and does not store ``data``."""
+        return False, None, None
+
+    def __repr__(self):
+        return "SlidingWindow"
+
+
+class NotContinuousSlidingWindow(SlidingWindow):
+    """Reports an event when enough samples in a full window are abnormal, in any position."""
+
+    def is_slow_io_event(self, data):
+        """Store ``data`` and return ``(is_event, window_samples, threshold)``."""
+        super().push(data)
+        window_ready = len(self._io_data_queue) >= self._queue_length and self._ai_threshold is not None
+        abnormal_total = self._io_data_queue_abnormal_tag.count(True)
+        is_event = window_ready and abnormal_total >= self._queue_threshold
+        return is_event, self._io_data_queue, self._ai_threshold
+
+    def __repr__(self):
+        return "NotContinuousSlidingWindow"
+
+
+class ContinuousSlidingWindow(SlidingWindow):
+    """Reports an event when a full window contains a long enough run of consecutive abnormal samples."""
+
+    def is_slow_io_event(self, data):
+        """Store ``data`` and return ``(is_event, window_samples, threshold)``."""
+        super().push(data)
+        outcome = False
+        if len(self._io_data_queue) >= self._queue_length and self._ai_threshold is not None:
+            run_length = 0
+            for is_abnormal in self._io_data_queue_abnormal_tag:
+                if not is_abnormal:
+                    # A normal sample breaks the current run.
+                    run_length = 0
+                    continue
+                run_length += 1
+                if run_length >= self._queue_threshold:
+                    outcome = True
+                    break
+        return outcome, self._io_data_queue, self._ai_threshold
+
+    def __repr__(self):
+        return "ContinuousSlidingWindow"
+
+
+class MedianSlidingWindow(SlidingWindow):
+    """Reports an event when the median of a full window reaches the threshold."""
+
+    def is_slow_io_event(self, data):
+        """Store ``data`` and return ``(is_event, window_samples, threshold)``."""
+        super().push(data)
+        window_ready = len(self._io_data_queue) >= self._queue_length and self._ai_threshold is not None
+        if not window_ready:
+            return False, self._io_data_queue, self._ai_threshold
+        # bool() keeps the first element a plain Python bool rather than a numpy one.
+        reached = bool(np.median(self._io_data_queue) >= self._ai_threshold)
+        return reached, self._io_data_queue, self._ai_threshold
+
+    def __repr__(self):
+        return "MedianSlidingWindow"
+
+
+class SlidingWindowFactory:
+    """Creates the sliding window implementation selected by a SlidingWindowType."""
+
+    def get_sliding_window(self, sliding_window_type: SlidingWindowType, *args, **kwargs):
+        """Instantiate the matching window class with ``*args, **kwargs``.
+
+        Any unrecognised type falls back to NotContinuousSlidingWindow.
+        """
+        candidates = (
+            (SlidingWindowType.ContinuousSlidingWindow, ContinuousSlidingWindow),
+            (SlidingWindowType.MedianSlidingWindow, MedianSlidingWindow),
+        )
+        for window_type, window_cls in candidates:
+            if sliding_window_type == window_type:
+                return window_cls(*args, **kwargs)
+        # NotContinuousSlidingWindow is both an explicit choice and the default.
+        return NotContinuousSlidingWindow(*args, **kwargs)
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
new file mode 100644
index 0000000..43cf770
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/slow_io_detection.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+import time
+import signal
+import logging
+
+from .detector import Detector
+from .threshold import ThresholdFactory, AbsoluteThreshold
+from .sliding_window import SlidingWindowFactory
+from .utils import (get_threshold_type_enum, get_sliding_window_type_enum, get_data_queue_size_and_update_size,
+ get_log_level)
+from .config_parser import ConfigParser
+from .data_access import get_io_data_from_collect_plug, check_collect_valid
+from .io_data import MetricName
+from .alarm_report import AlarmReport
+
+CONFIG_FILE = "/etc/sysSentry/plugins/ai_threshold_slow_io_detection.ini"
+
+
def sig_handler(signum, frame):
    """Signal handler: log the signal, report the failure, then exit.

    Args:
        signum: Number of the received signal; also used as the exit status.
        frame: Current stack frame supplied by the ``signal`` module (unused).

    Raises:
        SystemExit: always, carrying ``signum`` as the exit code.
    """
    logging.info("receive signal: %d", signum)
    AlarmReport().report_fail(f"receive signal: {signum}")
    # The builtin ``exit`` is injected by the ``site`` module for interactive
    # use and is not guaranteed to exist (e.g. ``python -S`` or frozen builds).
    # Raising SystemExit directly gives the same exit status without it.
    raise SystemExit(signum)
+
+
class SlowIODetection:
    """Periodic slow-IO detector driven by the plugin configuration.

    On construction, one detector is created for the bio read latency and one
    for the bio write latency of every disk returned by ``check_collect_valid``.
    ``launch`` then loops forever: fetch IO data, run every detector, report
    the slow-IO events found, and sleep for the configured detection interval.
    """

    _config_parser = None
    _disk_list = None
    # Declared here for discoverability only. The real containers are created
    # per instance in __init__ so that instances never share mutable state.
    _detector_name_list = None
    _detectors = None

    def __init__(self, config_parser: "ConfigParser"):
        """Set up logging and build all detectors from ``config_parser``."""
        self._config_parser = config_parser
        self._detector_name_list = []
        self._detectors = {}
        self.__set_log_format()
        self.__init_detector_name_list()
        self.__init_detector()

    def __set_log_format(self):
        """Configure the root logger with the level taken from the configuration."""
        log_format = "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
        log_level = get_log_level(self._config_parser.get_log_level())
        logging.basicConfig(level=log_level, format=log_format)

    def __init_detector_name_list(self):
        """Collect the metric names to watch: bio read/write latency per disk."""
        self._disk_list = check_collect_valid(self._config_parser.get_slow_io_detect_frequency())
        for disk in self._disk_list:
            self._detector_name_list.append(MetricName(disk, "bio", "read", "latency"))
            self._detector_name_list.append(MetricName(disk, "bio", "write", "latency"))

    def __init_detector(self):
        """Create one detector (threshold + sliding window) per metric name."""
        train_data_duration, train_update_duration = (self._config_parser.
                                                      get_train_data_duration_and_train_update_duration())
        slow_io_detection_frequency = self._config_parser.get_slow_io_detect_frequency()
        threshold_type = get_threshold_type_enum(self._config_parser.get_algorithm_type())
        data_queue_size, update_size = get_data_queue_size_and_update_size(train_data_duration,
                                                                           train_update_duration,
                                                                           slow_io_detection_frequency)
        sliding_window_type = get_sliding_window_type_enum(self._config_parser.get_sliding_window_type())
        window_size, window_threshold = self._config_parser.get_window_size_and_window_minimum_threshold()

        for detector_name in self._detector_name_list:
            threshold = ThresholdFactory().get_threshold(threshold_type, data_queue_size=data_queue_size,
                                                         data_queue_update_size=update_size)
            sliding_window = SlidingWindowFactory().get_sliding_window(sliding_window_type, queue_length=window_size,
                                                                       threshold=window_threshold)
            detector = Detector(detector_name, threshold, sliding_window)
            # An absolute threshold is not learned from data, so seed it from the config.
            if isinstance(threshold, AbsoluteThreshold):
                threshold.set_threshold(self._config_parser.get_absolute_threshold())
            self._detectors[detector_name] = detector
            logging.info(f"add detector: {detector}")

    def _collect_slow_io_events(self, io_data_dict_with_disk_name):
        """Run every detector on the IO data; return ``(metric_name, result)`` for each hit."""
        slow_io_event_list = []
        for detector in self._detectors.values():
            result = detector.is_slow_io_event(io_data_dict_with_disk_name)
            if result[0]:
                slow_io_event_list.append((detector.get_metric_name(), result))
        return slow_io_event_list

    @staticmethod
    def _format_alarm_message(metric_name, result):
        """Build the alarm text for one slow-IO event.

        ``result`` is the detector result tuple; index 1 is the current window
        and index 2 is the threshold.
        """
        # The stage field must come from get_stage_name(); it previously
        # repeated the metric name.
        return (f"disk {metric_name.get_disk_name()} has slow io event."
                f"stage: {metric_name.get_stage_name()},"
                f"type: {metric_name.get_io_access_type_name()},"
                f"metric: {metric_name.get_metric_name()},"
                f"current window: {result[1]},"
                f"threshold: {result[2]}")

    def launch(self):
        """Run the detection loop forever; this method never returns."""
        while True:
            logging.debug('step0. AI threshold slow io event detection is looping.')
            detect_interval = self._config_parser.get_slow_io_detect_frequency()

            # Step 1: fetch IO data
            io_data_dict_with_disk_name = get_io_data_from_collect_plug(detect_interval, self._disk_list)
            logging.debug(f'step1. Get io data: {str(io_data_dict_with_disk_name)}')
            if io_data_dict_with_disk_name is None:
                # Wait one interval before retrying so a failing data source
                # cannot turn this loop into a busy spin.
                time.sleep(detect_interval)
                continue

            # Step 2: slow IO detection
            logging.debug('step2. Start to detection slow io event.')
            slow_io_event_list = self._collect_slow_io_events(io_data_dict_with_disk_name)
            logging.debug('step2. End to detection slow io event.')

            # Step 3: report slow IO events
            logging.debug('step3. Report slow io event to sysSentry.')
            for slow_io_event in slow_io_event_list:
                metric_name, result = slow_io_event
                AlarmReport.report_major_alm(self._format_alarm_message(metric_name, result))
                logging.error(f"slow io event happen: {str(slow_io_event)}")

            # Step 4: wait for the next detection round
            logging.debug('step4. Wait to start next slow io event detection loop.')
            time.sleep(detect_interval)
+
+
def main():
    """Plugin entry point: install signal handlers, load the config, start detection."""
    # Step 1: register the signal handler
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, sig_handler)

    # Step 2: resume from checkpoint
    # todo:

    # Step 3: read the configuration
    config = ConfigParser(CONFIG_FILE)
    config.read_config_from_file()

    # Step 4: start slow IO detection
    SlowIODetection(config).launch()
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
new file mode 100644
index 0000000..9e1ca7b
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/threshold.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+import logging
+from enum import Enum
+import queue
+import numpy as np
+import math
+
+from .sliding_window import SlidingWindow
+
+
class ThresholdState(Enum):
    """Lifecycle state of a threshold object."""

    # No usable threshold value has been set yet.
    INIT = 0
    # A threshold value is available.
    START = 1
+
+
class Threshold:
    """Base threshold: holds a threshold value, its state, and a training-data queue.

    A single observer can be attached; it is notified through ``update`` with
    the current threshold whenever ``set_threshold`` runs.
    """

    threshold = None
    data_queue: queue.Queue = None
    data_queue_update_size: int = None
    new_data_size: int = None
    threshold_state: ThresholdState = None

    def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Create an empty bounded data queue and start in the INIT state."""
        self._observer = None
        self.threshold = math.inf
        self.threshold_state = ThresholdState.INIT
        self.new_data_size = 0
        self.data_queue_update_size = data_queue_update_size
        self.data_queue = queue.Queue(data_queue_size)

    def set_threshold(self, threshold):
        """Store ``threshold``, switch to the START state and notify the observer."""
        self.threshold = threshold
        self.threshold_state = ThresholdState.START
        self.notify_observer()

    def get_threshold(self):
        """Return the current threshold, or None while still in the INIT state."""
        return None if self.threshold_state == ThresholdState.INIT else self.threshold

    def is_abnormal(self, data):
        """Return whether ``data`` reaches the threshold; always False in the INIT state."""
        return self.threshold_state != ThresholdState.INIT and data >= self.threshold

    # Observer pattern: when the threshold changes, the sliding window's copy
    # of the threshold is refreshed automatically.
    def attach_observer(self, observer: SlidingWindow):
        """Register ``observer`` as the single listener for threshold changes."""
        self._observer = observer

    def notify_observer(self):
        """Push the current threshold to the attached observer, if there is one."""
        if self._observer is None:
            return
        self._observer.update(self.threshold)

    def push_latest_data_to_queue(self, data):
        """Accept a new sample; the base implementation does nothing."""
        pass

    def __repr__(self):
        """Return the fixed label that identifies this threshold kind."""
        return "Threshold"
+
+
class AbsoluteThreshold(Threshold):
    """Threshold whose value is set explicitly instead of being derived from samples."""

    def __init__(self, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Forward both queue sizes to the base class unchanged."""
        super().__init__(data_queue_size, data_queue_update_size)

    def push_latest_data_to_queue(self, data):
        """Ignore the sample: this class does not update its threshold from pushed data."""
        pass

    def __repr__(self):
        """Return the fixed label that identifies this threshold kind."""
        return "AbsoluteThreshold"
+
+
class BoxplotThreshold(Threshold):
    """Threshold computed from queued samples as ``Q3 + parameter * IQR``."""

    def __init__(self, parameter: float = 1.5, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Store the IQR multiplier and initialise the base queue and state."""
        super().__init__(data_queue_size, data_queue_update_size)
        self.parameter = parameter

    def _update_threshold(self):
        """Recompute the threshold from the queued samples and notify the observer."""
        samples = list(self.data_queue.queue)
        lower_quartile = np.percentile(samples, 25)
        upper_quartile = np.percentile(samples, 75)
        spread = upper_quartile - lower_quartile
        self.threshold = upper_quartile + self.parameter * spread
        if self.threshold_state == ThresholdState.INIT:
            # The first computed value makes the threshold usable.
            self.threshold_state = ThresholdState.START
        self.notify_observer()

    def push_latest_data_to_queue(self, data):
        """Append ``data`` to the queue, evicting the oldest sample when full.

        The threshold is recomputed only when the queue is full, and then
        either on the first fill (INIT state) or after at least
        ``data_queue_update_size`` new samples since the last recomputation.
        """
        samples = self.data_queue
        try:
            samples.put(data, block=False)
        except queue.Full:
            # Drop the oldest sample to make room for the new one.
            samples.get()
            samples.put(data)
        self.new_data_size += 1

        if not samples.full():
            return
        first_fit = self.threshold_state == ThresholdState.INIT
        refresh_due = (self.threshold_state == ThresholdState.START
                       and self.new_data_size >= self.data_queue_update_size)
        if first_fit or refresh_due:
            self._update_threshold()
            self.new_data_size = 0

    def __repr__(self):
        """Return the fixed label that identifies this threshold kind."""
        return "BoxplotThreshold"
+
+
class NSigmaThreshold(Threshold):
    """Threshold computed from queued samples as ``mean + parameter * std``."""

    def __init__(self, parameter: float = 2.0, data_queue_size: int = 10000, data_queue_update_size: int = 1000):
        """Store the sigma multiplier and initialise the base queue and state."""
        super().__init__(data_queue_size, data_queue_update_size)
        self.parameter = parameter

    def _update_threshold(self):
        """Recompute the threshold from the queued samples and notify the observer."""
        samples = list(self.data_queue.queue)
        center = np.mean(samples)
        deviation = np.std(samples)
        self.threshold = center + self.parameter * deviation
        if self.threshold_state == ThresholdState.INIT:
            # The first computed value makes the threshold usable.
            self.threshold_state = ThresholdState.START
        self.notify_observer()

    def push_latest_data_to_queue(self, data):
        """Append ``data`` to the queue, evicting the oldest sample when full.

        The threshold is recomputed only when the queue is full, and then
        either on the first fill (INIT state) or after at least
        ``data_queue_update_size`` new samples since the last recomputation.
        """
        samples = self.data_queue
        try:
            samples.put(data, block=False)
        except queue.Full:
            # Drop the oldest sample to make room for the new one.
            samples.get()
            samples.put(data)
        self.new_data_size += 1

        if not samples.full():
            return
        first_fit = self.threshold_state == ThresholdState.INIT
        refresh_due = (self.threshold_state == ThresholdState.START
                       and self.new_data_size >= self.data_queue_update_size)
        if first_fit or refresh_due:
            self._update_threshold()
            self.new_data_size = 0

    def __repr__(self):
        """Return the fixed label that identifies this threshold kind."""
        return "NSigmaThreshold"
+
+
class ThresholdType(Enum):
    """Selector for the threshold implementation built by ``ThresholdFactory``."""

    # Each member is named after the threshold class it selects.
    AbsoluteThreshold = 0
    BoxplotThreshold = 1
    NSigmaThreshold = 2
+
+
class ThresholdFactory:
    """Factory that maps a ``ThresholdType`` to a threshold instance."""

    def get_threshold(self, threshold_type: ThresholdType, *args, **kwargs):
        """Build the threshold selected by ``threshold_type``.

        ``*args`` and ``**kwargs`` are forwarded unchanged to the chosen class.

        Raises:
            ValueError: if ``threshold_type`` matches no known member.
        """
        known_thresholds = (
            (ThresholdType.AbsoluteThreshold, AbsoluteThreshold),
            (ThresholdType.BoxplotThreshold, BoxplotThreshold),
            (ThresholdType.NSigmaThreshold, NSigmaThreshold),
        )
        for member, threshold_cls in known_thresholds:
            if threshold_type == member:
                return threshold_cls(*args, **kwargs)
        raise ValueError(f"Invalid threshold type: {threshold_type}")
+
diff --git a/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py b/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
new file mode 100644
index 0000000..f66e5ed
--- /dev/null
+++ b/src/python/sentryPlugins/ai_threshold_slow_io_detection/utils.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+# Copyright (c) 2024 Huawei Technologies Co., Ltd.
+# sysSentry is licensed under the Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+# PURPOSE.
+# See the Mulan PSL v2 for more details.
+import logging
+from dataclasses import asdict
+
+from .threshold import ThresholdType
+from .sliding_window import SlidingWindowType
+from .io_data import MetricName, IOData
+
def get_threshold_type_enum(algorithm_type: str):
    """Translate a configured algorithm name into a ``ThresholdType`` member.

    Matching is case-insensitive. Recognised names are ``absolute``,
    ``boxplot`` and ``n_sigma``; any other name is logged and mapped to
    ``ThresholdType.BoxplotThreshold``.
    """
    normalized = algorithm_type.lower()
    known_types = {
        'absolute': ThresholdType.AbsoluteThreshold,
        'boxplot': ThresholdType.BoxplotThreshold,
        'n_sigma': ThresholdType.NSigmaThreshold,
    }
    if normalized in known_types:
        return known_types[normalized]
    logging.info('not found correct algorithm type, use default: boxplot.')
    return ThresholdType.BoxplotThreshold
+
+
def get_sliding_window_type_enum(sliding_window_type: str):
    """Translate a configured window name into a ``SlidingWindowType`` member.

    Matching is case-insensitive. Recognised names are ``not_continuous``,
    ``continuous`` and ``median``; any other name is logged and mapped to
    ``SlidingWindowType.NotContinuousSlidingWindow``.
    """
    normalized = sliding_window_type.lower()
    known_types = {
        'not_continuous': SlidingWindowType.NotContinuousSlidingWindow,
        'continuous': SlidingWindowType.ContinuousSlidingWindow,
        'median': SlidingWindowType.MedianSlidingWindow,
    }
    if normalized in known_types:
        return known_types[normalized]
    logging.info('not found correct sliding window type, use default: not_continuous.')
    return SlidingWindowType.NotContinuousSlidingWindow
+
+
def get_metric_value_from_io_data_dict_by_metric_name(io_data_dict: dict, metric_name: MetricName):
    """Look up one metric value in the per-disk IO data.

    The lookup path is disk name -> stage name -> IO access type -> metric
    name, all taken from ``metric_name``. The disk's entry is converted with
    ``dataclasses.asdict`` before the nested lookups.

    Returns:
        The metric value, or None when any key along the path is missing.
    """
    try:
        disk_data = asdict(io_data_dict[metric_name.get_disk_name()])
        stage_data = disk_data[metric_name.get_stage_name()]
        access_data = stage_data[metric_name.get_io_access_type_name()]
        return access_data[metric_name.get_metric_name()]
    except KeyError:
        return None
+
+
def get_data_queue_size_and_update_size(training_data_duration: float, train_update_duration: float,
                                        slow_io_detect_frequency: int):
    """Convert the two training durations into sample counts.

    Each duration is multiplied by ``60 * 60`` and divided by
    ``slow_io_detect_frequency``, then truncated with ``int``.

    Returns:
        ``(data_queue_size, update_size)`` as integers.
    """
    def _to_sample_count(duration):
        return int(duration * 60 * 60 / slow_io_detect_frequency)

    return _to_sample_count(training_data_duration), _to_sample_count(train_update_duration)
+
+
def get_log_level(log_level: str):
    """Translate a configured level name into a ``logging`` level constant.

    Matching is case-insensitive. ``error`` and ``critical`` are accepted in
    addition to ``debug``, ``info``, ``warning`` and ``fatal``; previously a
    configured ``error`` level was silently unrecognised.

    Args:
        log_level: Level name from the configuration.

    Returns:
        The matching ``logging`` level, or None when the name is not recognised.
    """
    known_levels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL,
        'fatal': logging.FATAL,
    }
    return known_levels.get(log_level.lower())
diff --git a/src/python/setup.py b/src/python/setup.py
index c28c691..dac6481 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -33,7 +33,8 @@ setup(
'syssentry=syssentry.syssentry:main',
'xalarmd=xalarm.xalarm_daemon:alarm_process_create',
'sentryCollector=sentryCollector.collectd:main',
- 'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main'
+ 'avg_block_io=sentryPlugins.avg_block_io.avg_block_io:main',
+ 'ai_threshold_slow_io_detection=sentryPlugins.ai_threshold_slow_io_detection.slow_io_detection:main'
]
},
)
--
2.23.0
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。