From e9ab6e55a904001b40113249a0fafe028af54825 Mon Sep 17 00:00:00 2001 From: suweifeng Date: Fri, 16 Aug 2024 08:22:14 +0000 Subject: [PATCH] adapt for HSAK Signed-off-by: suweifeng (cherry picked from commit 7170ecdb44000556cbd95aff9a65940d60487f85) --- ...SAK-needed-head-file-and-API-to-spdk.patch | 856 +++++ 0004-lib-bdev-Add-bdev-support-for-HSAK.patch | 834 +++++ ...ib-env_dpdk-Add-config-args-for-HSAK.patch | 158 + 0006-lib-nvme-Add-nvme-support-for-HSAK.patch | 1317 ++++++++ ...dev-Add-bdev-module-support-for-HSAK.patch | 2529 +++++++++++++++ ..._cmd_dataset_management-and-delete-s.patch | 122 + 0009-spdk-add-nvme-support-for-HSAK.patch | 1158 +++++++ 0010-Add-CUSE-switch-for-nvme-ctrlr.patch | 54 + ...-serial-vendor-special-opcode-in-CUS.patch | 30 + 0012-adapt-for-spdk-24.01.patch | 2883 +++++++++++++++++ spdk.spec | 39 +- 11 files changed, 9973 insertions(+), 7 deletions(-) create mode 100644 0003-add-HSAK-needed-head-file-and-API-to-spdk.patch create mode 100644 0004-lib-bdev-Add-bdev-support-for-HSAK.patch create mode 100644 0005-lib-env_dpdk-Add-config-args-for-HSAK.patch create mode 100644 0006-lib-nvme-Add-nvme-support-for-HSAK.patch create mode 100644 0007-module-bdev-Add-bdev-module-support-for-HSAK.patch create mode 100644 0008-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch create mode 100644 0009-spdk-add-nvme-support-for-HSAK.patch create mode 100644 0010-Add-CUSE-switch-for-nvme-ctrlr.patch create mode 100644 0011-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch create mode 100644 0012-adapt-for-spdk-24.01.patch diff --git a/0003-add-HSAK-needed-head-file-and-API-to-spdk.patch b/0003-add-HSAK-needed-head-file-and-API-to-spdk.patch new file mode 100644 index 0000000..9ef15cd --- /dev/null +++ b/0003-add-HSAK-needed-head-file-and-API-to-spdk.patch @@ -0,0 +1,856 @@ +From 84d4122c0c11f2f104979e5254dcbbe995acc41c Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 18 Feb 2021 10:52:24 +0800 +Subject: [PATCH 03/12] add HSAK needed head file and API to spdk + +Signed-off-by: sunshihao +--- + CONFIG | 3 + + Makefile | 6 + + configure | 8 ++ + etc/spdk/nvme.conf.in | 88 ++++++++++++ + include/spdk/bdev.h | 85 +++++++++++ + include/spdk/bdev_module.h | 89 ++++++++++++ + include/spdk/log.h | 2 +- + include/spdk/nvme.h | 230 ++++++++++++++++++++++++++++++ + include/spdk/thread.h | 18 +++ + include/spdk_internal/bdev_stat.h | 63 ++++++++ + include/spdk_internal/debug.h | 43 ++++++ + mk/spdk.app_vars.mk | 4 +- + 12 files changed, 637 insertions(+), 2 deletions(-) + create mode 100644 etc/spdk/nvme.conf.in + create mode 100644 include/spdk_internal/bdev_stat.h + create mode 100644 include/spdk_internal/debug.h + +diff --git a/CONFIG b/CONFIG +index 7b8d56a..a7674ff 100644 +--- a/CONFIG ++++ b/CONFIG +@@ -22,6 +22,9 @@ CONFIG_CROSS_PREFIX= + # Build with debug logging. Turn off for performance testing and normal usage + CONFIG_DEBUG=n + ++# Enable read and write NVMe for application ++CONFIG_APP_RW=n ++ + # Treat warnings as errors (fail the build on any warning). 
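The new CONFIG_APP_RW knob (set by the --enable-raw configure option added below) reaches C code as the SPDK_CONFIG_APP_RW macro, and every HSAK addition in this series is guarded by it. A minimal sketch of the guard pattern; hsak_submit_unmap is a hypothetical wrapper, while spdk_bdev_unmap_multiblocks is the API this patch adds to include/spdk/bdev.h:

#include "spdk/bdev.h"

/* Hypothetical wrapper: the batch unmap entry point exists only when SPDK
 * was configured with --enable-raw (CONFIG_APP_RW=y). */
static int
hsak_submit_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		  void *unmap_d, uint16_t unmap_count,
		  spdk_bdev_io_completion_cb cb, void *cb_arg)
{
#ifdef SPDK_CONFIG_APP_RW
	return spdk_bdev_unmap_multiblocks(desc, ch, unmap_d, unmap_count, cb, cb_arg);
#else
	(void)desc; (void)ch; (void)unmap_d; (void)unmap_count; (void)cb; (void)cb_arg;
	return -ENOTSUP;	/* stock build: HSAK API compiled out */
#endif
}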
+ CONFIG_WERROR=n + +diff --git a/Makefile b/Makefile +index 3aeae41..6ad42d7 100644 +--- a/Makefile ++++ b/Makefile +@@ -10,6 +10,12 @@ S := + SPDK_ROOT_DIR := $(CURDIR) + include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + ++ifeq ($(CONFIG_APP_RW),y) ++# secure compile option ++CFLAGS += -fPIE -pie -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror ++CFLAGS += -Wl,-z,relro,-z,now,-z,noexecstack -Wtrampolines ++endif ++ + DIRS-y += lib + DIRS-y += module + DIRS-$(CONFIG_SHARED) += shared_lib +diff --git a/configure b/configure +index 86a6ab3..2177acd 100755 +--- a/configure ++++ b/configure +@@ -33,6 +33,8 @@ function usage() { + echo " is specified, DPDK will detect number of cores in the system during" + echo " compilation, and will set maximum number of lcores to this value" + echo " --enable-debug Configure for debug builds" ++ echo " --enable-err-injc Enable error injection feature" ++ echo " --enable-raw Enable read and write NVMe disk feature." + echo " --enable-werror Treat compiler warnings as errors" + echo " --enable-asan Enable address sanitizer" + echo " --enable-ubsan Enable undefined behavior sanitizer" +@@ -310,6 +312,12 @@ for i in "$@"; do + --disable-debug) + CONFIG[DEBUG]=n + ;; ++ --enable-raw) ++ CONFIG[APP_RW]=y ++ ;; ++ --enable-err-injc) ++ CONFIG[ERR_INJC]=y ++ ;; + --enable-asan) + CONFIG[ASAN]=y + ;; +diff --git a/etc/spdk/nvme.conf.in b/etc/spdk/nvme.conf.in +new file mode 100644 +index 0000000..a3df92b +--- /dev/null ++++ b/etc/spdk/nvme.conf.in +@@ -0,0 +1,88 @@ ++#NVME configuration file ++# ++# Please write all parameters using ASCII. ++# The parameter must be quoted if it includes whitespace. ++# ++# Configuration syntax: ++# Leading whitespace is ignored. ++# Lines starting with '#' are comments. ++# Lines ending with '\' are concatenated with the next line. ++# Bracketed ([]) names define sections ++ ++[Global] ++ # Users can restrict work items to only run on certain cores by specifying a ReactorMask. ++ # Can not specify the NO. 0 core. ++ ReactorMask 0x2 ++ ++ # The print level of log. ++ # 0: Print ERROR log only; 1: Print WARNING and ERROR log; and so on, 4: Print all level log ++ LogLevel 1 ++ ++ # The sizes of Memory for Libstorge(Unit: MB). The minimum value is 300MB. ++ # If parameter "SocketMem" was set corrected, MemSize was useless ++ MemSize 300 ++ ++ # The same block device supports multiple queues. ++ MultiQ No ++ ++ # End-to-end data protection. This item is only used if the namespace is formatted to use end-to-end protection information. ++ # if the value is set to '1', then the protection information are generated by controller, and the logical block data and protection information are written to NVM. ++ # if the value is set to '2', then the protection information are transferred from the host buffer to NVM. ++ E2eDif 2 ++ ++ # Open IOstat or not ++ IoStat No ++ ++ # Poll time threshold in millisecond, It will count exceptional polling thread call which duration exceed the value and display in stat report. ++ # This item is only used when UseReactor = No, Set to 0 means disable this measurement. ++ PollTime 0 ++ ++ # Preallocate specified amounts of memory(Unit: MB) per socket. ++ # The parameter is a comma-sprated list of values, For example: ++ # SocketMem 1024,2048 ++ # This will allocate 1 gigabyte of memory on socket 0, and 2048 megabytes of memory on socket 1. ++ # The sum of socket memory must be greater than 300MB. 
++ # if SocketMem was set corrected, The parameter "MemSize" was useless ++ # SocketMem 300 ++ ++ # Place a per-socket upper limit on memory use (non-legacy memory mode only). ++ # 0 will disable the limit for a particular socket. ++ # SocketLimit 1024,1 ++ # This will set upper limit of 1 gigabyte on socket 0, and 1 megabytes of memory on socket 1. ++ # if the value is set to empty, means disable the limit per socket. ++ # if SocketMem was empty, the parameter was useless. ++ # SocketLimit 300 ++ ++ #Decide whether to start rpc server or not ++ RpcServer Yes ++ ++# NVMe configuration options ++[Nvme] ++ # NVMe Device Whitelist ++ # Users may specify which NVMe devices to claim by their transport id. ++ # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format. ++ # The second argument is the assigned name, which can be referenced from ++ # other sections in the configuration file. For NVMe devices, a namespace ++ # is automatically appended to each name in the format nY, where ++ # Y is the NSID (starts at 1). ++ #TransportID "trtype:PCIe traddr:0000:81:00.0" nvme0 ++ #TransportID "trtype:PCIe traddr:0000:01:00.0" nvme1 ++ ++ # The number of attempts per I/O when an I/O fails. Do not include ++ # this key to get the default behavior. ++ RetryCount 4 ++ # Timeout for each command, in microseconds. If 0, don't track timeouts. ++ TimeoutUsec 0 ++ # Action to take on command time out. Only valid when Timeout is greater ++ # than 0. This may be 'Reset' to reset the controller, 'Abort' to abort ++ # the command, or 'None' to just print a message but do nothing. ++ # Admin command timeouts will always result in a reset. ++ ActionOnTimeout None ++ # Set how often the admin queue is polled for asynchronous events. ++ # Units in microseconds. ++ AdminPollRate 100000 ++ ++[Reactor] ++ # Batch size of IO for one-time release by reactor. ++ # The maximum value is 32. ++ BatchSize 8 +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index 02fc065..a0a5adb 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -27,6 +27,8 @@ extern "C" { + + #define SPDK_BDEV_SMALL_BUF_MAX_SIZE 8192 + #define SPDK_BDEV_LARGE_BUF_MAX_SIZE (64 * 1024) ++#define SPDK_BDEV_SMALL_BUF_WITH_MAX_MD 512 ++#define SPDK_BDEV_LARGE_BUF_WITH_MAX_MD 1024 + + #define SPDK_BDEV_MAX_INTERLEAVED_MD_SIZE (64) + +@@ -94,6 +96,42 @@ enum spdk_bdev_status { + SPDK_BDEV_STATUS_REMOVING, + }; + ++#ifdef SPDK_CONFIG_APP_RW ++/** ns status */ ++enum spdk_bdev_ns_status { ++ SPDK_BDEV_NS_STATUS_INVALID, ++ SPDK_BDEV_NS_STATUS_READY, ++ SPDK_BDEV_NS_STATUS_REMOVING, ++ SPDK_BDEV_NS_STATUS_UNREGISTER, ++}; ++ ++typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, void *cb_arg); ++ ++typedef struct libstorage_io { ++ uint8_t *buf; ++ struct iovec *iovs; /* array of iovecs to transfer */ ++ int iovcnt; /* Number of iovecs in iovs array */ ++ int32_t fd; /* File Descriptor */ ++ uint16_t opcode; /* r/w */ ++ uint16_t streamId; /* Stream ID for IO */ ++ uint8_t pi_action; ++ uint8_t fua; ++ uint8_t location; ++ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ ++ uint32_t count; ++ uint32_t nbytes; ++ uint64_t offset; ++ uint8_t *md_buf; ++ uint32_t md_len; ++ uint32_t magic; ++ /*Save the error code returned by the callback */ ++ int32_t err; ++ int32_t reserved; ++ LIBSTORAGE_CALLBACK_FUNC cb; ++ void *cb_arg; ++} LIBSTORAGE_IO_T; ++#endif ++ + /** + * \brief Handle to an opened SPDK block device. 
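The libstorage_io descriptor defined above carries one HSAK request end to end; completion is reported through LIBSTORAGE_CALLBACK_FUNC with a status and an NVMe SCT code. A sketch of filling a write request: the opcode encoding and the fd's origin are not defined in this hunk and are assumptions, and the pi_action/fua values come from the bdev_module.h hunk later in this patch:

#include "spdk/bdev.h"
#include "spdk/bdev_module.h"

static void
write_done(int32_t cb_status, int32_t sct_code, void *cb_arg)
{
	(void)sct_code;
	*(int32_t *)cb_arg = cb_status;	/* mirror of io->err */
}

static void
fill_write_io(struct libstorage_io *io, int32_t fd, uint8_t *buf,
	      uint32_t nbytes, uint64_t offset, int32_t *status)
{
	memset(io, 0, sizeof(*io));
	io->fd = fd;		/* assumed: descriptor from an HSAK open call */
	io->opcode = 1;		/* assumed encoding for a write */
	io->buf = buf;
	io->nbytes = nbytes;
	io->offset = offset;
	io->pi_action = IO_HALF_WAY_PROTECTION;
	io->fua = IO_FUA_NO;
	io->cb = write_done;
	io->cb_arg = status;
}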
+ */ +@@ -122,6 +160,13 @@ enum spdk_bdev_io_type { + SPDK_BDEV_IO_TYPE_SEEK_DATA, + SPDK_BDEV_IO_TYPE_COPY, + SPDK_BDEV_IO_TYPE_NVME_IOV_MD, ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_BDEV_IO_TYPE_READ_NVME, ++ SPDK_BDEV_IO_TYPE_WRITE_NVME, ++ SPDK_BDEV_IO_TYPE_READV_NVME, ++ SPDK_BDEV_IO_TYPE_WRITEV_NVME, ++ SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS, ++#endif + SPDK_BDEV_NUM_IO_TYPES /* Keep last */ + }; + +@@ -176,6 +221,14 @@ struct spdk_bdev_io_stat { + uint64_t max_copy_latency_ticks; + uint64_t min_copy_latency_ticks; + uint64_t ticks_rate; ++#ifdef SPDK_CONFIG_APP_RW ++ int io_stat_id; ++ uint64_t io_ticks; ++ uint64_t pre_ticks; ++ uint64_t cur_ticks; ++ uint64_t start_tsc; ++ uint64_t interval_tsc; ++#endif + + /* This data structure is privately defined in the bdev library. + * This data structure is only used by the bdev_get_iostat RPC now. +@@ -1574,6 +1627,38 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++/** ++ * Submit an unmap request to the block device. Unmap is sometimes also called trim or ++ * deallocate. This notifies the device that the data in the blocks described is no ++ * longer valid. Reading blocks that have been unmapped results in indeterminate data. ++ * ++ * \param bdev Block device description ++ * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). ++ * \param unmap_d An array of unmap descriptors. ++ * \param bdesc_count The number of elements in unmap_d. ++ * \param cb Called when the request is complete. ++ * \param cb_arg Argument passed to cb. ++ * ++ * \return 0 on success. On success, the callback will always ++ * be called (even if the request ultimately failed). Return ++ * negated errno on failure, in which case the callback will not be called. ++ */ ++int ++spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint16_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg); ++ ++void* ++spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); ++ ++void* ++spdk_bdev_io_get_pool(size_t nbytes); ++ ++bool ++spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++#endif ++ + /** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. This notifies the device that the data in the blocks described is no +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index b5dd04b..dbd7d0c 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -367,8 +367,67 @@ struct spdk_bdev_fn_table { + + /** Check if bdev can handle spdk_accel_sequence to handle I/O of specific type. 
*/ + bool (*accel_sequence_supported)(void *ctx, enum spdk_bdev_io_type type); ++ ++#ifdef SPDK_CONFIG_APP_RW ++ uint16_t (*get_io_channel_id)(struct spdk_io_channel *ch); ++ ++ int (*bdev_poll_rsp)(void *pollCh); ++ ++ uint64_t (*get_timeout_count)(struct spdk_io_channel *ch); ++#endif ++}; ++ ++#ifdef SPDK_CONFIG_APP_RW ++static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) ++{ ++ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); ++ uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; ++ *ioLoc = location; ++} ++ ++enum spdk_bdev_driver_ctx { ++ SPDK_BDEV_IO_ACTION_PI, ++ SPDK_BDEV_IO_ACTION_FUA, ++ SPDK_BDEV_IO_STREAM_ID_0, ++ SPDK_BDEV_IO_STREAM_ID_1, ++}; ++ ++enum spdk_bdev_io_e2e_pi_action{ ++ IO_NO_PROTECTION = 0, ++ IO_HALF_WAY_PROTECTION = 1, ++ IO_E2E_PROTECTION = 2 + }; + ++#define FLAG_NO_REF 0x10//bit 4 : 1, disable ctrl ref tag check; 0, enable check ++#define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc ++#define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check ++ ++enum spdk_bdev_io_fua{ ++ IO_FUA_NO = 0, ++ IO_FUA_YES = 1 ++}; ++ ++void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); ++ ++void spdk_bdev_fail_ctrlr(const char* traddr); ++ ++void *nvme_channel_get_group(void *io_ch); ++ ++enum reqLocation_E ++{ ++ LOCAL_RECEIVE_APP = 1, ++ LOCAL_LIBSTORAGE_SUBMIT = 2, ++ LOCAL_LIBSTORAGE_ASYNC_REQ = 3, ++ LOCAL_LIBSTORAGE_BDEV_NVME = 4, ++ LOCAL_LIBSTORAGE_HUNG_REQ = 5, ++ LOCAL_LIBSTORAGE_TO_DISK = 6, ++ LOCAL_LIBSTORAGE_FROM_DISK = 7, ++ LOCAL_LIBSTORAGE_CALLBACK = 8, ++ LOCAL_LIBSTORAGE_SUBMIT_RETRY = 9, ++ LOCAL_LIBSTORAGE_BDEV_NOMEM = 10, ++}; ++#endif ++ + /** bdev I/O completion status */ + enum spdk_bdev_io_status { + SPDK_BDEV_IO_STATUS_AIO_ERROR = -8, +@@ -630,6 +689,10 @@ struct spdk_bdev { + /** True if the state of the QoS is being modified */ + bool qos_mod_in_progress; + ++#ifdef SPDK_CONFIG_APP_RW ++ enum spdk_bdev_ns_status ns_status; ++#endif ++ + /** + * SPDK spinlock protecting many of the internal fields of this structure. If + * multiple locks need to be held, the following order must be used: +@@ -815,6 +878,11 @@ struct spdk_bdev_io { + /* Sequence of accel operations */ + struct spdk_accel_sequence *accel_sequence; + ++#ifdef SPDK_CONFIG_APP_RW ++ /* The number of bytes to transfer */ ++ size_t nbytes; ++#endif ++ + /** stored user callback in case we split the I/O and use a temporary callback */ + spdk_bdev_io_completion_cb stored_user_cb; + +@@ -904,6 +972,27 @@ struct spdk_bdev_io { + /* The data buffer */ + void *buf; + } zone_mgmt; ++#ifdef SPDK_CONFIG_APP_RW ++ struct { ++ /* The data buffer to transfer */ ++ void *buf; ++ ++ /* The meta data buffer to transfer */ ++ void *md_buf; ++ ++ /** Total size of data(in blocks) to be transferred. */ ++ uint64_t num_blocks; ++ ++ /* The number of bytes to transfer */ ++ size_t nbytes; ++ ++ /** Starting offset (in blocks) of the bdev for this I/O. */ ++ size_t offset_blocks; ++ ++ /* meta data buffer size to transfer */ ++ size_t md_len; ++ } contig; ++#endif + } u; + + /** It may be used by modules to put the bdev_io into its own list. */ +diff --git a/include/spdk/log.h b/include/spdk/log.h +index 61520d4..b39891f 100644 +--- a/include/spdk/log.h ++++ b/include/spdk/log.h +@@ -154,7 +154,7 @@ enum spdk_log_level spdk_log_get_print_level(void); + * \param format Format string to the message. 
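The log.h hunk just below relaxes spdk_log() to a weak symbol. The point is link-time substitution: an HSAK application that defines its own strong spdk_log() wins over the library copy when linked statically (see the -Wl,-Bstatic change to mk/spdk.app_vars.mk at the end of this patch). A sketch of such an override, matching the prototype shown here; only the redirection target is application-specific:

#include "spdk/log.h"
#include <stdarg.h>
#include <stdio.h>

/* Strong definition in the application overrides the weak library symbol. */
void
spdk_log(enum spdk_log_level level, const char *file, const int line,
	 const char *func, const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	fprintf(stderr, "[lvl %d] %s:%d %s: ", (int)level, file, line, func);
	vfprintf(stderr, format, ap);
	va_end(ap);
}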
+ */ + void spdk_log(enum spdk_log_level level, const char *file, const int line, const char *func, +- const char *format, ...) __attribute__((__format__(__printf__, 5, 6))); ++ const char *format, ...) __attribute__((weak)) __attribute__((__format__(__printf__, 5, 6))); + + /** + * Same as spdk_log except that instead of being called with variable number of +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index a0b3bd2..d3d3e10 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -3066,6 +3066,7 @@ enum spdk_nvme_ns_flags { + part of the logical block that it is associated with */ + SPDK_NVME_NS_WRITE_UNCORRECTABLE_SUPPORTED = 1 << 6, /**< The write uncorrectable command is supported */ + SPDK_NVME_NS_COMPARE_SUPPORTED = 1 << 7, /**< The compare command is supported */ ++ SPDK_NVME_NS_DPS_PI_MDSTART = 0x100 /**< protection info transferred at start of metadata */ + }; + + /** +@@ -4242,6 +4243,235 @@ struct spdk_nvme_transport_ops { + */ + void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops); + ++#ifdef SPDK_CONFIG_APP_RW ++struct nvme_ctrlr_info { ++ char ctrlName[16]; ++ char pciAddr[24]; ++ uint64_t tnvmcap; /* Total NVM Capacity in bytes */ ++ uint64_t unvmcap; /* Unallocated NVM Capacity in bytes */ ++ int8_t sn[20]; /* Serial number */ ++ int8_t mn[40]; /* Model number */ ++ uint8_t fr[8]; /* Firmware revision */ ++ uint32_t max_num_ns; /* Number of namespaces */ ++ uint32_t version; /* Version of the NVM Express specification that the controller implementation supports */ ++ uint16_t num_io_queues; /* num of io queues */ ++ uint16_t io_queue_size; /* io queue size */ ++ uint16_t device_id; /* Device id */ ++ uint16_t subdevice_id; /* Subsystem device id */ ++ uint16_t vid; /* Vendor id */ ++ uint16_t ssvid; /* Subsystem vendor id */ ++ uint16_t ctrlid; /* Controller id */ ++ uint16_t trtype; /* Transport type */ ++ uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives :1; /* Supports Directives */ ++ uint16_t streams :1; /* Supports Streams Directives */ ++ uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t reserved :12; ++ uint16_t reserved2[3]; ++}; ++ ++struct nvme_ctrlr; ++struct nvme_bdev_ctrlr; ++struct spdk_bdev; ++struct nvme_bdev; ++struct spdk_nvme_ns; ++struct spdk_nvme_qpair; ++int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr); ++struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname); ++struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); ++struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname); ++void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname); ++void nvme_ctrlr_clear_iostat_all(void); ++struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); ++struct spdk_nvme_ns* bdev_nvme_get_ns(struct nvme_bdev *nbdev); ++void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr); ++int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid); ++bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); ++void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); ++void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown); ++bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr); ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t 
nsid, struct spdk_nvme_health_information_page *health_payload); ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct spdk_nvme_error_information_entry *error_info); ++struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr); ++int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata); ++bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid); ++bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); ++bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload); ++int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); ++int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); ++int32_t spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload); ++int32_t spdk_nvme_ns_get_streams_status(struct spdk_nvme_ns *ns, void *payload); ++int32_t spdk_nvme_ns_alloc_streams_res(struct spdk_nvme_ns *ns, uint16_t nsr); ++int32_t spdk_nvme_ns_release_streams_id(struct spdk_nvme_ns *ns, uint16_t streamsId); ++int32_t spdk_nvme_ns_release_streams_res(struct spdk_nvme_ns *ns); ++void spdk_nvme_use_streams(bool use); ++ ++/** ++ * \brief Get the ctrlr is_failed state, for an I/O sent to the given namespace. ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ */ ++bool spdk_nvme_ns_ctrl_is_failed(struct spdk_nvme_ns *ns); ++#define NVME_MAX_CONTROLLERS 1024 ++ ++/* check nvme whether exist by access cc register */ ++bool nvme_ctrlr_is_exist(struct spdk_nvme_ctrlr *ctrlr); ++ ++/* create ctrlr for new added device */ ++int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, const char *base_name, ++ const char **names, size_t *count, const char *hostnqn); ++ ++int spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr); ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); ++ ++#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 ++#define SPDK_NVME_UEVENT_SUBSYSTEM_NVME 2 ++ ++enum spdk_nvme_uevent_action { ++ SPDK_NVME_UEVENT_ADD = 0, ++ SPDK_NVME_UEVENT_REMOVE = 1, ++}; ++ ++struct spdk_uevent { ++ /* remove or add */ ++ enum spdk_nvme_uevent_action action; ++ int subsystem; ++ /* pci address of device */ ++ char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; ++}; ++ ++/* make a socket to get uevent */ ++int nvme_uevent_connect(void); ++ ++/* get uevent from socket fd */ ++int nvme_get_uevent(int fd, struct spdk_uevent *uevent); ++ ++/* blocked to get uevent from socket fd */ ++int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); ++ ++/** ++ * @Description: bind device with pci_addr to driver ++ * @param pci_addr: device's pci_addr,like "0000:08:00.0" ++ * @param driver: driver name which device bind to ++ */ ++int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); ++ ++/** ++ * \brief True if the protection information transferred at the start of metadata ++ * when end-to-end data protection enabled. ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. 
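spdk_nvme_ns_ctrl_is_failed(), declared above, lets the I/O path fail fast instead of queueing to a dead controller. A sketch of the check, reusing the libstorage_io completion convention from the bdev.h hunk; fail_io_if_ctrlr_dead is a hypothetical helper:

#include "spdk/nvme.h"
#include "spdk/bdev.h"	/* struct libstorage_io */

static bool
fail_io_if_ctrlr_dead(struct spdk_nvme_ns *ns, struct libstorage_io *io)
{
	if (spdk_nvme_ns_ctrl_is_failed(ns)) {
		io->err = -ENODEV;
		io->cb(io->err, 0, io->cb_arg);	/* status, SCT code, user arg */
		return true;
	}
	return false;
}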
++ */ ++bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); ++ ++/** ++ * \brief True if the namespace supports Dataset Management command. ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ */ ++bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); ++ ++/** ++ * Submit a data set management request to the specified NVMe namespace. Data set ++ * management operations are designed to optimize interaction with the block ++ * translation layer inside the device. The most common type of operation is ++ * deallocate, which is often referred to as TRIM or UNMAP. ++ * ++ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). ++ * The user must ensure that only one thread submits I/O on a given qpair at any ++ * given time. ++ * ++ * This is a convenience wrapper that will automatically allocate and construct ++ * the correct data buffers. Therefore, ranges does not need to be allocated from ++ * pinned memory and can be placed on the stack. If a higher performance, zero-copy ++ * version of DSM is required, simply build and submit a raw command using ++ * spdk_nvme_ctrlr_cmd_io_raw(). ++ * ++ * \param ns NVMe namespace to submit the DSM request ++ * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute. ++ * \param qpair I/O queue pair to submit the request ++ * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs ++ * to operate on. ++ * \param num_ranges The number of elements in the ranges array. ++ * \param cb_fn Callback function to invoke when the I/O is completed ++ * \param cb_arg Argument to pass to the callback function ++ * ++ * \return 0 if successfully submitted, negated POSIX errno values otherwise. ++ */ ++int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ uint32_t type, ++ const struct spdk_nvme_dsm_range *ranges, ++ uint16_t num_ranges, ++ spdk_nvme_cmd_cb cb_fn, ++ void *cb_arg); ++/** ++ * \brief Submits a write I/O to the specified NVMe namespace. ++ * ++ * \param ns NVMe namespace to submit the write I/O ++ * \param qpair I/O queue pair to submit the request ++ * \param lba starting LBA to write the data ++ * \param lba_count length (in sectors) for the write operation ++ * \param streamId The stream id for write I/O ++ * \param cb_fn callback function to invoke when the I/O is completed ++ * \param cb_arg argument to pass to the callback function ++ * \param io_flags set flags, defined in nvme_spec.h, for this I/O ++ * \param reset_sgl_fn callback function to reset scattered payload ++ * \param next_sge_fn callback function to iterate each scattered ++ * payload memory segment ++ * ++ * \return 0 if successfully submitted, ENOMEM if an nvme_request ++ * structure cannot be allocated for the I/O request ++ * ++ * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). ++ * The user must ensure that only one thread submits I/O on a given qpair at any given time. ++ */ ++int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ uint64_t lba, uint32_t lba_count, uint16_t streamId, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, ++ spdk_nvme_req_reset_sgl_cb reset_sgl_fn, ++ spdk_nvme_req_next_sge_cb next_sge_fn); ++ ++/** ++ * \brief Send comman to NVMe controller to start or abort a self-test operation. ++ * ++ * \param ctrlr NVMe controller to operate self-test command. 
++ * \param nsid Depending on the log page, this may be 0, a namespace identifier, or SPDK_NVME_GLOBAL_NS_TAG. ++ * \param stc self-test code, which specifies the action taken by the Device Self-test command. ++ * \param payload The pointer to the payload buffer. it doesn't work actually. ++ * \param payload_size The size of payload buffer. it doesn't work actually. ++ * \param cb_fn Callback function to invoke when the feature has been retrieved. ++ * \param cb_arg Argument to pass to the callback function. ++ * ++ * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request ++ * ++ * This function is thread safe and can be called at any point while the controller is attached to ++ * the SPDK NVMe driver. ++ * ++ * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion ++ * of commands submitted through this function. ++ * ++ * \sa spdk_nvme_ctrlr_cmd_self_test_operation() ++ */ ++int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc, ++ void *payload, uint32_t payload_size, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg); ++ ++/** ++ *\get I/O queue pair id ++ *\param qpair I/O queue pair to submit the request ++ *\ ++ *\return I/O queue pair id ++ */ ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); ++#endif ++ + /* + * Macro used to register new transports. + */ +diff --git a/include/spdk/thread.h b/include/spdk/thread.h +index 2f520c2..5f239a5 100644 +--- a/include/spdk/thread.h ++++ b/include/spdk/thread.h +@@ -22,6 +22,9 @@ + #include "spdk/env.h" + #include "spdk/util.h" + #include "spdk/likely.h" ++#ifdef SPDK_CONFIG_APP_RW ++#include "rte_config.h" ++#endif + + #ifdef __cplusplus + extern "C" { +@@ -41,6 +44,21 @@ enum spdk_thread_poller_rc { + */ + struct spdk_thread; + ++#ifdef SPDK_CONFIG_APP_RW ++struct spdk_iodev_thread_info { ++ struct spdk_thread *thread; ++ volatile int32_t state; ++ uint32_t bdevnum; ++}; ++extern struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; ++ ++void spdk_reactors_use(bool useOrNot); ++ ++bool spdk_get_reactor_type(void); ++ ++void spdk_set_thread_exited(struct spdk_thread *thread); ++#endif ++ + /** + * A function repeatedly called on the same spdk_thread. + */ +diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h +new file mode 100644 +index 0000000..f1ba1df +--- /dev/null ++++ b/include/spdk_internal/bdev_stat.h +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
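The spdk_internal/bdev_stat.h header introduced here publishes per-channel I/O statistics through a POSIX shared-memory segment named LIBSTORAGE_STAT_SHM_FILE_NAME, an array of STAT_MAX_NUM libstorage_bdev_io_stat slots. A sketch of the reader side in a monitoring process; that the array sits at offset 0 of the segment and that it is created via shm_open() are assumptions this hunk does not confirm:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "spdk_internal/bdev_stat.h"

static struct libstorage_bdev_io_stat *
map_iostat_readonly(void)
{
	int fd = shm_open(LIBSTORAGE_STAT_SHM_FILE_NAME, O_RDONLY, 0);
	void *p;

	if (fd < 0) {
		return NULL;
	}
	p = mmap(NULL, sizeof(struct libstorage_bdev_io_stat) * STAT_MAX_NUM,
		 PROT_READ, MAP_SHARED, fd, 0);
	close(fd);
	return p == MAP_FAILED ? NULL : p;
}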
++*/ ++ ++#ifndef LIBSTORAGE_STAT_H ++#define LIBSTORAGE_STAT_H ++ ++#include ++#include ++#include ++#include ++ ++//share memory file name ++#define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\ ++49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b" ++ ++//max number of channel+bdev ++#define STAT_MAX_NUM 8192 ++ ++extern int32_t g_libstorage_iostat; ++extern int32_t g_polltime_threshold; ++ ++extern pthread_mutex_t *g_io_stat_map_mutex; ++ ++/* libstorage iostat status */ ++enum libstorage_iostat_status { ++ LIBSTORAGE_IOSTAT_DISABLE = 0, ++ LIBSTORAGE_IOSTAT_ENABLE = 1, ++ LIBSTORAGE_IOSTAT_QUERY = 2, ++}; ++ ++struct libstorage_bdev_io_stat ++{ ++ bool used; ++ uint16_t channel_id; ++ char bdev_name[24]; ++ uint64_t num_read_ops; ++ uint64_t num_write_ops; ++ uint64_t bytes_read; ++ uint64_t bytes_written; ++ uint64_t io_outstanding; ++ uint64_t read_latency_ticks; ++ uint64_t write_latency_ticks; ++ uint64_t io_ticks; ++ bool poll_time_used; ++ uint64_t num_poll_timeout; ++}; ++ ++extern struct libstorage_bdev_io_stat *g_io_stat_map; ++ ++int libstorage_stat_init(void); ++ ++int libstorage_stat_exit(void); ++#endif +diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h +new file mode 100644 +index 0000000..5d6e623 +--- /dev/null ++++ b/include/spdk_internal/debug.h +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++*/ ++ ++#ifndef LIBSTORAGE_INTERNAL_DEBUG_H ++#define LIBSTORAGE_INTERNAL_DEBUG_H ++#include "spdk/stdinc.h" ++ ++struct spdk_debug_subsystem ++{ ++ const char *name; ++ void (*output)(FILE *file); ++ TAILQ_ENTRY(spdk_debug_subsystem) tailq; ++}; ++ ++void spdk_add_debug_subsystem(struct spdk_debug_subsystem *subsystem); ++ ++/** ++ * \brief Register a new subsystem ++ */ ++#define SPDK_DEBUG_REGISTER(_name, _output) \ ++ struct spdk_debug_subsystem __spdk_debug_subsystem_ ## _name = \ ++ { \ ++ .name = #_name, \ ++ .output = _output, \ ++ }; \ ++ __attribute__((constructor)) static void _name ## _debug_register(void) \ ++ { \ ++ spdk_add_debug_subsystem(&__spdk_debug_subsystem_ ## _name); \ ++ } ++ ++void spdk_output_debug_info(void); ++ ++#endif +diff --git a/mk/spdk.app_vars.mk b/mk/spdk.app_vars.mk +index 4b3d0f9..944f597 100644 +--- a/mk/spdk.app_vars.mk ++++ b/mk/spdk.app_vars.mk +@@ -20,8 +20,10 @@ SPDK_LIB_LINKER_ARGS = \ + -L$(SPDK_ROOT_DIR)/build/lib \ + -Wl,--whole-archive \ + -Wl,--no-as-needed \ ++ -Wl,-Bstatic \ + $(SPDK_DEPLIB_LIST:%=-lspdk_%) \ +- -Wl,--no-whole-archive ++ -Wl,--no-whole-archive \ ++ -Wl,-Bdynamic + + # This is primarily used for unit tests to ensure they link when shared library + # build is enabled. 
Shared libraries can't get their mock implementation from +-- +2.43.0 + diff --git a/0004-lib-bdev-Add-bdev-support-for-HSAK.patch b/0004-lib-bdev-Add-bdev-support-for-HSAK.patch new file mode 100644 index 0000000..bbc1cbc --- /dev/null +++ b/0004-lib-bdev-Add-bdev-support-for-HSAK.patch @@ -0,0 +1,834 @@ +From 173dc95db4aa75f0f5a8334c16896c854a2c305b Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 18 Feb 2021 16:49:16 +0800 +Subject: [PATCH 04/12] lib/bdev: Add bdev support for HSAK + +Signed-off-by: sunshihao +--- + include/spdk/bdev.h | 21 ++- + include/spdk/bdev_module.h | 9 +- + include/spdk/nvme.h | 41 +++--- + include/spdk_internal/bdev_stat.h | 14 +- + include/spdk_internal/debug.h | 5 +- + lib/bdev/Makefile | 1 + + lib/bdev/bdev.c | 136 ++++++++++++++++++- + lib/bdev/bdev_internal.h | 18 +++ + lib/bdev/bdev_self.c | 217 ++++++++++++++++++++++++++++++ + 9 files changed, 411 insertions(+), 51 deletions(-) + create mode 100644 lib/bdev/bdev_self.c + +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index a0a5adb..a8e9131 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -109,23 +109,22 @@ typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, vo + + typedef struct libstorage_io { + uint8_t *buf; +- struct iovec *iovs; /* array of iovecs to transfer */ +- int iovcnt; /* Number of iovecs in iovs array */ +- int32_t fd; /* File Descriptor */ +- uint16_t opcode; /* r/w */ +- uint16_t streamId; /* Stream ID for IO */ ++ struct iovec *iovs; /* array of iovecs to transfer */ ++ int iovcnt; /* Number of iovecs in iovs array */ ++ int32_t fd; /* File Descriptor */ ++ uint16_t opcode; /* r/w */ ++ uint16_t streamId; /* Stream ID for IO */ + uint8_t pi_action; + uint8_t fua; + uint8_t location; +- bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ ++ bool inSubmit; /* In the I/0 phase or not. Use in nopoll model */ + uint32_t count; + uint32_t nbytes; + uint64_t offset; + uint8_t *md_buf; + uint32_t md_len; + uint32_t magic; +- /*Save the error code returned by the callback */ +- int32_t err; ++ int32_t err; /* Save the error code returned by the callback */ + int32_t reserved; + LIBSTORAGE_CALLBACK_FUNC cb; + void *cb_arg; +@@ -1627,7 +1626,7 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + uint64_t offset, uint64_t nbytes, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +-#ifdef SPDK_CONFIG_APP_RW ++#ifdef SPDK_CONFIG_APP_RW + /** + * Submit an unmap request to the block device. Unmap is sometimes also called trim or + * deallocate. 
This notifies the device that the data in the blocks described is no +@@ -1649,10 +1648,10 @@ spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel + void *unmap_d, uint16_t unmap_count, + spdk_bdev_io_completion_cb cb, void *cb_arg); + +-void* ++void * + spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); + +-void* ++void * + spdk_bdev_io_get_pool(size_t nbytes); + + bool +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index dbd7d0c..a830e2f 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -392,7 +392,7 @@ enum spdk_bdev_driver_ctx { + SPDK_BDEV_IO_STREAM_ID_1, + }; + +-enum spdk_bdev_io_e2e_pi_action{ ++enum spdk_bdev_io_e2e_pi_action { + IO_NO_PROTECTION = 0, + IO_HALF_WAY_PROTECTION = 1, + IO_E2E_PROTECTION = 2 +@@ -402,19 +402,18 @@ enum spdk_bdev_io_e2e_pi_action{ + #define FLAG_CALCRC 0x08//bit 3 : 1, libstorage calculate crc; 0, app calculate crc + #define FLAG_PRCHK 0x04//bit 2 : 1, enable ctrl guard crc check; 0, disable check + +-enum spdk_bdev_io_fua{ ++enum spdk_bdev_io_fua { + IO_FUA_NO = 0, + IO_FUA_YES = 1 + }; + + void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); + +-void spdk_bdev_fail_ctrlr(const char* traddr); ++void spdk_bdev_fail_ctrlr(const char *traddr); + + void *nvme_channel_get_group(void *io_ch); + +-enum reqLocation_E +-{ ++enum reqLocation_E { + LOCAL_RECEIVE_APP = 1, + LOCAL_LIBSTORAGE_SUBMIT = 2, + LOCAL_LIBSTORAGE_ASYNC_REQ = 3, +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index d3d3e10..b6a1f05 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -4262,11 +4262,11 @@ struct nvme_ctrlr_info { + uint16_t ssvid; /* Subsystem vendor id */ + uint16_t ctrlid; /* Controller id */ + uint16_t trtype; /* Transport type */ +- uint16_t support_ns :1; /* Supports the Namespace Management and Namespace Attachment commands */ +- uint16_t directives :1; /* Supports Directives */ +- uint16_t streams :1; /* Supports Streams Directives */ +- uint16_t dsm :1; /* Supports the controller supports the Dataset Management command */ +- uint16_t reserved :12; ++ uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives : 1; /* Supports Directives */ ++ uint16_t streams : 1; /* Supports Streams Directives */ ++ uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t reserved : 12; + uint16_t reserved2[3]; + }; + +@@ -4276,23 +4276,24 @@ struct spdk_bdev; + struct nvme_bdev; + struct spdk_nvme_ns; + struct spdk_nvme_qpair; +-int32_t nvme_ctrlr_get_info(const char* ctrlName, struct nvme_ctrlr_info** ppCtrlr); +-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_name(const char* ctrlname); +-struct spdk_nvme_ctrlr* spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); +-struct nvme_bdev_ctrlr* nvme_ctrlr_get_by_name(const char* ctrlname); +-void nvme_ctrlr_clear_iostat_by_name(const char* ctrlname); ++int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr); ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname); ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); ++struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *ctrlname); ++void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname); + void nvme_ctrlr_clear_iostat_all(void); +-struct nvme_bdev_ctrlr* bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); +-struct spdk_nvme_ns* 
bdev_nvme_get_ns(struct nvme_bdev *nbdev); ++struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); ++struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev); + void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr); + int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_ctrlr, uint32_t nsid); + bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); + void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid); + void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown); + bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr); +-int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, struct spdk_nvme_health_information_page *health_payload); +-int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, struct spdk_nvme_error_information_entry *error_info); +-struct spdk_nvme_ctrlr_opts* spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr); ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *health_payload); ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info); + int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata); + bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid); + bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr); +@@ -4300,7 +4301,8 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload); ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, ++ void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); + int32_t spdk_nvme_ns_ret_streams_param(struct spdk_nvme_ns *ns, void *payload); +@@ -4459,9 +4461,10 @@ int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa + * + * \sa spdk_nvme_ctrlr_cmd_self_test_operation() + */ +-int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, uint32_t stc, +- void *payload, uint32_t payload_size, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg); ++int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ uint32_t stc, ++ void *payload, uint32_t payload_size, ++ spdk_nvme_cmd_cb cb_fn, void *cb_arg); + + /** + *\get I/O queue pair id +diff --git a/include/spdk_internal/bdev_stat.h b/include/spdk_internal/bdev_stat.h +index f1ba1df..58a5102 100644 +--- a/include/spdk_internal/bdev_stat.h ++++ b/include/spdk_internal/bdev_stat.h +@@ -9,21 +9,18 @@ + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
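Among the prototypes reflowed above, spdk_nvme_ctrlr_get_smart_info() pairs with spdk_nvme_ctrlr_is_smart_per_namespace_supported(). A sketch of a caller; falling back to SPDK_NVME_GLOBAL_NS_TAG when per-namespace SMART is unsupported is an assumption, not a convention this patch states:

#include "spdk/nvme.h"

static int
get_smart(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
	  struct spdk_nvme_health_information_page *page)
{
	if (!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr)) {
		nsid = SPDK_NVME_GLOBAL_NS_TAG;	/* assumed fallback */
	}
	return spdk_nvme_ctrlr_get_smart_info(ctrlr, nsid, page);
}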
+-*/ ++ */ + + #ifndef LIBSTORAGE_STAT_H + #define LIBSTORAGE_STAT_H + +-#include +-#include +-#include +-#include ++#include "spdk/stdinc.h" + +-//share memory file name ++/* share memory file name */ + #define LIBSTORAGE_STAT_SHM_FILE_NAME "libstorage_stat.shm.\ + 49ce4ec241e017c65812b71b9832a50865f0b7d9b4d5f18d3d03283b" + +-//max number of channel+bdev ++/* max number of channel+bdev */ + #define STAT_MAX_NUM 8192 + + extern int32_t g_libstorage_iostat; +@@ -38,8 +35,7 @@ enum libstorage_iostat_status { + LIBSTORAGE_IOSTAT_QUERY = 2, + }; + +-struct libstorage_bdev_io_stat +-{ ++struct libstorage_bdev_io_stat { + bool used; + uint16_t channel_id; + char bdev_name[24]; +diff --git a/include/spdk_internal/debug.h b/include/spdk_internal/debug.h +index 5d6e623..cf9b9e7 100644 +--- a/include/spdk_internal/debug.h ++++ b/include/spdk_internal/debug.h +@@ -9,14 +9,13 @@ + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. +-*/ ++ */ + + #ifndef LIBSTORAGE_INTERNAL_DEBUG_H + #define LIBSTORAGE_INTERNAL_DEBUG_H + #include "spdk/stdinc.h" + +-struct spdk_debug_subsystem +-{ ++struct spdk_debug_subsystem { + const char *name; + void (*output)(FILE *file); + TAILQ_ENTRY(spdk_debug_subsystem) tailq; +diff --git a/lib/bdev/Makefile b/lib/bdev/Makefile +index 4305cfe..6d8dfd7 100644 +--- a/lib/bdev/Makefile ++++ b/lib/bdev/Makefile +@@ -11,6 +11,7 @@ SO_VER := 14 + SO_MINOR := 0 + + C_SRCS = bdev.c bdev_rpc.c bdev_zone.c part.c scsi_nvme.c ++C_SRCS-$(CONFIG_APP_RW) += bdev_self.c + C_SRCS-$(CONFIG_VTUNE) += vtune.c + LIBNAME = bdev + +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 8daa452..64355f9 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -25,6 +25,13 @@ + #include "spdk/log.h" + #include "spdk/string.h" + ++#ifdef SPDK_CONFIG_APP_RW ++#include "spdk/stdinc.h" ++#include "spdk/barrier.h" ++#include ++#include "spdk_internal/bdev_stat.h" ++#endif ++ + #include "bdev_internal.h" + #include "spdk_internal/trace_defs.h" + #include "spdk_internal/assert.h" +@@ -3442,6 +3449,17 @@ _bdev_io_submit(void *ctx) + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch; + ++#ifdef SPDK_CONFIG_APP_RW ++ struct spdk_bdev_io_stat *stat = &bdev_ch->stat; ++ if (bdev_ch->io_outstanding > 0) { ++ stat->pre_ticks = stat->cur_ticks; ++ stat->cur_ticks = tsc; ++ stat->io_ticks += stat->cur_ticks - stat->pre_ticks; ++ } else { ++ stat->cur_ticks = tsc; ++ } ++#endif ++ + if (spdk_likely(bdev_ch->flags == 0)) { + bdev_io_do_submit(bdev_ch, bdev_io); + return; +@@ -3453,6 +3471,9 @@ _bdev_io_submit(void *ctx) + if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) && + bdev_abort_queued_io(&bdev_ch->qos_queued_io, bdev_io->u.abort.bio_to_abort)) { + _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_set_io_location(bdev_io->driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NOMEM); ++#endif + } else { + TAILQ_INSERT_TAIL(&bdev_ch->qos_queued_io, bdev_io, internal.link); + bdev_qos_io_submit(bdev_ch, bdev->internal.qos); +@@ -3895,6 +3916,7 @@ bdev_desc_free(struct spdk_bdev_desc *desc) + spdk_spin_destroy(&desc->spinlock); + free(desc->media_events_buffer); + free(desc); ++ desc = NULL; + } + + static void +@@ -4088,6 +4110,9 @@ bdev_channel_create(void *io_device, void *ctx_buf) + ch->flags = 0; + ch->shared_resource = shared_resource; + ++#ifdef SPDK_CONFIG_APP_RW ++ 
spdk_bdev_init_iostat(ch, ch->bdev, ch->channel, &ch->stat); ++#endif + TAILQ_INIT(&ch->io_submitted); + TAILQ_INIT(&ch->io_locked); + TAILQ_INIT(&ch->io_accel_exec); +@@ -4520,6 +4545,10 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) + spdk_histogram_data_free(ch->histogram); + } + ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_destroy_iostat(ch, ch->bdev, ch->channel); ++#endif ++ + bdev_channel_destroy_resource(ch); + } + +@@ -5235,6 +5264,26 @@ spdk_bdev_io_get_seek_offset(const struct spdk_bdev_io *bdev_io) + return bdev_io->u.bdev.seek.offset; + } + ++static void ++bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_blocks, ++ uint64_t num_blocks, ++ struct libstorage_io *io, struct spdk_bdev_io *bdev_io) ++{ ++ bdev_io->type = type; ++ bdev_io->u.contig.buf = buf; ++ bdev_io->u.contig.md_buf = md_buf; ++ bdev_io->u.contig.offset_blocks = offset_blocks; ++ bdev_io->u.contig.num_blocks = num_blocks; ++ bdev_io->u.contig.nbytes = io->nbytes; ++ bdev_io->u.contig.md_len = io->md_len; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; ++ if (type == SPDK_BDEV_IO_TYPE_WRITE_NVME) { ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; ++ } ++} ++ + static int + bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, + void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, +@@ -5255,6 +5304,7 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch + + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; ++#ifndef SPDK_CONFIG_APP_RW + bdev_io->type = SPDK_BDEV_IO_TYPE_READ; + bdev_io->u.bdev.iovs = &bdev_io->iov; + bdev_io->u.bdev.iovs[0].iov_base = buf; +@@ -5303,7 +5353,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe + struct iovec iov = { + .iov_base = buf, + }; +- ++#ifndef SPDK_CONFIG_APP_RW + if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -5311,6 +5361,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe + if (md_buf && !_is_buf_allocated(&iov)) { + return -EINVAL; + } ++#endif + + return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); +@@ -5360,6 +5411,12 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#else ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_READ_NVME, buf, md_buf, offset_blocks, num_blocks, ++ io, bdev_io); ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->internal.memory_domain = domain; + bdev_io->internal.memory_domain_ctx = domain_ctx; +@@ -5390,6 +5447,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -5397,6 +5455,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + if (md_buf && !_is_buf_allocated(iov)) { + return -EINVAL; + } ++#endif + + return bdev_readv_blocks_with_md(desc, 
ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, NULL, NULL, NULL, cb, cb_arg); +@@ -5524,7 +5583,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + struct iovec iov = { + .iov_base = buf, + }; +- ++#ifndef SPDK_CONFIG_APP_RW + if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -5532,6 +5591,7 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann + if (md_buf && !_is_buf_allocated(&iov)) { + return -EINVAL; + } ++#endif + + return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks, + cb, cb_arg); +@@ -5570,6 +5630,16 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel * + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#ifdef SPDK_CONFIG_APP_RW ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_io->type = SPDK_BDEV_IO_TYPE_READV_NVME; ++ bdev_io->u.bdev.nbytes = io->nbytes; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_0] = io->streamId & 0xFF; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->internal.memory_domain = domain; + bdev_io->internal.memory_domain_ctx = domain_ctx; +@@ -5616,6 +5686,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg) + { ++#ifndef SPDK_CONFIG_APP_RW + if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } +@@ -5623,6 +5694,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan + if (md_buf && !_is_buf_allocated(iov)) { + return -EINVAL; + } ++#endif + + return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, NULL, NULL, NULL, cb, cb_arg); +@@ -5981,9 +6053,11 @@ spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch); + ++#ifndef SPDK_CONFIG_APP_RW + if (!desc->write) { + return -EBADF; + } ++#endif + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; +@@ -6008,6 +6082,12 @@ spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io + bdev_io->u.bdev.md_buf = NULL; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; ++#else ++ LIBSTORAGE_IO_T *io = (struct libstorage_io *)cb_arg; ++ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_WRITE_NVME, buf, md_buf, offset_blocks, num_blocks, ++ io, bdev_io); ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->u.bdev.memory_domain = NULL; + bdev_io->u.bdev.memory_domain_ctx = NULL; +@@ -6033,9 +6113,11 @@ spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch); + ++#ifndef SPDK_CONFIG_APP_RW + if (!desc->write) { + return -EBADF; + } ++#endif + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; +@@ -7139,7 +7221,9 @@ _bdev_io_complete(void *ctx) + } + + assert(bdev_io->internal.cb != NULL); ++#ifndef 
SPDK_CONFIG_APP_RW + assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io)); ++#endif + + bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS, + bdev_io->internal.caller_ctx); +@@ -8321,9 +8405,9 @@ spdk_bdev_close(struct spdk_bdev_desc *desc) + + SPDK_DEBUGLOG(bdev, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name, + spdk_get_thread()); +- ++#ifndef SPDK_CONFIG_APP_RW + assert(desc->thread == spdk_get_thread()); +- ++#endif + spdk_poller_unregister(&desc->io_timeout_poller); + + spdk_spin_lock(&g_bdev_mgr.spinlock); +@@ -10306,6 +10390,50 @@ spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + return 0; + } + ++#ifdef SPDK_CONFIG_APP_RW ++void * ++spdk_bdev_io_get_pool(size_t nbytes) ++{ ++ struct spdk_mempool *pool = NULL; ++ ++ if (nbytes == 0 || nbytes > SPDK_BDEV_LARGE_BUF_MAX_SIZE + SPDK_BDEV_LARGE_BUF_WITH_MAX_MD) { ++ SPDK_ERRLOG("The size of buffer[%zu] is incorrect!\n", nbytes); ++ return NULL; ++ } ++ ++ if (nbytes <= SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD) { ++ pool = g_bdev_mgr.buf_small_pool; ++ } else { ++ pool = g_bdev_mgr.buf_large_pool; ++ } ++ ++ return pool; ++} ++ ++void * ++spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch); ++ struct spdk_io_channel *under_io_ch = ch->channel; ++ void *nvme_io_ch = spdk_io_channel_get_ctx(under_io_ch); ++ ++ return nvme_channel_get_group(nvme_io_ch); ++} ++ ++bool ++spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *bdev_ch = NULL; ++ ++ if (io_ch != NULL) { ++ bdev_ch = spdk_io_channel_get_ctx(io_ch); ++ return bdev_ch->io_outstanding != 0; ++ } ++ ++ return false; ++} ++#endif ++ + SPDK_LOG_REGISTER_COMPONENT(bdev) + + SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV) +diff --git a/lib/bdev/bdev_internal.h b/lib/bdev/bdev_internal.h +index f485dfa..8d3dbe6 100644 +--- a/lib/bdev/bdev_internal.h ++++ b/lib/bdev/bdev_internal.h +@@ -31,4 +31,22 @@ typedef void (*bdev_reset_device_stat_cb)(struct spdk_bdev *bdev, void *cb_arg, + void bdev_reset_device_stat(struct spdk_bdev *bdev, enum spdk_bdev_reset_stat_mode mode, + bdev_reset_device_stat_cb cb, void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++void ++spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, ++ struct spdk_bdev_io_stat *stat); ++ ++void ++spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch); ++ ++void ++bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat); ++ ++void ++bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat, ++ struct spdk_io_channel *channel, uint64_t io_outstanding); ++#endif ++ + #endif /* SPDK_BDEV_INTERNAL_H */ +diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c +new file mode 100644 +index 0000000..7050c30 +--- /dev/null ++++ b/lib/bdev/bdev_self.c +@@ -0,0 +1,217 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. 
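spdk_bdev_have_io_in_channel(), added to bdev.c above, reports whether a channel still has outstanding I/O, which makes an orderly teardown loop possible. A sketch; a production caller would bound the wait rather than spin forever:

#include "spdk/bdev.h"
#include "spdk/thread.h"

static void
drain_channel(struct spdk_io_channel *io_ch)
{
	/* Keep polling the current thread so completions are still reaped
	 * while we wait for io_outstanding to reach zero. */
	while (spdk_bdev_have_io_in_channel(io_ch)) {
		spdk_thread_poll(spdk_get_thread(), 0, 0);
	}
}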
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include "bdev_internal.h" ++ ++#include "spdk/stdinc.h" ++#include "spdk/bdev.h" ++#include "spdk/bdev_module.h" ++#include "spdk/env.h" ++#include "spdk/nvme_spec.h" ++#include "spdk/log.h" ++ ++#include ++#include "spdk_internal/bdev_stat.h" ++ ++pthread_mutex_t *g_io_stat_map_mutex = NULL; ++/* share memory for libstorage iostat */ ++struct libstorage_bdev_io_stat *g_io_stat_map; ++/* libstorage iostat enable or disable switch */ ++int32_t g_libstorage_iostat = 0; ++int32_t g_polltime_threshold = 0; ++ ++void ++spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, ++ struct spdk_bdev_io_stat *stat) ++{ ++ int i = 0; ++ bool find = false; ++ uint16_t channel_id; ++ ++ if (bdev->fn_table->get_io_channel_id) { ++ channel_id = bdev->fn_table->get_io_channel_id(io_ch); ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ /* Reuse last record */ ++ if (g_io_stat_map[i].used && !strcmp(g_io_stat_map[i].bdev_name, bdev->name) ++ && g_io_stat_map[i].channel_id == channel_id) { ++ stat->io_stat_id = i; ++ find = true; ++ g_io_stat_map[i].num_read_ops = 0; ++ g_io_stat_map[i].num_write_ops = 0; ++ g_io_stat_map[i].bytes_read = 0; ++ g_io_stat_map[i].bytes_written = 0; ++ g_io_stat_map[i].io_outstanding = 0; ++ g_io_stat_map[i].read_latency_ticks = 0; ++ g_io_stat_map[i].write_latency_ticks = 0; ++ g_io_stat_map[i].io_ticks = 0; ++ g_io_stat_map[i].poll_time_used = false; ++ g_io_stat_map[i].num_poll_timeout = 0; ++ break; ++ } ++ } ++ if (!find) { ++ /* Add lock when multi thread or process */ ++ if (pthread_mutex_lock(g_io_stat_map_mutex) == EOWNERDEAD) { ++ if (pthread_mutex_consistent(g_io_stat_map_mutex)) { ++ SPDK_WARNLOG("[libstorage] the iostat_map process mutex is not normal any more.\n"); ++ } ++ } ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ /* Find unused record, allocate it to this channel */ ++ if (!g_io_stat_map[i].used) { ++ g_io_stat_map[i].used = true; ++ if (strncpy_s(g_io_stat_map[i].bdev_name, sizeof(g_io_stat_map[i].bdev_name), bdev->name, ++ sizeof(g_io_stat_map[i].bdev_name) - 1) != 0) { ++ SPDK_ERRLOG("[libstorage] string copy failed.\n"); ++ } ++ g_io_stat_map[i].channel_id = channel_id; ++ stat->io_stat_id = i; ++ find = true; ++ break; ++ } ++ } ++ pthread_mutex_unlock(g_io_stat_map_mutex); ++ } ++ if (!find) { ++ stat->io_stat_id = -1; ++ SPDK_ERRLOG("channel %u bdev %s allocate io stat memory failed.\n", channel_id, bdev->name); ++ } ++ } else { ++ /* It is not nvme disk, can use iostat. So do not do IO statistics in libstorage. 
*/
++ stat->io_stat_id = -1;
++ }
++ stat->start_tsc = spdk_get_ticks();
++ stat->interval_tsc = spdk_get_ticks_hz() / 10;
++}
++
++void
++spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev,
++ struct spdk_io_channel *io_ch)
++{
++ int i = 0;
++ uint16_t channel_id;
++
++ if (bdev->fn_table->get_io_channel_id) {
++ channel_id = bdev->fn_table->get_io_channel_id(io_ch);
++ for (i = 0; i < STAT_MAX_NUM; i++) {
++ /* clear this channel's iostat record in shared memory */
++ if (g_io_stat_map[i].used && !strcmp(g_io_stat_map[i].bdev_name, bdev->name)
++ && g_io_stat_map[i].channel_id == channel_id) {
++ g_io_stat_map[i].channel_id = 0;
++ memset(g_io_stat_map[i].bdev_name, 0, sizeof(g_io_stat_map[i].bdev_name));
++ g_io_stat_map[i].num_read_ops = 0;
++ g_io_stat_map[i].num_write_ops = 0;
++ g_io_stat_map[i].bytes_read = 0;
++ g_io_stat_map[i].bytes_written = 0;
++ g_io_stat_map[i].io_outstanding = 0;
++ g_io_stat_map[i].read_latency_ticks = 0;
++ g_io_stat_map[i].write_latency_ticks = 0;
++ g_io_stat_map[i].io_ticks = 0;
++ /* Clear the used flag last to avoid racing with channel creation. */
++ g_io_stat_map[i].used = false;
++ g_io_stat_map[i].poll_time_used = false;
++ g_io_stat_map[i].num_poll_timeout = 0;
++ }
++ }
++ }
++}
++
++int
++spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
++ void *unmap_d, uint16_t unmap_count,
++ spdk_bdev_io_completion_cb cb, void *cb_arg)
++{
++ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
++ struct spdk_bdev_io *bdev_io = NULL;
++ struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
++
++ bdev_io = bdev_channel_get_io(channel);
++ if (bdev_io == NULL) {
++ return -ENOMEM;
++ }
++
++ bdev_io->internal.ch = channel;
++ bdev_io->internal.desc = desc;
++ bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS;
++ bdev_io->u.contig.buf = unmap_d;
++ bdev_io->u.contig.num_blocks = unmap_count;
++ bdev_io_init(bdev_io, bdev, cb_arg, cb);
++
++ bdev_io_submit(bdev_io);
++ return 0;
++}
++
++void
++bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat)
++{
++ switch (bdev_io->type) {
++ case SPDK_BDEV_IO_TYPE_READ_NVME:
++ stat->bytes_read += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len;
++ stat->num_read_ops++;
++ stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
++ break;
++ case SPDK_BDEV_IO_TYPE_WRITE_NVME:
++ stat->bytes_written += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len;
++ stat->num_write_ops++;
++ stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
++ break;
++ case SPDK_BDEV_IO_TYPE_READV_NVME:
++ stat->bytes_read += bdev_io->u.bdev.nbytes;
++ stat->num_read_ops++;
++ stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
++ break;
++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME:
++ stat->bytes_written += bdev_io->u.bdev.nbytes;
++ stat->num_write_ops++;
++ stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc);
++ break;
++ default:
++ break;
++ }
++}
++
++void
++bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat,
++ struct spdk_io_channel *channel, uint64_t io_outstanding)
++{
++ uint64_t num_poll_timeout;
++
++ stat->pre_ticks = stat->cur_ticks;
++ stat->cur_ticks = tsc;
++ stat->io_ticks += stat->cur_ticks - stat->pre_ticks;
++
++ if (g_libstorage_iostat) {
++ if ((stat->io_stat_id >= 0) && (stat->io_stat_id < STAT_MAX_NUM)) {
++ g_io_stat_map[stat->io_stat_id].io_outstanding = io_outstanding;
++ if (tsc > (stat->start_tsc + stat->interval_tsc)) {
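++ /*
++ * interval_tsc is one tenth of a second's worth of ticks (set in
++ * spdk_bdev_init_iostat above), so the shared-memory map below is
++ * refreshed at most about ten times per second, not on every I/O.
++ */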
++ g_io_stat_map[stat->io_stat_id].num_read_ops = stat->num_read_ops;
++ g_io_stat_map[stat->io_stat_id].num_write_ops = stat->num_write_ops;
++ g_io_stat_map[stat->io_stat_id].bytes_read = stat->bytes_read;
++ g_io_stat_map[stat->io_stat_id].bytes_written = stat->bytes_written;
++ g_io_stat_map[stat->io_stat_id].read_latency_ticks = stat->read_latency_ticks;
++ g_io_stat_map[stat->io_stat_id].write_latency_ticks = stat->write_latency_ticks;
++ g_io_stat_map[stat->io_stat_id].io_ticks = stat->io_ticks;
++
++ stat->start_tsc = tsc;
++
++ if (g_polltime_threshold) {
++ num_poll_timeout = bdev_io->bdev->fn_table->get_timeout_count ? \
++ bdev_io->bdev->fn_table->get_timeout_count(channel) : 0;
++ g_io_stat_map[stat->io_stat_id].poll_time_used = true;
++ g_io_stat_map[stat->io_stat_id].num_poll_timeout = num_poll_timeout;
++ }
++ }
++ }
++ }
++}
+--
+2.43.0
+
diff --git a/0005-lib-env_dpdk-Add-config-args-for-HSAK.patch b/0005-lib-env_dpdk-Add-config-args-for-HSAK.patch
new file mode 100644
index 0000000..41337f4
--- /dev/null
+++ b/0005-lib-env_dpdk-Add-config-args-for-HSAK.patch
@@ -0,0 +1,158 @@
+From 4a2d5f1dbcb8ec9cd50624338c97772e5039a0c3 Mon Sep 17 00:00:00 2001
+From: sunshihao
+Date: Mon, 22 Feb 2021 19:58:17 +0800
+Subject: [PATCH 05/12] lib/env_dpdk: Add config args for HSAK
+
+Signed-off-by: sunshihao
+---
+ lib/env_dpdk/init.c | 7 +++++++
+ lib/event/reactor.c | 29 +++++++++++++++++++++++++++--
+ lib/jsonrpc/jsonrpc_internal.h | 2 +-
+ lib/jsonrpc/jsonrpc_server_tcp.c | 4 ++--
+ 4 files changed, 37 insertions(+), 5 deletions(-)
+
+diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c
+index ea868c0..e94486c 100644
+--- a/lib/env_dpdk/init.c
++++ b/lib/env_dpdk/init.c
+@@ -439,6 +439,13 @@ build_eal_cmdline(const struct spdk_env_opts *opts)
+
+ #ifdef __linux__
+
++#ifdef SPDK_CONFIG_APP_RW
++ /* Force physical-address IOVA mode to keep behavior consistent with DPDK 16.11. */
++ args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=pa"));
++ if (args == NULL) {
++ return -1;
++ }
++#endif
+ if (opts->iova_mode) {
+ /* iova-mode=pa is incompatible with no_huge */
+ args = push_arg(args, &argcount, _sprintf_alloc("--iova-mode=%s", opts->iova_mode));
+diff --git a/lib/event/reactor.c b/lib/event/reactor.c
+index 1506feb..3cb2e12 100644
+--- a/lib/event/reactor.c
++++ b/lib/event/reactor.c
+@@ -16,6 +16,8 @@
+ #include "spdk/scheduler.h"
+ #include "spdk/string.h"
+ #include "spdk/fd_group.h"
++#include "spdk_internal/thread.h"
++#include "spdk/conf.h"
+
+ #ifdef __linux__
+ #include <sys/eventfd.h>
+@@ -28,6 +30,10 @@
+
+ #define SPDK_EVENT_BATCH_SIZE 8
+
++#ifdef SPDK_CONFIG_APP_RW
++struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE];
++#endif
++
+ static struct spdk_reactor *g_reactors;
+ static uint32_t g_reactor_count;
+ static struct spdk_cpuset g_reactor_core_mask;
+@@ -36,6 +42,7 @@ static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZE
+ static bool g_framework_context_switch_monitor_enabled = true;
+
+ static struct spdk_mempool *g_spdk_event_mempool = NULL;
++static int16_t g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE;
+
+ TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
+ = TAILQ_HEAD_INITIALIZER(g_scheduler_list);
+@@ -213,6 +220,20 @@ spdk_reactors_init(size_t msg_mempool_size)
+ uint32_t i, current_core;
+ char mempool_name[32];
+
++#ifdef SPDK_CONFIG_APP_RW
++ struct spdk_conf_section *sp;
++ sp = spdk_conf_find_section(NULL, "Reactor");
++ if (sp != 0) {
++ g_reactor_batch_size = spdk_conf_section_get_intval(sp, "BatchSize");
++ if (g_reactor_batch_size <= 0 || 
g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) { ++ g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE; ++ } ++ syslog(LOG_INFO,"BatchSize is set to %d\n", g_reactor_batch_size); ++ } else { ++ SPDK_ERRLOG("config file does not contain [Reactor] section, which need to be provided\n"); ++ } ++#endif ++ + snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid()); + g_spdk_event_mempool = spdk_mempool_create(mempool_name, + 262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */ +@@ -556,7 +577,7 @@ event_queue_run_batch(void *arg) + return -errno; + } + +- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); ++ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size); + + if (spdk_ring_count(reactor->events) != 0) { + /* Trigger new notification if there are still events in event-queue waiting for processing. */ +@@ -567,7 +588,7 @@ event_queue_run_batch(void *arg) + } + } + } else { +- count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE); ++ count = spdk_ring_dequeue(reactor->events, events, g_reactor_batch_size); + } + + if (count == 0) { +@@ -969,6 +990,9 @@ reactor_run(void *arg) + } + + if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) { ++#ifdef SPDK_CONFIG_APP_RW ++ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED; ++#endif + break; + } + } +@@ -1070,6 +1094,7 @@ spdk_reactors_start(void) + spdk_env_thread_wait_all(); + + g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN; ++#endif + } + + static void +diff --git a/lib/jsonrpc/jsonrpc_internal.h b/lib/jsonrpc/jsonrpc_internal.h +index d13ec7a..d1fe15e 100644 +--- a/lib/jsonrpc/jsonrpc_internal.h ++++ b/lib/jsonrpc/jsonrpc_internal.h +@@ -12,7 +12,7 @@ + + #include "spdk/log.h" + +-#define SPDK_JSONRPC_RECV_BUF_SIZE (32 * 1024) ++#define SPDK_JSONRPC_RECV_BUF_SIZE (4 * 1024 * 1024) + #define SPDK_JSONRPC_SEND_BUF_SIZE_INIT (32 * 1024) + #define SPDK_JSONRPC_SEND_BUF_SIZE_MAX (32 * 1024 * 1024) + #define SPDK_JSONRPC_ID_MAX_LEN 128 +diff --git a/lib/jsonrpc/jsonrpc_server_tcp.c b/lib/jsonrpc/jsonrpc_server_tcp.c +index ceea4f6..6abfd43 100644 +--- a/lib/jsonrpc/jsonrpc_server_tcp.c ++++ b/lib/jsonrpc/jsonrpc_server_tcp.c +@@ -298,7 +298,7 @@ jsonrpc_server_conn_recv(struct spdk_jsonrpc_server_conn *conn) + } + + offset += rc; +- } while (rc > 0); ++ } while (rc > 1000); + + if (offset > 0) { + /* +@@ -367,7 +367,7 @@ more: + return 0; + } + +- SPDK_DEBUGLOG(rpc, "send() failed: %s\n", spdk_strerror(errno)); ++ SPDK_ERRLOG("send() failed: %s\n", spdk_strerror(errno)); + return -1; + } + +-- +2.43.0 + diff --git a/0006-lib-nvme-Add-nvme-support-for-HSAK.patch b/0006-lib-nvme-Add-nvme-support-for-HSAK.patch new file mode 100644 index 0000000..0074c38 --- /dev/null +++ b/0006-lib-nvme-Add-nvme-support-for-HSAK.patch @@ -0,0 +1,1317 @@ +From e58cde165cd877801c4332757399d249c6500d40 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 25 Feb 2021 16:15:02 +0800 +Subject: [PATCH 06/12] lib/nvme: Add nvme support for HSAK + +Signed-off-by: sunshihao +--- + lib/event/reactor.c | 2 +- + lib/nvme/Makefile | 1 + + lib/nvme/nvme.c | 124 ++++++++++++++++- + lib/nvme/nvme_ctrlr.c | 31 +++++ + lib/nvme/nvme_ctrlr_cmd.c | 14 ++ + lib/nvme/nvme_ctrlr_self.c | 239 ++++++++++++++++++++++++++++++++ + lib/nvme/nvme_internal.h | 14 +- + lib/nvme/nvme_ns.c | 5 + + lib/nvme/nvme_ns_cmd.c | 140 ++++++++++++++++++- + lib/nvme/nvme_ns_self.c | 93 +++++++++++++ + lib/nvme/nvme_pcie.c | 3 + + lib/nvme/nvme_pcie_common.c | 12 ++ + lib/nvme/nvme_qpair.c | 9 +- + 
lib/nvme/nvme_rebind.c | 262 ++++++++++++++++++++++++++++++
+ 14 files changed, 939 insertions(+), 10 deletions(-)
+ create mode 100644 lib/nvme/nvme_ctrlr_self.c
+ create mode 100644 lib/nvme/nvme_ns_self.c
+ create mode 100644 lib/nvme/nvme_rebind.c
+
+diff --git a/lib/event/reactor.c b/lib/event/reactor.c
+index 3cb2e12..6f604e1 100644
+--- a/lib/event/reactor.c
++++ b/lib/event/reactor.c
+@@ -228,7 +228,7 @@ spdk_reactors_init(size_t msg_mempool_size)
+ if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) {
+ g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE;
+ }
+- syslog(LOG_INFO,"BatchSize is set to %d\n", g_reactor_batch_size);
++ syslog(LOG_INFO, "BatchSize is set to %d\n", g_reactor_batch_size);
+ } else {
+ SPDK_ERRLOG("config file does not contain [Reactor] section, which need to be provided\n");
+ }
+diff --git a/lib/nvme/Makefile b/lib/nvme/Makefile
+index d9ba07b..c50672a 100644
+--- a/lib/nvme/Makefile
++++ b/lib/nvme/Makefile
+@@ -19,6 +19,7 @@ ifeq ($(OS),Linux)
+ C_SRCS += nvme_vfio_user.c
+ endif
+ C_SRCS-$(CONFIG_RDMA) += nvme_rdma.c
++C_SRCS-$(CONFIG_APP_RW) += nvme_rebind.c nvme_ctrlr_self.c nvme_ns_self.c
+
+ LIBNAME = nvme
+ LOCAL_SYS_LIBS =
+diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c
+index 0254be4..28a48c3 100644
+--- a/lib/nvme/nvme.c
++++ b/lib/nvme/nvme.c
+@@ -9,6 +9,9 @@
+ #include "spdk/env.h"
+ #include "nvme_internal.h"
+ #include "nvme_io_msg.h"
++#include "spdk/nvme.h"
++#include "spdk_internal/debug.h"
++#include "spdk/bdev_module.h"
+
+ #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"
+
+@@ -29,6 +32,91 @@ nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
+ return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
+ }
+
++#ifdef SPDK_CONFIG_APP_RW
++static pthread_t g_admin_timer_thread;
++
++bool nvme_ctrlr_is_exist(struct spdk_nvme_ctrlr *ctrlr)
++{
++ union spdk_nvme_cc_register cc;
++
++ if (NULL == ctrlr) {
++ SPDK_ERRLOG("invalid parameter\n");
++ return false;
++ }
++
++ if (nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
++ &cc.raw) != 0) {
++ return false;
++ }
++
++ return true;
++}
++
++static void admin_timer_timeout(void)
++{
++ struct spdk_nvme_ctrlr *ctrlr = NULL;
++
++ nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
++
++ TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
++ if (ctrlr->is_resetting) {
++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
++ continue;
++ }
++ spdk_nvme_ctrlr_process_admin_completions(ctrlr);
++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
++ }
++
++ nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
++}
++
++static void *nvme_ctrlr_run_admin_timer(void *arg)
++{
++ sleep(20);
++
++ while (1) {
++ admin_timer_timeout();
++ usleep(10000);
++ }
++
++ return ((void *)0);
++}
++
++static int
++nvme_ctrlr_start_admin_timer(void)
++{
++ if (g_admin_timer_thread == 0) {
++ if (pthread_create(&g_admin_timer_thread, NULL, nvme_ctrlr_run_admin_timer, NULL) != 0) {
++ SPDK_ERRLOG("Failed to create admin timer thread.\n");
++ return -1;
++ }
++ }
++
++ return 0;
++}
++
++int
++spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr)
++{
++ nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
++
++ nvme_ctrlr_proc_put_ref(ctrlr);
++
++ if (nvme_ctrlr_get_ref_count(ctrlr) == 0) {
++ if (nvme_ctrlr_shared(ctrlr)) {
++ TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
++ } else {
++ TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
++ }
++ 
nvme_ctrlr_destruct_ublock(ctrlr); ++ } ++ ++ nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); ++ return 0; ++} ++#endif ++ + void + nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx, + struct spdk_nvme_ctrlr *ctrlr) +@@ -214,13 +302,14 @@ void + nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl) + { + struct nvme_completion_poll_status *status = arg; +- ++#ifndef SPDK_CONFIG_APP_RW + if (status->timed_out) { + /* There is no routine waiting for the completion of this request, free allocated memory */ + spdk_free(status->dma_data); + free(status); + return; + } ++#endif + + /* + * Copy status into the argument passed by the caller, so that +@@ -484,7 +573,11 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + + assert(active_proc->timeout_cb_fn != NULL); + ++#ifndef SPDK_CONFIG_APP_RW + if (req->timed_out || req->submit_tick == 0) { ++#else ++ if (req->submit_tick == 0) { ++#endif + return 0; + } + +@@ -501,6 +594,7 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + return 1; + } + ++#ifndef SPDK_CONFIG_APP_RW + req->timed_out = true; + + /* +@@ -511,6 +605,28 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid, + active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, + nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, + cid); ++#else ++ if (!nvme_qpair_is_admin_queue(qpair) && (req->cmd.opc == SPDK_NVME_OPC_WRITE || ++ req->cmd.opc == SPDK_NVME_OPC_READ)) { ++ SPDK_WARNLOG("IO timeout, OP[%u] NS[%u] LBA[%lu].\n", req->cmd.opc, req->cmd.nsid, ++ *(uint64_t *)&req->cmd.cdw10); ++ } else { ++ SPDK_WARNLOG("%s Command[%u] timeout.\n", nvme_qpair_is_admin_queue(qpair) ? ++ "Admin" : "IO", req->cmd.opc); ++ } ++ if (req->timed_out) { ++ /* Reset the controller if the command was already timed out. */ ++ SPDK_WARNLOG("IO Command[%u] timeout again, reset controller.\n", cid); ++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, NULL, cid); ++ } else { ++ req->timed_out = true; ++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, ++ nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, ++ cid); ++ /* Timing again. Reset the controller if it times out again */ ++ req->submit_tick = spdk_get_ticks(); ++ } ++#endif + return 0; + } + +@@ -814,6 +930,12 @@ nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx, + return -1; + } + ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_ctrlr_start_admin_timer() != 0) { ++ return -1; ++ } ++#endif ++ + nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); + + rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect); +diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c +index 0fa6808..4143c15 100644 +--- a/lib/nvme/nvme_ctrlr.c ++++ b/lib/nvme/nvme_ctrlr.c +@@ -12,6 +12,9 @@ + #include "spdk/env.h" + #include "spdk/string.h" + #include "spdk/endian.h" ++#ifdef SPDK_CONFIG_APP_RW ++#include "spdk_internal/debug.h" ++#endif + + struct nvme_active_ns_ctx; + +@@ -3638,8 +3641,13 @@ nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nv + * Not using sleep() to avoid blocking other controller's initialization. 
+ */ + if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) { ++#ifdef SPDK_CONFIG_APP_RW ++ SPDK_DEBUGLOG(nvme, "Applying quirk: delay 2 seconds before reading registers\n"); ++ ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz(); ++#else + NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n"); + ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000); ++#endif + } + + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, +@@ -4109,11 +4117,15 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr) + { + int rc; + ++#ifdef SPDK_CONFIG_APP_RW ++ nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); ++#else + if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE); + } else { + nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE); + } ++#endif + + if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) { + NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds max defined by NVMe spec, use max value\n", +@@ -5331,3 +5343,22 @@ spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr, + { + return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size); + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++void ++nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ struct spdk_nvme_qpair *qpair = NULL; ++ struct spdk_nvme_qpair *tmp = NULL; ++ ++ SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); ++ TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { ++ spdk_nvme_ctrlr_free_io_qpair(qpair); ++ } ++ ++ nvme_ctrlr_free_doorbell_buffer(ctrlr); ++ nvme_ctrlr_destruct_namespaces(ctrlr); ++ spdk_bit_array_free(&ctrlr->free_io_qids); ++ nvme_transport_ctrlr_destruct(ctrlr); ++} ++#endif +diff --git a/lib/nvme/nvme_ctrlr_cmd.c b/lib/nvme/nvme_ctrlr_cmd.c +index bcc92b2..416f099 100644 +--- a/lib/nvme/nvme_ctrlr_cmd.c ++++ b/lib/nvme/nvme_ctrlr_cmd.c +@@ -610,6 +610,20 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr) + break; + } + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ if (rc < 0) { ++ /* If abort fail, free all of the queued abort requests */ ++ STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) { ++ STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq); ++ nvme_free_request(next); ++ ctrlr->outstanding_aborts--; ++ } ++ } ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++#endif ++ + } + + static int +diff --git a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +new file mode 100644 +index 0000000..d3937d9 +--- /dev/null ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -0,0 +1,239 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#include "spdk/stdinc.h" ++#include "nvme_internal.h" ++ ++void ++spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) ++{ ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ ctrlr-> is_destructed= is_shutdown; ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++} ++ ++bool ++spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ if(NULL == ctrlr) { ++ SPDK_ERRLOG("spdk_nvme_ctrlr_is_smart_per_namespace_supported: Invalid Parameters!\n"); ++ return false; ++ } ++ ++ /* check Bit 0 of Log Page Attributes(LPA), ++ to find out whether the controller supports namespace basis or not. */ ++ ++ if(0 == ctrlr->cdata.lpa.ns_smart) { ++ SPDK_NOTICELOG("This controller does not support the SMART information on a per namespace basis.\n"); ++ return false; ++ } ++ ++ return true; ++} ++ ++static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, uint32_t nsid, ++ void *payload, uint32_t payload_size) ++{ ++ struct nvme_completion_poll_status status = {0x0}; ++ int ret; ++ ++ status.done = false; ++ ret = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, log_page, nsid, payload, payload_size, 0, ++ nvme_completion_poll_cb, &status); ++ if (ret) { ++ return ret; ++ } ++ ++ while (status.done == false) { ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ } ++ if (spdk_nvme_cpl_is_error(&status.cpl)) { ++ SPDK_ERRLOG("spdk_nvme_ctrlr_get_smart_info failed! sc[0x%x], sct[0x%x]\n", ++ status.cpl.status.sc, status.cpl.status.sct); ++ return -ENXIO; ++ } ++ return 0; ++} ++ ++int ++spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *smart_info) ++{ ++ struct spdk_nvme_ns *ns = NULL; ++ ++ if(NULL == ctrlr || NULL == smart_info) { ++ SPDK_ERRLOG("Invalid parameters!\n"); ++ return -EINVAL; ++ } ++ ++ /* if controller does not support namespase basis, then set the nsid to 0xFFFFFFFF, and continue the process. ++ and if nsid is 0, set the nsid to 0xFFFFFFFF too. */ ++ if(!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { ++ nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ } ++ ++ /* nsid should be 0xffffffff or on a per namespace basis. */ ++ if(nsid != SPDK_NVME_GLOBAL_NS_TAG) { ++ ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); ++ if (NULL == ns) { ++ SPDK_ERRLOG("Invalid NS %u\n", nsid); ++ return -EINVAL; ++ } ++ ++ /* if the namespace specified is not active, set the nsid to 0xFFFFFFFF, and continue the process. */ ++ if (!spdk_nvme_ns_is_active(ns)) { ++ SPDK_WARNLOG("NS %u is not active\n", nsid); ++ nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ } ++ } ++ ++ return nvme_get_log_info(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, nsid, smart_info, ++ sizeof(struct spdk_nvme_health_information_page)); ++} ++ ++int ++spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info) ++{ ++ const struct spdk_nvme_ctrlr_data *cdata = NULL; ++ uint32_t nsid = SPDK_NVME_GLOBAL_NS_TAG; ++ int ret; ++ ++ if(NULL == ctrlr || NULL == error_info) { ++ SPDK_ERRLOG("Invalid parameters!\n"); ++ return -EINVAL; ++ } ++ ++ /* get cdata for judging the err_entries is bigger than the maximum number of entries supported or not. */ ++ cdata = spdk_nvme_ctrlr_get_data(ctrlr); ++ if (err_entries > (cdata->elpe + 1u)) { ++ /* if the parameter is bigger, then cut it into the maximum number supported. 
*/ ++ SPDK_WARNLOG("Cannot get %d error log entries, the controller only support %d errors.\n", ++ err_entries, cdata->elpe + 1); ++ err_entries = cdata->elpe + 1; ++ } ++ ++ ret = nvme_get_log_info(ctrlr, SPDK_NVME_LOG_ERROR, nsid, error_info, ++ sizeof(struct spdk_nvme_error_information_entry) * err_entries); ++ if (ret) { ++ return ret; ++ } ++ /* return actual count of error log pages info. */ ++ return err_entries; ++} ++ ++struct spdk_nvme_ctrlr_opts * ++spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return &ctrlr->opts; ++} ++ ++bool ++spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.ns_manage != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.format != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.fna.format_all_ns != 0; ++} ++ ++bool ++spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ return ctrlr->cdata.oacs.directives != 0; ++} ++ ++void ++spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ int rc; ++ struct nvme_completion_poll_status status; ++ struct spdk_nvme_ctrlr_data cdata; ++ if (ctrlr == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return; ++ } ++ ++ status.done = false; ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0, &cdata, ++ sizeof(cdata), nvme_completion_poll_cb, &status); ++ if (rc != 0) { ++ return; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify ctrlr data, cannot update unvmcap, sct[%x], sc[%x]\n", ++ status.cpl.status.sct, status.cpl.status.sc); ++ return; ++ } ++ ++ ctrlr->cdata.unvmcap[0] = cdata.unvmcap[0]; ++ ctrlr->cdata.unvmcap[1] = cdata.unvmcap[1]; ++} ++ ++int32_t ++spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) ++{ ++ struct nvme_completion_poll_status status; ++ int32_t res; ++ ++ if (ctrlr == NULL || payload == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return -EINVAL; ++ } ++ ++ if (!spdk_nvme_ctrlr_is_directive_supported(ctrlr)) { ++ SPDK_WARNLOG("The controller[%s] does not support Directives.\n", ctrlr->trid.traddr); ++ return -EPERM; ++ } ++ ++ status.done = false; ++ res = nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, SPDK_NVME_ID_RECV_OP_RET_PARA, ++ SPDK_NVME_DIR_TYPE_IDENTIFY, 0, payload, ++ sizeof(struct spdk_nvme_identify_recv_ret_para), ++ 0, nvme_completion_poll_cb, &status); ++ if (res != 0) { ++ return res; ++ } ++ ++ while (status.done == false) { ++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); ++ spdk_nvme_qpair_process_completions(ctrlr->adminq, 0); ++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); ++ } ++ ++ if (spdk_nvme_cpl_is_error(&status.cpl)) { ++ SPDK_ERRLOG("Failed to Identify directive! 
sc[0x%x], sct[0x%x]\n", ++ status.cpl.status.sc, status.cpl.status.sct); ++ return -ENXIO; ++ } ++ ++ return 0; ++} ++ ++uint16_t ++spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) ++{ ++ return qpair->id; ++} +diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h +index e57af4e..a18fb78 100644 +--- a/lib/nvme/nvme_internal.h ++++ b/lib/nvme/nvme_internal.h +@@ -166,7 +166,7 @@ extern struct spdk_nvme_transport_opts g_spdk_nvme_transport_opts; + #define DEFAULT_IO_QUEUE_SIZE (256) + #define DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK (1024) /* Matches Linux kernel driver */ + +-#define DEFAULT_IO_QUEUE_REQUESTS (512) ++#define DEFAULT_IO_QUEUE_REQUESTS (2048) + + #define SPDK_NVME_DEFAULT_RETRY_COUNT (4) + +@@ -203,6 +203,10 @@ enum nvme_payload_type { + + /** nvme_request::u.sgl is valid for this request */ + NVME_PAYLOAD_TYPE_SGL, ++#ifdef SPDK_CONFIG_APP_RW ++ /** nvme_request::sgl is not extended LBA align */ ++ NVME_PAYLOAD_TYPE_SGL_PRP, ++#endif + }; + + /** Boot partition write states */ +@@ -238,6 +242,9 @@ struct nvme_payload { + + /** Virtual memory address of a single virtually contiguous metadata buffer */ + void *md; ++#ifdef SPDK_CONFIG_APP_RW ++ enum nvme_payload_type payload_type; ++#endif + }; + + #define NVME_PAYLOAD_CONTIG(contig_, md_) \ +@@ -1280,6 +1287,11 @@ int nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, + req->accel_sequence = NULL; \ + } while (0); + ++#ifdef SPDK_CONFIG_APP_RW ++void nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr); ++void nvme_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); ++#endif ++ + static inline struct nvme_request * + nvme_allocate_request(struct spdk_nvme_qpair *qpair, + const struct nvme_payload *payload, uint32_t payload_size, uint32_t md_size, +diff --git a/lib/nvme/nvme_ns.c b/lib/nvme/nvme_ns.c +index db9e5dc..a714773 100644 +--- a/lib/nvme/nvme_ns.c ++++ b/lib/nvme/nvme_ns.c +@@ -86,6 +86,11 @@ nvme_ns_set_identify_data(struct spdk_nvme_ns *ns) + ns->flags |= SPDK_NVME_NS_DPS_PI_SUPPORTED; + ns->pi_type = nsdata->dps.pit; + } ++#ifdef SPDK_CONFIG_APP_RW ++ if (nsdata->dps.md_start) { ++ ns->flags |= SPDK_NVME_NS_DPS_PI_MDSTART; ++ } ++#endif + } + + static int +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index bbafe8b..519bd87 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -10,7 +10,7 @@ + + static inline struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, ++ struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, + void *cb_arg, uint32_t opc, uint32_t io_flags, + uint16_t apptag_mask, uint16_t apptag, uint32_t cdw13, bool check_sgl, +@@ -74,7 +74,7 @@ _nvme_get_sectors_per_max_io(struct spdk_nvme_ns *ns, uint32_t io_flags) + + static struct nvme_request * + _nvme_add_child_request(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- const struct nvme_payload *payload, ++ struct nvme_payload *payload, + uint32_t payload_offset, uint32_t md_offset, + uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc, + uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, uint32_t cdw13, +@@ -97,7 +97,7 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + static struct nvme_request * + _nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns, + struct spdk_nvme_qpair *qpair, +- const struct 
nvme_payload *payload,
++ struct nvme_payload *payload,
+ uint32_t payload_offset, uint32_t md_offset,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
+@@ -196,7 +196,7 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req,
+ static struct nvme_request *
+ _nvme_ns_cmd_split_request_prp(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+- const struct nvme_payload *payload,
++ struct nvme_payload *payload,
+ uint32_t payload_offset, uint32_t md_offset,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
+@@ -323,10 +323,78 @@ _nvme_ns_cmd_split_request_prp(struct spdk_nvme_ns *ns,
+ return req;
+ }
+
++#ifdef SPDK_CONFIG_APP_RW
++#define NVME_MAX_SGL_PRP_DESCRIPTORS (2048)
++
++static int
++_nvme_ns_check_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_payload *payload,
++ uint32_t payload_offset, uint32_t extended_lba_size, struct nvme_request *req)
++{
++ void *virt_addr = NULL;
++ uint64_t phys_addr;
++ uint32_t remaining_transfer_len, remaining_user_sge_len, length;
++ uint32_t nseg = 0;
++ uint32_t nseg_idx;
++
++ struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_PRP_DESCRIPTORS];
++
++ /*
++ * Check whether every segment of the scattered payload is aligned to extended_lba_size.
++ */
++ req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, payload_offset);
++
++ remaining_transfer_len = req->payload_size;
++
++ while (remaining_transfer_len > 0) {
++ req->payload.next_sge_fn(req->payload.contig_or_cb_arg,
++ &virt_addr, &remaining_user_sge_len);
++
++ remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len);
++ remaining_transfer_len -= remaining_user_sge_len;
++ while (remaining_user_sge_len > 0) {
++ if (nseg >= NVME_MAX_SGL_PRP_DESCRIPTORS) {
++ SPDK_ERRLOG("SGL segment count must not be greater than %d\n", NVME_MAX_SGL_PRP_DESCRIPTORS);
++ return -1;
++ }
++
++ phys_addr = spdk_vtophys(virt_addr, NULL);
++ if (phys_addr == SPDK_VTOPHYS_ERROR) {
++ SPDK_ERRLOG("Cannot translate SGL data addr 0x%lx to physical addr.\n", (uint64_t)virt_addr);
++ return -1;
++ }
++
++ length = spdk_min(remaining_user_sge_len, 0x200000 - _2MB_OFFSET(virt_addr));
++ remaining_user_sge_len -= length;
++ virt_addr += length;
++
++ if (nseg > 0 && phys_addr ==
++ sgl[nseg - 1].address + sgl[nseg - 1].unkeyed.length) {
++ /* extend previous entry */
++ sgl[nseg - 1].unkeyed.length += length;
++ continue;
++ }
++
++ sgl[nseg].unkeyed.length = length;
++ sgl[nseg].address = phys_addr;
++
++ nseg++;
++ }
++ }
++
++ for (nseg_idx = 0; nseg_idx < nseg; nseg_idx++) {
++ if (sgl[nseg_idx].unkeyed.length % extended_lba_size) {
++ return 1;
++ }
++ }
++
++ return 0;
++}
++#endif
++
+ static struct nvme_request *
+ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
+ struct spdk_nvme_qpair *qpair,
+- const struct nvme_payload *payload,
++ struct nvme_payload *payload,
+ uint32_t payload_offset, uint32_t md_offset,
+ uint64_t lba, uint32_t lba_count,
+ spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
+@@ -417,7 +485,7 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
+
+ static inline struct nvme_request *
+ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+- const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
++ struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
+ uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
+ uint32_t io_flags, uint16_t apptag_mask, uint16_t 
apptag, uint32_t cdw13, bool check_sgl,
+ void *accel_sequence, int *rc)
+@@ -439,6 +507,9 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+
+ req->payload_offset = payload_offset;
+ req->md_offset = md_offset;
++#ifdef SPDK_CONFIG_APP_RW
++ req->user_cb_arg = cb_arg;
++#endif
+ req->accel_sequence = accel_sequence;
+
+ /* Zone append commands cannot be split. */
+@@ -466,6 +537,17 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ io_flags, req, sectors_per_stripe, sectors_per_stripe - 1,
+ apptag_mask, apptag, cdw13, accel_sequence, rc);
+ } else if (lba_count > sectors_per_max_io) {
++#ifdef SPDK_CONFIG_APP_RW
++ int sgl_check = 0; /* use a local here; the int *rc out-parameter carries error codes only */
++ if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL && check_sgl &&
++ (ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED)) {
++ sgl_check = _nvme_ns_check_hw_sgl_request(qpair, payload, payload_offset, sector_size, req);
++ }
++ if (sgl_check > 0) {
++ req->payload.payload_type = NVME_PAYLOAD_TYPE_SGL_PRP;
++ payload->payload_type = NVME_PAYLOAD_TYPE_SGL_PRP;
++ }
++#endif
+ return _nvme_ns_cmd_split_request(ns, qpair, payload, payload_offset, md_offset, lba, lba_count,
+ cb_fn,
+ cb_arg, opc,
+@@ -473,6 +555,17 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
+ apptag, cdw13, accel_sequence, rc);
+ } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL && check_sgl) {
+ if (ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) {
++#ifdef SPDK_CONFIG_APP_RW
++ if ((payload->payload_type == NVME_PAYLOAD_TYPE_SGL_PRP)
++ || (_nvme_ns_check_hw_sgl_request(qpair, payload, payload_offset, sector_size, req) > 0)) {
++ req->payload.payload_type = NVME_PAYLOAD_TYPE_SGL_PRP;
++ payload->payload_type = NVME_PAYLOAD_TYPE_SGL_PRP;
++ return _nvme_ns_cmd_split_request_prp(ns, qpair, payload, payload_offset, md_offset,
++ lba, lba_count, cb_fn, cb_arg, opc, io_flags,
++ req, apptag_mask, apptag, cdw13,
++ accel_sequence, rc);
++ }
++#endif
+ return _nvme_ns_cmd_split_request_sgl(ns, qpair, payload, payload_offset, md_offset,
+ lba, lba_count, cb_fn, cb_arg, opc, io_flags,
+ req, apptag_mask, apptag, cdw13,
+@@ -1462,3 +1555,38 @@ spdk_nvme_ns_cmd_io_mgmt_send(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q
+
+ return nvme_qpair_submit_request(qpair, req);
+ }
++
++#ifdef SPDK_CONFIG_APP_RW
++int
++spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
++ uint32_t type,
++ const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges,
++ spdk_nvme_cmd_cb cb_fn, void *cb_arg)
++{
++ struct nvme_request *req = NULL;
++ struct spdk_nvme_cmd *cmd = NULL;
++ struct nvme_payload payload;
++
++ if (ranges == NULL) {
++ return -EINVAL;
++ }
++
++ payload = NVME_PAYLOAD_CONTIG((void *)ranges, NULL);
++
++ req = nvme_allocate_request(qpair, &payload, num_ranges * sizeof(struct spdk_nvme_dsm_range),
++ 0, cb_fn, cb_arg);
++ if (req == NULL) {
++ return -ENOMEM;
++ }
++
++ req->user_cb_arg = cb_arg;
++
++ cmd = &req->cmd;
++ cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT;
++ cmd->nsid = ns->id;
++
++ cmd->cdw10 = num_ranges - 1;
++ cmd->cdw11 = type;
++
++ return nvme_qpair_submit_request(qpair, req);
++}
+diff --git a/lib/nvme/nvme_ns_self.c b/lib/nvme/nvme_ns_self.c
+new file mode 100644
+index 0000000..5aabbaa
+--- /dev/null
++++ b/lib/nvme/nvme_ns_self.c
+@@ -0,0 +1,93 @@
++/*
++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++#include "nvme_internal.h" ++ ++bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns) ++{ ++ return (ns->flags & SPDK_NVME_NS_DPS_PI_MDSTART) ? true : false; ++} ++ ++bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns) ++{ ++ return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? true : false; ++} ++ ++int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata) ++{ ++ if (ctrlr == NULL || nsdata == NULL) { ++ SPDK_ERRLOG("Parameter error\n"); ++ return -1; ++ } ++ ++ struct nvme_completion_poll_status status; ++ int rc = 0; ++ status.done = false; ++ ++ if (spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, SPDK_NVME_GLOBAL_NS_TAG, 0, ++ nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); ++ } ++ else { ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, 1, 0, ++ nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); ++ } ++ if (rc != 0) { ++ return rc; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, status.cpl.status.sc); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++bool spdk_nvme_ns_is_allocated(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid) ++{ ++ struct spdk_nvme_ns_data nsdata; ++ struct nvme_completion_poll_status status; ++ int rc = 0; ++ ++ if (ctrlr == NULL || nsid == 0) { ++ SPDK_ERRLOG("Parameter error. ns[%u]\n", nsid); ++ return false; ++ } ++ ++ if (!spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { ++ return true; ++ } ++ ++ nsdata.ncap = 0; ++ status.done = false; ++ rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS_ALLOCATED, 0, nsid, 0, ++ &nsdata, sizeof(nsdata), nvme_completion_poll_cb, &status); ++ if (rc != 0) { ++ SPDK_ERRLOG("Failed to identify ns[%u]'s nsdata. 
rc[%d]\n", nsid, rc); ++ return false; ++ } ++ ++ if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { ++ SPDK_ERRLOG("Failed to identify ns[%u]'s nsdata, sct[%x], sc[%x]\n", ++ nsid, status.cpl.status.sct, status.cpl.status.sc); ++ return false; ++ } ++ ++ return (nsdata.ncap != 0); ++} ++ ++bool ++spdk_nvme_ns_ctrl_is_failed(struct spdk_nvme_ns *ns) ++{ ++ return ns->ctrlr->is_failed; ++} +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index a8d47c3..4b5e7cd 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -12,6 +12,7 @@ + #include "spdk/env.h" + #include "spdk/likely.h" + #include "spdk/string.h" ++#include "spdk/bdev_module.h" + #include "nvme_internal.h" + #include "nvme_pcie_internal.h" + +@@ -98,6 +99,7 @@ _nvme_pcie_event_process(struct spdk_pci_event *event, void *cb_ctx) + } + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) + { +@@ -141,6 +143,7 @@ _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) + } + return rc; + } ++#endif + + static volatile void * + nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) +diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index 35c3d51..688370d 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -660,6 +660,12 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke + if (!pqpair->flags.delay_cmd_submit) { + nvme_pcie_qpair_ring_sq_doorbell(qpair); + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ if (nvme_qpair_is_io_queue(qpair)) { ++ spdk_bdev_set_io_location(req->user_cb_arg, (uint8_t)LOCAL_LIBSTORAGE_TO_DISK); ++ } ++#endif + } + + void +@@ -733,6 +739,7 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + { + struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); + struct nvme_tracker *tr, *temp, *last; ++ int count = 0; + + last = TAILQ_LAST(&pqpair->outstanding_tr, nvme_outstanding_tr_head); + +@@ -741,6 +748,7 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + if (!qpair->ctrlr->opts.disable_error_logging) { + SPDK_ERRLOG("aborting outstanding command\n"); + } ++ count++; + nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC, + SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true); + +@@ -748,6 +756,10 @@ nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr) + break; + } + } ++ ++ if (count != 0) { ++ SPDK_ERRLOG("Aborted %u qpair[%p]'s outstanding command\n", count, pqpair); ++ } + } + + void +diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c +index 5e1ef28..5e0aa73 100644 +--- a/lib/nvme/nvme_qpair.c ++++ b/lib/nvme/nvme_qpair.c +@@ -645,7 +645,7 @@ nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + * to submit any queued requests that built up while we were in the connected or enabling state. 
+ */
+ if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED &&
+- !qpair->ctrlr->is_resetting)) {
++ !qpair->ctrlr->is_resetting && !qpair->ctrlr->is_removed && !qpair->ctrlr->is_destructed)) {
+ nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING);
+ /*
+ * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset
+@@ -972,6 +972,13 @@ _nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *r
+ rc = nvme_qpair_submit_request(qpair, child_req);
+ if (spdk_unlikely(rc != 0)) {
+ child_req_failed = true;
++#ifdef SPDK_CONFIG_APP_RW
++ if (rc == -ENXIO && child_req->num_children == 0) {
++ SPDK_WARNLOG("child req submit failed.\n");
++ nvme_request_remove_child(req, child_req);
++ nvme_free_request(child_req);
++ }
++#endif
+ }
+ } else { /* free remaining child_reqs since one child_req fails */
+ nvme_request_remove_child(req, child_req);
+diff --git a/lib/nvme/nvme_rebind.c b/lib/nvme/nvme_rebind.c
+new file mode 100644
+index 0000000..5836fa3
+--- /dev/null
++++ b/lib/nvme/nvme_rebind.c
+@@ -0,0 +1,262 @@
++/*
++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 and
++ * only version 2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++
++#include <stdint.h> /* the bracketed header names here were lost in this patch text; inferred from usage */
++#include <stdlib.h>
++#include <string.h>
++#include <errno.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <sys/queue.h>
++#include <securec.h>
++#include "spdk/log.h"
++#include "spdk/nvme.h"
++
++#define PATH_LEN 4096
++#define ID_LEN 16
++
++// an nvme device that failed to bind to uio
++struct failed_nvme {
++ char *pci_addr;
++ TAILQ_ENTRY(failed_nvme) tailq;
++};
++
++/**
++ * List of NVMe devices that failed to bind to uio. Binding such a device back to the nvme driver raises an "nvme add" uevent;
++ * spdk_rebind_driver must ignore that event, otherwise it would never stop trying to bind the device to uio.
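++ * Each entry is consumed by the first matching uevent, so a later hotplug of the
++ * same PCI address still goes through the normal rebind path.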
++ */
++static TAILQ_HEAD(failed_nvme_list, failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes);
++
++// get vendor id from /sys/bus/pci/devices/pci_addr/vendor
++// get device id from /sys/bus/pci/devices/pci_addr/device
++static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, uint8_t ret_id_len)
++{
++ int32_t fd = -1;
++ char sysfs_path[PATH_LEN];
++ char tmp_id[ID_LEN] = {0};
++ char *tmp = NULL;
++
++ // an id is 5 bytes long: "XXXX" plus the terminating '\0'
++ if (ret_id_len < 5) {
++ SPDK_ERRLOG("ret_id_len is less than 5 bytes\n");
++ return -1;
++ }
++
++ // construct the sysfs path that stores the id
++ if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, id_type) > 0) {
++ fd = open(sysfs_path, O_RDONLY);
++ }
++ if (fd < 0) {
++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno));
++ return -1;
++ }
++
++ // the id in sysfs looks like 0xDDDD
++ if (read(fd, tmp_id, ID_LEN - 1) <= 0) {
++ SPDK_ERRLOG("fail to read id from %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno));
++ close(fd);
++ return -1;
++ }
++
++ // skip the 2-character "0x" prefix of the id read from sysfs
++ tmp = tmp_id + 2;
++ // copy the 4 value characters of the id, without the "0x" prefix
++ if (snprintf_s(ret_id, ret_id_len, 4, "%s", tmp) <= 0) {
++ SPDK_ERRLOG("string copy failed\n");
++ }
++
++ close(fd);
++ return 0;
++}
++
++// get ven_dev_id which combines vendor id and device id
++static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ven_dev_id_len)
++{
++ char ven_id[ID_LEN], dev_id[ID_LEN];
++
++ // ven_dev_id combines the vendor id and device id, like "DDDD XXXX" plus '\0', 10 bytes in total
++ if (ven_dev_id_len < 10) {
++ SPDK_ERRLOG("ven_dev_id_len is less than 10 bytes\n");
++ return -1;
++ }
++
++ // get vendor id from sysfs, format is like "DDDD"
++ if (get_id_from_sysfs(pci_addr, "vendor", ven_id, ID_LEN) < 0) {
++ SPDK_ERRLOG("fail to get vendor id\n");
++ return -1;
++ }
++
++ // get device id from sysfs, format is like "XXXX"
++ if (get_id_from_sysfs(pci_addr, "device", dev_id, ID_LEN) < 0) {
++ SPDK_ERRLOG("fail to get device id\n");
++ return -1;
++ }
++
++ if (snprintf_s(ven_dev_id, ven_dev_id_len, ven_dev_id_len - 1, "%s %s", ven_id, dev_id) <= 0) {
++ SPDK_ERRLOG("string copy failed\n");
++ return -1;
++ }
++ return 0;
++}
++
++// unbind driver by writing remove_id and unbind files in sysfs
++static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id)
++{
++ char sysfs_dev_remove_id[PATH_LEN]; // remove_id file path in sysfs
++ char sysfs_dev_unbind[PATH_LEN]; // unbind file path in sysfs
++ int32_t remove_id_fd = -1; // file descriptor of the remove_id file
++ int32_t unbind_fd = -1; // file descriptor of the unbind file
++ int32_t ret;
++
++ ret = snprintf_s(sysfs_dev_remove_id, PATH_LEN, PATH_LEN - 1,
++ "/sys/bus/pci/devices/%s/driver/remove_id", pci_addr);
++ if (ret <= 0) {
++ SPDK_ERRLOG("copy dev id failed\n");
++ return -1;
++ }
++ ret = snprintf_s(sysfs_dev_unbind, PATH_LEN, PATH_LEN - 1,
++ "/sys/bus/pci/devices/%s/driver/unbind", pci_addr);
++ if (ret <= 0) {
++ SPDK_ERRLOG("copy dev unbind failed\n");
++ return -1;
++ }
++
++ remove_id_fd = open(sysfs_dev_remove_id, O_WRONLY);
++ if (remove_id_fd < 0) {
++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_remove_id, errno, strerror(errno));
++ return -1;
++ }
++
++ (void)write(remove_id_fd, ven_dev_id, strlen(ven_dev_id) + 1);
++ close(remove_id_fd);
++
++ // unbind the driver by writing the unbind file
++ 
unbind_fd = open(sysfs_dev_unbind, O_WRONLY);
++ if (unbind_fd < 0) {
++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_unbind, errno, strerror(errno));
++ return -1;
++ }
++
++ ret = write(unbind_fd, pci_addr, strlen(pci_addr) + 1);
++ if (ret < 0) {
++ SPDK_ERRLOG("failed to write %s to %s, errno(%d): %s\n", pci_addr, sysfs_dev_unbind, errno, strerror(errno));
++ close(unbind_fd);
++ return -1;
++ }
++
++ close(unbind_fd);
++
++ return 0;
++}
++
++// bind device to new driver by writing new_id and bind files in sysfs
++static int32_t bind_driver(const char *pci_addr, const char *ven_dev_id, const char *driver_name)
++{
++ char sysfs_driver_new_id[PATH_LEN]; // new_id file path in sysfs
++ char sysfs_driver_bind[PATH_LEN]; // bind file path in sysfs
++ int32_t new_id_fd = -1; // file descriptor of the new_id file
++ int32_t bind_fd = -1; // file descriptor of the bind file
++ int rc;
++
++ rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", driver_name);
++ if (rc > 0) {
++ rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", driver_name);
++ }
++ if (rc <= 0) {
++ SPDK_ERRLOG("string copy failed\n");
++ return -1;
++ }
++
++ // try to bind the driver by writing ven_dev_id to the new_id file
++ new_id_fd = open(sysfs_driver_new_id, O_WRONLY);
++ if (new_id_fd < 0) {
++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_new_id, errno, strerror(errno));
++ return -1;
++ }
++
++ (void)write(new_id_fd, ven_dev_id, strlen(ven_dev_id) + 1);
++ close(new_id_fd);
++
++ // also write pci_addr to the bind file, in case writing new_id did not trigger the bind
++ bind_fd = open(sysfs_driver_bind, O_WRONLY);
++ if (bind_fd < 0) {
++ SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_bind, errno, strerror(errno));
++ return -1;
++ }
++
++ (void)write(bind_fd, pci_addr, strlen(pci_addr) + 1);
++ close(bind_fd);
++ return 0;
++}
++
++int32_t spdk_rebind_driver(char *pci_addr, char *driver_name)
++{
++ char ven_dev_id[ID_LEN] = {0};
++ struct failed_nvme *iter = NULL;
++ unsigned int sleep_time = 1000;
++
++ if (pci_addr == NULL || driver_name == NULL) {
++ SPDK_ERRLOG("pci address and driver_name can't be NULL to rebind driver\n");
++ return -1;
++ }
++
++ // ignore the uevent generated by binding this pci device back to the nvme driver
++ TAILQ_FOREACH(iter, &g_failed_nvmes, tailq) {
++ if (strncmp(iter->pci_addr, pci_addr, strlen(iter->pci_addr)) == 0) {
++ // only ignore the one add event caused by binding back to nvme, so the next hotplug of this pci address is rebound normally
++ TAILQ_REMOVE(&g_failed_nvmes, iter, tailq);
++ free(iter->pci_addr);
++ free(iter);
++ SPDK_WARNLOG("ignore failed nvme %s\n", pci_addr);
++ return 0;
++ }
++ }
++
++ if (get_ven_dev_id(pci_addr, ven_dev_id, ID_LEN) < 0) {
++ SPDK_ERRLOG("failed to get ven_dev_id\n");
++ return -1;
++ }
++
++ while (unbind_driver(pci_addr, ven_dev_id) < 0) {
++ usleep(sleep_time);
++ sleep_time = sleep_time * 2;
++ if (sleep_time > 1000000) {
++ SPDK_ERRLOG("failed to unbind driver of %s\n", pci_addr);
++ return -1;
++ }
++ }
++
++ if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) {
++ // retry
++ if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) {
++ SPDK_ERRLOG("fail to bind %s to %s\n", pci_addr, driver_name);
++ // add the failed nvme to g_failed_nvmes
++ struct failed_nvme *failed_nvme = (struct failed_nvme *)malloc(sizeof(struct failed_nvme));
++ if (failed_nvme == NULL) {
++ SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr);
++ return -1;
++ }
++ 
failed_nvme->pci_addr = strdup(pci_addr); ++ if (failed_nvme->pci_addr == NULL) { ++ SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr); ++ free(failed_nvme); ++ return -1; ++ } ++ TAILQ_INSERT_TAIL(&g_failed_nvmes, failed_nvme, tailq); ++ ++ // bind device back to nvme driver if failed to bind uio ++ bind_driver(pci_addr, ven_dev_id, "nvme"); ++ } ++ } ++ return 0; ++} +-- +2.43.0 + diff --git a/0007-module-bdev-Add-bdev-module-support-for-HSAK.patch b/0007-module-bdev-Add-bdev-module-support-for-HSAK.patch new file mode 100644 index 0000000..93ce7c6 --- /dev/null +++ b/0007-module-bdev-Add-bdev-module-support-for-HSAK.patch @@ -0,0 +1,2529 @@ +From bf39dfe20108f441a7e7d659ccd74239c6da56f2 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Thu, 25 Feb 2021 18:21:53 +0800 +Subject: [PATCH 07/12] module/bdev: Add bdev module support for HSAK + +Signed-off-by: sunshihao +--- + app/spdk_lspci/Makefile | 2 +- + include/spdk/bdev.h | 20 +- + include/spdk/bdev_module.h | 28 +- + include/spdk/nvme.h | 72 +--- + lib/bdev/bdev.c | 2 + + lib/nvme/nvme_ctrlr_self.c | 78 ++-- + lib/nvme/nvme_ns.c | 5 - + lib/nvme/nvme_ns_cmd.c | 1 + + lib/nvme/nvme_ns_self.c | 11 +- + lib/nvme/nvme_pcie.c | 2 + + lib/nvme/nvme_pcie_common.c | 1 + + lib/nvme/nvme_rebind.c | 86 ++-- + lib/rpc/rpc.c | 2 + + lib/thread/thread.c | 14 +- + mk/spdk.common.mk | 5 +- + mk/spdk.modules.mk | 2 +- + module/bdev/nvme/Makefile | 1 + + module/bdev/nvme/bdev_nvme.c | 278 ++++++++++++- + module/bdev/nvme/bdev_nvme.h | 42 ++ + module/bdev/nvme/bdev_nvme_self.c | 661 ++++++++++++++++++++++++++++++ + module/bdev/nvme/bdev_nvme_self.h | 43 ++ + scripts/setup_self.sh | 347 ++++++++++++++++ + 22 files changed, 1491 insertions(+), 212 deletions(-) + create mode 100644 module/bdev/nvme/bdev_nvme_self.c + create mode 100644 module/bdev/nvme/bdev_nvme_self.h + create mode 100755 scripts/setup_self.sh + +diff --git a/app/spdk_lspci/Makefile b/app/spdk_lspci/Makefile +index ed10b65..5f990ec 100644 +--- a/app/spdk_lspci/Makefile ++++ b/app/spdk_lspci/Makefile +@@ -11,7 +11,7 @@ APP = spdk_lspci + + C_SRCS := spdk_lspci.c + +-SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd ++SPDK_LIB_LIST = $(SOCK_MODULES_LIST) nvme vmd trace + + include $(SPDK_ROOT_DIR)/mk/spdk.app.mk + +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index a8e9131..55b91f9 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -97,7 +97,6 @@ enum spdk_bdev_status { + }; + + #ifdef SPDK_CONFIG_APP_RW +-/** ns status */ + enum spdk_bdev_ns_status { + SPDK_BDEV_NS_STATUS_INVALID, + SPDK_BDEV_NS_STATUS_READY, +@@ -106,7 +105,6 @@ enum spdk_bdev_ns_status { + }; + + typedef void (*LIBSTORAGE_CALLBACK_FUNC)(int32_t cb_status, int32_t sct_code, void *cb_arg); +- + typedef struct libstorage_io { + uint8_t *buf; + struct iovec *iovs; /* array of iovecs to transfer */ +@@ -1643,19 +1641,13 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. 
+ */ +-int +-spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, +- spdk_bdev_io_completion_cb cb, void *cb_arg); +- +-void * +-spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); +- +-void * +-spdk_bdev_io_get_pool(size_t nbytes); ++int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, ++ void *unmap_d, uint16_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg); + +-bool +-spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++void *spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); ++void *spdk_bdev_io_get_pool(size_t nbytes); ++bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); + #endif + + /** +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index a830e2f..7cb0da3 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -370,21 +370,12 @@ struct spdk_bdev_fn_table { + + #ifdef SPDK_CONFIG_APP_RW + uint16_t (*get_io_channel_id)(struct spdk_io_channel *ch); +- + int (*bdev_poll_rsp)(void *pollCh); +- + uint64_t (*get_timeout_count)(struct spdk_io_channel *ch); + #endif + }; + + #ifdef SPDK_CONFIG_APP_RW +-static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) +-{ +- struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); +- uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; +- *ioLoc = location; +-} +- + enum spdk_bdev_driver_ctx { + SPDK_BDEV_IO_ACTION_PI, + SPDK_BDEV_IO_ACTION_FUA, +@@ -407,12 +398,6 @@ enum spdk_bdev_io_fua { + IO_FUA_YES = 1 + }; + +-void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); +- +-void spdk_bdev_fail_ctrlr(const char *traddr); +- +-void *nvme_channel_get_group(void *io_ch); +- + enum reqLocation_E { + LOCAL_RECEIVE_APP = 1, + LOCAL_LIBSTORAGE_SUBMIT = 2, +@@ -425,6 +410,10 @@ enum reqLocation_E { + LOCAL_LIBSTORAGE_SUBMIT_RETRY = 9, + LOCAL_LIBSTORAGE_BDEV_NOMEM = 10, + }; ++ ++void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); ++void spdk_bdev_fail_ctrlr(const char *traddr); ++void *nvme_channel_get_group(void *io_ch); + #endif + + /** bdev I/O completion status */ +@@ -1862,6 +1851,15 @@ int spdk_bdev_unquiesce_range(struct spdk_bdev *bdev, struct spdk_bdev_module *m + uint64_t offset, uint64_t length, + spdk_bdev_quiesce_cb cb_fn, void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++static inline void spdk_bdev_set_io_location(void *bdev_ctx, uint8_t location) ++{ ++ struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bdev_ctx); ++ uint8_t *ioLoc = (uint8_t *)bdev_io->internal.caller_ctx; ++ *ioLoc = location; ++} ++#endif ++ + /* + * Macro used to register module for later initialization. 
+ */ +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index b6a1f05..507e139 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -4244,6 +4244,7 @@ struct spdk_nvme_transport_ops { + void spdk_nvme_transport_register(const struct spdk_nvme_transport_ops *ops); + + #ifdef SPDK_CONFIG_APP_RW ++#define NVME_MAX_CONTROLLERS 1024 + struct nvme_ctrlr_info { + char ctrlName[16]; + char pciAddr[24]; +@@ -4262,10 +4263,10 @@ struct nvme_ctrlr_info { + uint16_t ssvid; /* Subsystem vendor id */ + uint16_t ctrlid; /* Controller id */ + uint16_t trtype; /* Transport type */ +- uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ +- uint16_t directives : 1; /* Supports Directives */ +- uint16_t streams : 1; /* Supports Streams Directives */ +- uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ ++ uint16_t support_ns : 1; /* Supports the Namespace Management and Namespace Attachment commands */ ++ uint16_t directives : 1; /* Supports Directives */ ++ uint16_t streams : 1; /* Supports Streams Directives */ ++ uint16_t dsm : 1; /* Supports the controller supports the Dataset Management command */ + uint16_t reserved : 12; + uint16_t reserved2[3]; + }; +@@ -4301,7 +4302,7 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); +@@ -4347,38 +4348,13 @@ struct spdk_uevent { + char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; + }; + +-/* make a socket to get uevent */ + int nvme_uevent_connect(void); +- +-/* get uevent from socket fd */ + int nvme_get_uevent(int fd, struct spdk_uevent *uevent); +- +-/* blocked to get uevent from socket fd */ + int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); +- +-/** +- * @Description: bind device with pci_addr to driver +- * @param pci_addr: device's pci_addr,like "0000:08:00.0" +- * @param driver: driver name which device bind to +- */ + int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); +- +-/** +- * \brief True if the protection information transferred at the start of metadata +- * when end-to-end data protection enabled. +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. +- */ + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); +- +-/** +- * \brief True if the namespace supports Dataset Management command. +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. +- */ + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + + /** + * Submit a data set management request to the specified NVMe namespace. 
Data set +@@ -4439,40 +4415,6 @@ int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn); +- +-/** +- * \brief Send comman to NVMe controller to start or abort a self-test operation. +- * +- * \param ctrlr NVMe controller to operate self-test command. +- * \param nsid Depending on the log page, this may be 0, a namespace identifier, or SPDK_NVME_GLOBAL_NS_TAG. +- * \param stc self-test code, which specifies the action taken by the Device Self-test command. +- * \param payload The pointer to the payload buffer. it doesn't work actually. +- * \param payload_size The size of payload buffer. it doesn't work actually. +- * \param cb_fn Callback function to invoke when the feature has been retrieved. +- * \param cb_arg Argument to pass to the callback function. +- * +- * \return 0 if successfully submitted, ENOMEM if resources could not be allocated for this request +- * +- * This function is thread safe and can be called at any point while the controller is attached to +- * the SPDK NVMe driver. +- * +- * Call \ref spdk_nvme_ctrlr_process_admin_completions() to poll for completion +- * of commands submitted through this function. +- * +- * \sa spdk_nvme_ctrlr_cmd_self_test_operation() +- */ +-int spdk_nvme_ctrlr_cmd_self_test_operation(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, +- uint32_t stc, +- void *payload, uint32_t payload_size, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg); +- +-/** +- *\get I/O queue pair id +- *\param qpair I/O queue pair to submit the request +- *\ +- *\return I/O queue pair id +- */ +-uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + #endif + + /* +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 64355f9..88a33e4 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -5264,6 +5264,7 @@ spdk_bdev_io_get_seek_offset(const struct spdk_bdev_io *bdev_io) + return bdev_io->u.bdev.seek.offset; + } + ++#ifdef SPDK_CONFIG_APP_RW + static void + bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_blocks, + uint64_t num_blocks, +@@ -5283,6 +5284,7 @@ bdev_build_contig_io(uint8_t type, void *buf, void *md_buf, uint64_t offset_bloc + bdev_io->driver_ctx[SPDK_BDEV_IO_STREAM_ID_1] = (io->streamId >> 8) & 0xFF; + } + } ++#endif + + static int + bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf, +diff --git a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +index d3937d9..6ad8bc9 100644 +--- a/lib/nvme/nvme_ctrlr_self.c ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -14,18 +14,16 @@ + #include "spdk/stdinc.h" + #include "nvme_internal.h" + +-void +-spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) ++void spdk_nvme_ctrlr_set_shutdown(struct spdk_nvme_ctrlr *ctrlr, bool is_shutdown) + { + nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); +- ctrlr-> is_destructed= is_shutdown; ++ ctrlr->is_destructed = is_shutdown; + nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); + } + +-bool +-spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + { +- if(NULL == ctrlr) { ++ if (NULL == ctrlr) { + SPDK_ERRLOG("spdk_nvme_ctrlr_is_smart_per_namespace_supported: Invalid Parameters!\n"); + return false; + } +@@ -33,7 +31,7 @@ spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + /* check Bit 0 of 
Log Page Attributes(LPA), + to find out whether the controller supports namespace basis or not. */ + +- if(0 == ctrlr->cdata.lpa.ns_smart) { ++ if (0 == ctrlr->cdata.lpa.ns_smart) { + SPDK_NOTICELOG("This controller does not support the SMART information on a per namespace basis.\n"); + return false; + } +@@ -42,14 +40,14 @@ spdk_nvme_ctrlr_is_smart_per_namespace_supported(struct spdk_nvme_ctrlr *ctrlr) + } + + static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, uint32_t nsid, +- void *payload, uint32_t payload_size) ++ void *payload, uint32_t payload_size) + { + struct nvme_completion_poll_status status = {0x0}; + int ret; + + status.done = false; + ret = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, log_page, nsid, payload, payload_size, 0, +- nvme_completion_poll_cb, &status); ++ nvme_completion_poll_cb, &status); + if (ret) { + return ret; + } +@@ -61,31 +59,30 @@ static int nvme_get_log_info(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page, ui + } + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("spdk_nvme_ctrlr_get_smart_info failed! sc[0x%x], sct[0x%x]\n", +- status.cpl.status.sc, status.cpl.status.sct); ++ status.cpl.status.sc, status.cpl.status.sct); + return -ENXIO; + } + return 0; + } + +-int +-spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, +- struct spdk_nvme_health_information_page *smart_info) ++int spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++ struct spdk_nvme_health_information_page *smart_info) + { + struct spdk_nvme_ns *ns = NULL; + +- if(NULL == ctrlr || NULL == smart_info) { ++ if (NULL == ctrlr || NULL == smart_info) { + SPDK_ERRLOG("Invalid parameters!\n"); + return -EINVAL; + } + + /* if controller does not support namespase basis, then set the nsid to 0xFFFFFFFF, and continue the process. + and if nsid is 0, set the nsid to 0xFFFFFFFF too. */ +- if(!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { ++ if (!spdk_nvme_ctrlr_is_smart_per_namespace_supported(ctrlr) || 0 == nsid) { + nsid = SPDK_NVME_GLOBAL_NS_TAG; + } + + /* nsid should be 0xffffffff or on a per namespace basis. */ +- if(nsid != SPDK_NVME_GLOBAL_NS_TAG) { ++ if (nsid != SPDK_NVME_GLOBAL_NS_TAG) { + ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (NULL == ns) { + SPDK_ERRLOG("Invalid NS %u\n", nsid); +@@ -100,18 +97,17 @@ spdk_nvme_ctrlr_get_smart_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, + } + + return nvme_get_log_info(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, nsid, smart_info, +- sizeof(struct spdk_nvme_health_information_page)); ++ sizeof(struct spdk_nvme_health_information_page)); + } + +-int +-spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, +- struct spdk_nvme_error_information_entry *error_info) ++int spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entries, ++ struct spdk_nvme_error_information_entry *error_info) + { + const struct spdk_nvme_ctrlr_data *cdata = NULL; + uint32_t nsid = SPDK_NVME_GLOBAL_NS_TAG; + int ret; + +- if(NULL == ctrlr || NULL == error_info) { ++ if (NULL == ctrlr || NULL == error_info) { + SPDK_ERRLOG("Invalid parameters!\n"); + return -EINVAL; + } +@@ -121,7 +117,7 @@ spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entri + if (err_entries > (cdata->elpe + 1u)) { + /* if the parameter is bigger, then cut it into the maximum number supported. 
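+		   (ELPE in the Identify Controller data is zero-based, which is
+		   why the limit below is cdata->elpe + 1.)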
*/ + SPDK_WARNLOG("Cannot get %d error log entries, the controller only support %d errors.\n", +- err_entries, cdata->elpe + 1); ++ err_entries, cdata->elpe + 1); + err_entries = cdata->elpe + 1; + } + +@@ -134,38 +130,27 @@ spdk_nvme_ctrlr_get_error_info(struct spdk_nvme_ctrlr *ctrlr, uint32_t err_entri + return err_entries; + } + +-struct spdk_nvme_ctrlr_opts * +-spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr) +-{ +- return &ctrlr->opts; +-} +- +-bool +-spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.ns_manage != 0; + } + +-bool +-spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.format != 0; + } + +-bool +-spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.fna.format_all_ns != 0; + } + +-bool +-spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) ++bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr) + { + return ctrlr->cdata.oacs.directives != 0; + } + +-void +-spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) + { + int rc; + struct nvme_completion_poll_status status; +@@ -192,8 +177,7 @@ spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr) + ctrlr->cdata.unvmcap[1] = cdata.unvmcap[1]; + } + +-int32_t +-spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, void *payload) + { + struct nvme_completion_poll_status status; + int32_t res; +@@ -209,10 +193,11 @@ spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid + } + + status.done = false; +- res = nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, SPDK_NVME_ID_RECV_OP_RET_PARA, +- SPDK_NVME_DIR_TYPE_IDENTIFY, 0, payload, +- sizeof(struct spdk_nvme_identify_recv_ret_para), +- 0, nvme_completion_poll_cb, &status); ++ res = spdk_nvme_ctrlr_cmd_directive_receive(ctrlr, nsid, ++ SPDK_NVME_IDENTIFY_DIRECTIVE_RECEIVE_RETURN_PARAM, ++ SPDK_NVME_DIRECTIVE_TYPE_IDENTIFY, 0, payload, ++ sizeof(struct spdk_nvme_ns_identify_directive_param), ++ 0, 0, nvme_completion_poll_cb, &status); + if (res != 0) { + return res; + } +@@ -225,15 +210,14 @@ spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid + + if (spdk_nvme_cpl_is_error(&status.cpl)) { + SPDK_ERRLOG("Failed to Identify directive! 
sc[0x%x], sct[0x%x]\n", +- status.cpl.status.sc, status.cpl.status.sct); ++ status.cpl.status.sc, status.cpl.status.sct); + return -ENXIO; + } + + return 0; + } + +-uint16_t +-spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) ++uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) + { + return qpair->id; + } +diff --git a/lib/nvme/nvme_ns.c b/lib/nvme/nvme_ns.c +index a714773..db9e5dc 100644 +--- a/lib/nvme/nvme_ns.c ++++ b/lib/nvme/nvme_ns.c +@@ -86,11 +86,6 @@ nvme_ns_set_identify_data(struct spdk_nvme_ns *ns) + ns->flags |= SPDK_NVME_NS_DPS_PI_SUPPORTED; + ns->pi_type = nsdata->dps.pit; + } +-#ifdef SPDK_CONFIG_APP_RW +- if (nsdata->dps.md_start) { +- ns->flags |= SPDK_NVME_NS_DPS_PI_MDSTART; +- } +-#endif + } + + static int +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 519bd87..b324b4b 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -494,6 +494,7 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags); + uint32_t sectors_per_max_io = _nvme_get_sectors_per_max_io(ns, io_flags); + uint32_t sectors_per_stripe = ns->sectors_per_stripe; ++ int rc; + + assert(rc != NULL); + assert(*rc == 0); +diff --git a/lib/nvme/nvme_ns_self.c b/lib/nvme/nvme_ns_self.c +index 5aabbaa..2f1833d 100644 +--- a/lib/nvme/nvme_ns_self.c ++++ b/lib/nvme/nvme_ns_self.c +@@ -14,12 +14,13 @@ + + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns) + { +- return (ns->flags & SPDK_NVME_NS_DPS_PI_MDSTART) ? true : false; ++ struct spdk_nvme_ns_data *nsdata = spdk_nvme_ns_get_data(ns); ++ return nsdata->dps.md_start == 1; + } + + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns) + { +- return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) ? 
true : false; ++ return (ns->flags & SPDK_NVME_NS_DEALLOCATE_SUPPORTED) == 1; + } + + int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata) +@@ -36,8 +37,7 @@ int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_d + if (spdk_nvme_ctrlr_is_ns_manage_supported(ctrlr)) { + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, SPDK_NVME_GLOBAL_NS_TAG, 0, + nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); +- } +- else { ++ } else { + rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_NS, 0, 1, 0, + nsdata, sizeof(*nsdata), nvme_completion_poll_cb, &status); + } +@@ -46,7 +46,8 @@ int nvme_ns_get_common_data(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_d + } + + if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) { +- SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, status.cpl.status.sc); ++ SPDK_ERRLOG("Failed to identify nsdata, sct[%x], sc[%x]\n", status.cpl.status.sct, ++ status.cpl.status.sc); + return -1; + } + +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index 4b5e7cd..b4d2923 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -606,6 +606,7 @@ nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr) + return rc; + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable) + { +@@ -677,6 +678,7 @@ nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable) + + return 0; + } ++#endif + + static int + nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) +diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index 688370d..669f66f 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -10,6 +10,7 @@ + + #include "spdk/stdinc.h" + #include "spdk/likely.h" ++#include "spdk/bdev_module.h" + #include "spdk/string.h" + #include "nvme_internal.h" + #include "nvme_pcie_internal.h" +diff --git a/lib/nvme/nvme_rebind.c b/lib/nvme/nvme_rebind.c +index 5836fa3..1d8dadf 100644 +--- a/lib/nvme/nvme_rebind.c ++++ b/lib/nvme/nvme_rebind.c +@@ -11,13 +11,8 @@ + * GNU General Public License for more details. + */ + +-#include +-#include +-#include +-#include +-#include ++#include "spdk/stdinc.h" + #include +-#include + #include + #include "spdk/log.h" + #include "spdk/nvme.h" +@@ -25,7 +20,7 @@ + #define PATH_LEN 4096 + #define ID_LEN 16 + +-// nvme that fails to bind uio ++/* nvme that fails to bind uio */ + struct failed_nvme { + char *pci_addr; + TAILQ_ENTRY(failed_nvme) tailq; +@@ -35,25 +30,29 @@ struct failed_nvme { + * failed nvmes list, failed nvme will send a "nvme add uevent" when we bind it back to nvme driver + * in spdk_rebind_driver, we should ignore this event or we wouldn't stop binding this nvme to uio. 
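+ * In effect each list entry is a one-shot suppression token: the next "add"
+ * uevent for that PCI address is swallowed and the entry is freed, so a
+ * later, genuine hotplug of the same device still triggers a rebind.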
+ */ +-static TAILQ_HEAD(failed_nvme_list, failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes); ++static TAILQ_HEAD(failed_nvme_list, ++ failed_nvme) g_failed_nvmes = TAILQ_HEAD_INITIALIZER(g_failed_nvmes); + +-// get vendor id from /sys/bus/pci/devices/pci_addr/vendor +-// get device id from /sys/bus/pci/devices/pci_addr/device +-static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, uint8_t ret_id_len) ++/* get vendor id from /sys/bus/pci/devices/pci_addr/vendor ++ * get device id from /sys/bus/pci/devices/pci_addr/device ++ */ ++static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char *ret_id, ++ uint8_t ret_id_len) + { + int32_t fd = -1; + char sysfs_path[PATH_LEN]; + char tmp_id[ID_LEN] = {0}; + char *tmp = NULL; + +- // id's length is 5 byte,like XXXX'\0' ++ /* id's length is 5 byte,like XXXX'\0' */ + if (ret_id_len < 5) { + SPDK_ERRLOG("ret_id_len is less than 5 bytes\n"); + return -1; + } + +- // construct path in sysfs which stores id +- if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, id_type) > 0) { ++ /* construct path in sysfs which stores i */ ++ if (snprintf_s(sysfs_path, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/devices/%s/%s", pci_addr, ++ id_type) > 0) { + fd = open(sysfs_path, O_RDONLY); + } + if (fd < 0) { +@@ -61,16 +60,16 @@ static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char + return -1; + } + +- // id in sysfs is like 0xDDDD ++ /* id in sysfs is like 0xDDDD */ + if (read(fd, tmp_id, ID_LEN - 1) <= 0) { + SPDK_ERRLOG("fail to read id from %s, errno(%d): %s\n", sysfs_path, errno, strerror(errno)); + close(fd); + return -1; + } + +- // 2 means skipping prefix "0x" of id read from sysfs ++ /* 2 means skipping prefix "0x" of id read from sysfs */ + tmp = tmp_id + 2; +- // 4 means the value of id read from sysfs, not including prefix "0x" ++ /* 4 means the value of id read from sysfs, not including prefix "0x" */ + if (snprintf_s(ret_id, ret_id_len, 4, "%s", tmp) <= 0) { + SPDK_ERRLOG("string copy failed\n"); + } +@@ -79,24 +78,24 @@ static int32_t get_id_from_sysfs(const char *pci_addr, const char *id_type, char + return 0; + } + +-// get ven_dev_id which combines vendor id and device id ++/* get ven_dev_id which combines vendor id and device id */ + static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ven_dev_id_len) + { + char ven_id[ID_LEN], dev_id[ID_LEN]; + +- // ven_dev_id combines with vendor id and device id,like "DDDD XXXX'\0'",length is 10 bytes ++ /* ven_dev_id combines with vendor id and device id,like "DDDD XXXX'\0'",length is 10 bytes */ + if (ven_dev_id_len < 10) { + SPDK_ERRLOG("ven_dev_id_len is less than 10 bytes\n"); + return -1; + } + +- // get vendor id from sysfs,format is like "DDDD" ++ /* get vendor id from sysfs,format is like "DDDD" */ + if (get_id_from_sysfs(pci_addr, "vendor", ven_id, ID_LEN) < 0) { + SPDK_ERRLOG("fail to get vendor id\n"); + return -1; + } + +- // get device id from sysfs,format is like "XXXX" ++ /* get device id from sysfs,format is like "XXXX" */ + if (get_id_from_sysfs(pci_addr, "device", dev_id, ID_LEN) < 0) { + SPDK_ERRLOG("fail to get device id\n"); + return -1; +@@ -109,13 +108,13 @@ static int32_t get_ven_dev_id(const char *pci_addr, char *ven_dev_id, uint8_t ve + return 0; + } + +-// unbind driver by writing remove_id and unbind files in sysfs ++/* unbind driver by writing remove_id and unbind files in sysfs */ + static int32_t unbind_driver(char 
*pci_addr, const char *ven_dev_id) + { +- char sysfs_dev_remove_id[PATH_LEN]; // remove_id file path in sysfs +- char sysfs_dev_unbind[PATH_LEN]; // unbind file path in sysfs +- int32_t remove_id_fd = -1; // file description of remove_id file +- int32_t unbind_fd = -1; // file description of unbind file ++ char sysfs_dev_remove_id[PATH_LEN]; /* remove_id file path in sysfs */ ++ char sysfs_dev_unbind[PATH_LEN]; /* unbind file path in sysfs */ ++ int32_t remove_id_fd = -1; /* file description of remove_id file */ ++ int32_t unbind_fd = -1; /* file description of unbind file */ + int32_t ret; + + ret = snprintf_s(sysfs_dev_remove_id, PATH_LEN, PATH_LEN - 1, +@@ -140,7 +139,7 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + (void)write(remove_id_fd, ven_dev_id, strlen(ven_dev_id) + 1); + close(remove_id_fd); + +- // unbind driver by wrting unbind file ++ /* unbind driver by wrting unbind file */ + unbind_fd = open(sysfs_dev_unbind, O_WRONLY); + if (unbind_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_dev_unbind, errno, strerror(errno)); +@@ -149,7 +148,8 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + + ret = write(unbind_fd, pci_addr, strlen(pci_addr) + 1); + if (ret < 0) { +- SPDK_ERRLOG("write %s to %s fail, errno(%d): %s\n",pci_addr, sysfs_dev_unbind, errno, strerror(errno)); ++ SPDK_ERRLOG("write %s to %s fail, errno(%d): %s\n", pci_addr, sysfs_dev_unbind, errno, ++ strerror(errno)); + close(unbind_fd); + return -1; + } +@@ -159,25 +159,27 @@ static int32_t unbind_driver(char *pci_addr, const char *ven_dev_id) + return 0; + } + +-// bind device to new driver by writing new_id and bind files in sysfs ++/* bind device to new driver by writing new_id and bind files in sysfs */ + static int32_t bind_driver(const char *pci_addr, const char *ven_dev_id, const char *driver_name) + { +- char sysfs_driver_new_id[PATH_LEN]; // new_id file path in sysfs +- char sysfs_driver_bind[PATH_LEN]; // bind file path in sysfs +- int32_t new_id_fd = -1; // file description of new_id file +- int32_t bind_fd = -1; // file descriptoin of bind file ++ char sysfs_driver_new_id[PATH_LEN]; /* new_id file path in sysfs */ ++ char sysfs_driver_bind[PATH_LEN]; /* bind file path in sysfs */ ++ int32_t new_id_fd = -1; /* file description of new_id file */ ++ int32_t bind_fd = -1; /* file descriptoin of bind file */ + int rc; + +- rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", driver_name); ++ rc = snprintf_s(sysfs_driver_new_id, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/new_id", ++ driver_name); + if (rc > 0) { +- rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", driver_name); ++ rc = snprintf_s(sysfs_driver_bind, PATH_LEN, PATH_LEN - 1, "/sys/bus/pci/drivers/%s/bind", ++ driver_name); + } + if (rc <= 0) { + SPDK_ERRLOG("string copy failed\n"); + return -1; + } + +- // try to bind driver by write ven_dev_id to new_id file ++ /* try to bind driver by write ven_dev_id to new_id file */ + new_id_fd = open(sysfs_driver_new_id, O_WRONLY); + if (new_id_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_new_id, errno, strerror(errno)); +@@ -187,7 +189,7 @@ static int32_t bind_driver(const char *pci_addr, const char *ven_dev_id, const c + (void)write(new_id_fd, ven_dev_id, strlen(ven_dev_id) + 1); + close(new_id_fd); + +- // bind driver by writing pci_addr to bind file if writing new_id file failed ++ /* bind driver by writing pci_addr to bind 
file if writing new_id file failed */ + bind_fd = open(sysfs_driver_bind, O_WRONLY); + if (bind_fd < 0) { + SPDK_ERRLOG("fail to open %s, errno(%d): %s\n", sysfs_driver_bind, errno, strerror(errno)); +@@ -210,10 +212,10 @@ int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) + return -1; + } + +- // ignore event from binding pci back to nvme driver ++ /* ignore event from binding pci back to nvme driver */ + TAILQ_FOREACH(iter, &g_failed_nvmes, tailq) { + if (strncmp(iter->pci_addr, pci_addr, strlen(iter->pci_addr)) == 0) { +- // oncely ignore nvme add event from binding back to nvme,so do rebind when next hotplug of this pci happen ++ /* oncely ignore nvme add event from binding back to nvme,so do rebind when next hotplug of this pci happen */ + TAILQ_REMOVE(&g_failed_nvmes, iter, tailq); + free(iter->pci_addr); + free(iter); +@@ -237,10 +239,10 @@ int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) + } + + if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { +- // retry ++ /* retry */ + if (bind_driver(pci_addr, ven_dev_id, driver_name) < 0) { + SPDK_ERRLOG("fail to bind %s to %s\n", pci_addr, driver_name); +- // add fialed nvme to g_failed_nvmes ++ /* add fialed nvme to g_failed_nvmes */ + struct failed_nvme *failed_nvme = (struct failed_nvme *)malloc(sizeof(struct failed_nvme)); + if (failed_nvme == NULL) { + SPDK_ERRLOG("failed to malloc for failed_nvme,can't bind %s back to nvme\n", pci_addr); +@@ -254,7 +256,7 @@ int32_t spdk_rebind_driver(char *pci_addr, char *driver_name) + } + TAILQ_INSERT_TAIL(&g_failed_nvmes, failed_nvme, tailq); + +- // bind device back to nvme driver if failed to bind uio ++ /* bind device back to nvme driver if failed to bind uio */ + bind_driver(pci_addr, ven_dev_id, "nvme"); + } + } +diff --git a/lib/rpc/rpc.c b/lib/rpc/rpc.c +index 1bccf92..a14e353 100644 +--- a/lib/rpc/rpc.c ++++ b/lib/rpc/rpc.c +@@ -106,6 +106,8 @@ jsonrpc_handler(struct spdk_jsonrpc_request *request, + + assert(method != NULL); + ++ SPDK_NOTICELOG("[spdk] jsonrpc handle request: %p, handling method: %s\n", request, ++ (char *)method->start); + m = _get_rpc_method(method); + if (m == NULL) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_METHOD_NOT_FOUND, "Method not found"); +diff --git a/lib/thread/thread.c b/lib/thread/thread.c +index 95fc354..344869d 100644 +--- a/lib/thread/thread.c ++++ b/lib/thread/thread.c +@@ -530,7 +530,6 @@ spdk_thread_create(const char *name, const struct spdk_cpuset *cpumask) + thread->msg_cache_count++; + } + } +- + if (name) { + snprintf(thread->name, sizeof(thread->name), "%s", name); + } else { +@@ -549,8 +548,8 @@ spdk_thread_create(const char *name, const struct spdk_cpuset *cpumask) + g_thread_count++; + pthread_mutex_unlock(&g_devlist_mutex); + +- SPDK_DEBUGLOG(thread, "Allocating new thread (%" PRIu64 ", %s)\n", +- thread->id, thread->name); ++ SPDK_NOTICELOG("Allocating new thread (%" PRIu64 ", %s)\n", ++ thread->id, thread->name); + + if (spdk_interrupt_mode_is_enabled()) { + thread->in_interrupt = true; +@@ -1349,7 +1348,6 @@ spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx + int rc; + + assert(thread != NULL); +- + if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) { + SPDK_ERRLOG("Thread %s is marked as exited.\n", thread->name); + return -EIO; +@@ -2198,8 +2196,12 @@ io_device_free(struct io_device *dev) + assert(dev->unregister_thread != NULL); + SPDK_DEBUGLOG(thread, "io_device %s (%p) needs to unregister from thread %s\n", + dev->name, dev->io_device, 
dev->unregister_thread->name); ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(dev->unregister_thread, _finish_unregister, dev); + assert(rc == 0); ++#else ++ _finish_unregister((void *)dev); ++#endif + } + } + +@@ -2613,8 +2615,12 @@ spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx, + end: + pthread_mutex_unlock(&g_devlist_mutex); + ++#ifndef SPDK_CONFIG_APP_RW + rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i); + assert(rc == 0); ++#else ++ _call_completion(i); ++#endif + } + + static void +diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk +index 63de157..a9cef8f 100644 +--- a/mk/spdk.common.mk ++++ b/mk/spdk.common.mk +@@ -72,7 +72,7 @@ else ifneq ($(filter loongarch%,$(TARGET_MACHINE)),) + COMMON_CFLAGS += -march=$(TARGET_ARCHITECTURE) + COMMON_CFLAGS += -DPAGE_SIZE=$(shell getconf PAGESIZE) + else +-COMMON_CFLAGS += -march=$(TARGET_ARCHITECTURE) ++COMMON_CFLAGS += -march=core-avx-i + endif + + ifeq ($(TARGET_MACHINE),x86_64) +@@ -330,7 +330,7 @@ ifeq ($(CONFIG_IDXD_KERNEL),y) + SYS_LIBS += -laccel-config + endif + +-CFLAGS += $(COMMON_CFLAGS) -Wno-pointer-sign -Wstrict-prototypes -Wold-style-definition -std=gnu11 ++CFLAGS += $(COMMON_CFLAGS) -Wno-pointer-sign -Wstrict-prototypes -Wold-style-definition -std=gnu11 -include spdk/config.h + CXXFLAGS += $(COMMON_CFLAGS) -std=c++11 + + SYS_LIBS += -lrt +@@ -340,6 +340,7 @@ endif + SYS_LIBS += -lssl + SYS_LIBS += -lcrypto + SYS_LIBS += -lm ++SYS_LIBS += -lsecurec + + PKGCONF ?= pkg-config + ifneq ($(strip $(CONFIG_OPENSSL_PATH)),) +diff --git a/mk/spdk.modules.mk b/mk/spdk.modules.mk +index 25a9e7b..07da2f0 100644 +--- a/mk/spdk.modules.mk ++++ b/mk/spdk.modules.mk +@@ -7,7 +7,7 @@ + BLOCKDEV_MODULES_LIST = bdev_malloc bdev_null bdev_nvme bdev_passthru bdev_lvol + BLOCKDEV_MODULES_LIST += bdev_raid bdev_error bdev_gpt bdev_split bdev_delay + BLOCKDEV_MODULES_LIST += bdev_zone_block +-BLOCKDEV_MODULES_LIST += blobfs blobfs_bdev blob_bdev blob lvol vmd nvme ++BLOCKDEV_MODULES_LIST += blobfs blobfs_bdev blob_bdev blob lvol vmd nvme conf + + # Some bdev modules don't have pollers, so they can directly run in interrupt mode + INTR_BLOCKDEV_MODULES_LIST = bdev_malloc bdev_passthru bdev_error bdev_gpt bdev_split bdev_raid +diff --git a/module/bdev/nvme/Makefile b/module/bdev/nvme/Makefile +index ca25636..6c71fc0 100644 +--- a/module/bdev/nvme/Makefile ++++ b/module/bdev/nvme/Makefile +@@ -12,6 +12,7 @@ SO_MINOR := 0 + + C_SRCS = bdev_nvme.c bdev_nvme_rpc.c nvme_rpc.c bdev_mdns_client.c + C_SRCS-$(CONFIG_NVME_CUSE) += bdev_nvme_cuse_rpc.c ++C_SRCS-$(CONFIG_APP_RW) += bdev_nvme_self.c + + ifeq ($(OS),Linux) + C_SRCS += vbdev_opal.c vbdev_opal_rpc.c +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index 9cfd473..6cc2628 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -24,6 +24,7 @@ + #include "spdk/string.h" + #include "spdk/util.h" + #include "spdk/uuid.h" ++#include "spdk/conf.h" + + #include "spdk/bdev_module.h" + #include "spdk/log.h" +@@ -31,7 +32,12 @@ + #include "spdk_internal/usdt.h" + #include "spdk_internal/trace_defs.h" + ++#ifdef SPDK_CONFIG_APP_RW ++#include "bdev_nvme_self.h" ++#define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT false ++#else + #define SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT true ++#endif + #define SPDK_BDEV_NVME_DEFAULT_KEEP_ALIVE_TIMEOUT_IN_MS (10000) + + #define NSID_STR_LEN 10 +@@ -191,7 +197,7 @@ static void bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch, + static void bdev_nvme_reset_io(struct nvme_bdev_channel 
*nbdev_ch, struct nvme_bdev_io *bio); + static int bdev_nvme_reset_ctrlr(struct nvme_ctrlr *nvme_ctrlr); + static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr); +-static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); ++void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); + static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr); + + static struct nvme_ns *nvme_ns_alloc(void); +@@ -1610,6 +1616,10 @@ bdev_nvme_poll(void *arg) + group->start_ticks = spdk_get_ticks(); + } + ++#ifdef SPDK_CONFIG_APP_RW ++ bdev_update_ch_timeout(group); ++#endif ++ + num_completions = spdk_nvme_poll_group_process_completions(group->group, 0, + bdev_nvme_disconnected_qpair_cb); + if (group->collect_spin_stat) { +@@ -1641,6 +1651,7 @@ bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_ + nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, + nvme_ctrlr, new_period_us); + } ++#endif + + static int + bdev_nvme_poll_adminq(void *arg) +@@ -3095,8 +3106,11 @@ bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_i + * Simply fall through even if it is not found. + */ + } +- ++#ifdef SPDK_CONFIG_APP_RW ++ _bdev_nvme_submit_request_self(ch, bdev_io); ++#else + _bdev_nvme_submit_request(nbdev_ch, bdev_io); ++#endif + } + + static bool +@@ -3121,6 +3135,12 @@ bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_ABORT: ++#ifdef SPDK_CONFIG_APP_RW ++ case SPDK_BDEV_IO_TYPE_READ_NVME: ++ case SPDK_BDEV_IO_TYPE_WRITE_NVME: ++ case SPDK_BDEV_IO_TYPE_READV_NVME: ++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME: ++#endif + return true; + + case SPDK_BDEV_IO_TYPE_COMPARE: +@@ -3392,7 +3412,7 @@ bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf) + + group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); + +- if (group->poller == NULL) { ++ if (group->poller == NULL && spdk_get_reactor_type()) { + spdk_nvme_poll_group_destroy(group->group); + return -1; + } +@@ -3694,6 +3714,7 @@ nvme_bdev_get_mp_policy_str(struct nvme_bdev *nbdev) + } + } + ++#ifndef SPDK_CONFIG_APP_RW + static int + bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) + { +@@ -3711,6 +3732,7 @@ bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) + + return 0; + } ++#endif + + static void + bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +@@ -3848,7 +3870,6 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = { + .submit_request = bdev_nvme_submit_request, + .io_type_supported = bdev_nvme_io_type_supported, + .get_io_channel = bdev_nvme_get_io_channel, +- .dump_info_json = bdev_nvme_dump_info_json, + .write_config_json = bdev_nvme_write_config_json, + .get_spin_time = bdev_nvme_get_spin_time, + .get_module_ctx = bdev_nvme_get_module_ctx, +@@ -3856,6 +3877,14 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = { + .accel_sequence_supported = bdev_nvme_accel_sequence_supported, + .reset_device_stat = bdev_nvme_reset_device_stat, + .dump_device_stat_json = bdev_nvme_dump_device_stat_json, ++#ifdef SPDK_CONFIG_APP_RW ++ .dump_info_json = bdev_nvme_dump_info_json_self, ++ .bdev_poll_rsp = bdev_nvme_poll, ++ .get_io_channel_id = bdev_nvme_get_io_channel_id, ++ .get_timeout_count = bdev_nvme_get_timeout_count, ++#else ++ .dump_info_json = bdev_nvme_dump_info_json, ++#endif + }; + + typedef int 
(*bdev_nvme_parse_ana_log_page_cb)( +@@ -4031,7 +4060,12 @@ nvme_disk_create(struct spdk_bdev *disk, const char *base_name, + if (cdata->oncs.write_zeroes) { + disk->max_write_zeroes = UINT16_MAX + 1; + } ++ ++#ifdef SPDK_CONFIG_APP_RW ++ disk->blocklen = spdk_nvme_ns_get_sector_size(ns); ++#else + disk->blocklen = spdk_nvme_ns_get_extended_sector_size(ns); ++#endif + disk->blockcnt = spdk_nvme_ns_get_num_sectors(ns); + disk->max_segment_size = spdk_nvme_ctrlr_get_max_xfer_size(ctrlr); + /* NVMe driver will split one request into multiple requests +@@ -4519,7 +4553,7 @@ bdev_nvme_delete_io_path_done(struct spdk_io_channel_iter *i, int status) + nvme_ctrlr_depopulate_namespace_done(nvme_ns); + } + +-static void ++void + nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) + { + struct nvme_bdev *bdev; +@@ -5373,7 +5407,7 @@ bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts) + opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec; + } + +-static void ++void + attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts) + { +@@ -5445,7 +5479,7 @@ bdev_nvme_delete_ctrlr(struct nvme_ctrlr *nvme_ctrlr, bool hotplug) + return rc; + } + +-static void ++void + remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) + { + struct nvme_ctrlr *nvme_ctrlr = cb_ctx; +@@ -5988,6 +6022,9 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid, + } + ctx->poller = SPDK_POLLER_REGISTER(bdev_nvme_async_poll, ctx, 1000); + ++#ifdef SPDK_CONFIG_APP_RW ++ return bdev_probe_ctrlr(); ++#endif + return 0; + } + +@@ -7174,11 +7211,13 @@ bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) + } + } + +-static void ++void + bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl) + { + struct nvme_bdev_io *bio = ref; +- ++#ifdef SPDK_CONFIG_APP_RW ++ spdk_bdev_set_io_location(ref, (uint8_t)LOCAL_LIBSTORAGE_FROM_DISK); ++#endif + bdev_nvme_io_complete_nvme_status(bio, cpl); + } + +@@ -7359,7 +7398,7 @@ bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl) + bdev_nvme_admin_passthru_complete_nvme_status, bio); + } + +-static void ++void + bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) + { + struct nvme_bdev_io *bio = ref; +@@ -7376,7 +7415,7 @@ bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset) + } + } + +-static int ++int + bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length) + { + struct nvme_bdev_io *bio = ref; +@@ -8279,6 +8318,223 @@ bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w) + spdk_json_write_array_end(w); + } + ++#ifdef SPDK_CONFIG_APP_RW ++void * ++nvme_channel_get_group(void *io_ch) ++{ ++ struct nvme_io_channel *nvme_io_ch = io_ch; ++ return nvme_io_ch->group; ++} ++struct nvme_bdev_io *nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, ++ int iovcnt) ++{ ++ bio->iovs = iov; ++ bio->iovcnt = iovcnt; ++ bio->iovpos = 0; ++ bio->iov_offset = 0; ++ return bio; ++} ++ ++struct nvme_probe_ctx *bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, ++ const char *base_name, const char *hostnqn) ++{ ++ struct nvme_probe_ctx *probe_ctx = calloc(1, sizeof(*probe_ctx)); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to allocate probe_ctx\n"); ++ return NULL; ++ } ++ ++ probe_ctx->count = 1; ++ probe_ctx->trids[0] = *trid; ++ probe_ctx->names[0] = base_name; ++ probe_ctx->hostnqn = hostnqn; ++ return probe_ctx; ++} ++ ++bool ++probe_cb(void 
*cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr_opts *opts) ++{ ++ struct nvme_probe_ctx *ctx = cb_ctx; ++ ++ SPDK_DEBUGLOG(nvme, "Probing device %s\n", trid->traddr); ++ ++ if (nvme_bdev_ctrlr_get(trid)) { ++ SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", ++ trid->traddr); ++ return false; ++ } ++ ++ if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) { ++ bool claim_device = false; ++ size_t i; ++ ++ for (i = 0; i < ctx->count; i++) { ++ if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) { ++ claim_device = true; ++ break; ++ } ++ } ++ ++ if (!claim_device) { ++ SPDK_DEBUGLOG(nvme, "Not claiming device at %s\n", trid->traddr); ++ return false; ++ } ++ } ++ ++ if (ctx->hostnqn) { ++ snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn); ++ } ++ ++ opts->arbitration_burst = (uint8_t)g_opts.arbitration_burst; ++ opts->low_priority_weight = (uint8_t)g_opts.low_priority_weight; ++ opts->medium_priority_weight = (uint8_t)g_opts.medium_priority_weight; ++ opts->high_priority_weight = (uint8_t)g_opts.high_priority_weight; ++ ++ return true; ++} ++ ++int bdev_probe_ctrlr(void) ++{ ++ struct spdk_conf_section *sp; ++ const char *val; ++ int rc = 0; ++ int64_t intval = 0; ++ size_t i; ++ struct nvme_probe_ctx *probe_ctx = NULL; ++ int retry_count; ++ uint32_t local_nvme_num = 0; ++ ++ sp = spdk_conf_find_section(NULL, "Nvme"); ++ if (sp == NULL) { ++ SPDK_ERRLOG("config file does not contain [Nvme] section, which need to be provided\n"); ++ goto end; ++ } ++ ++ probe_ctx = calloc(1, sizeof(*probe_ctx)); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to allocate probe_ctx\n"); ++ rc = -1; ++ goto end; ++ } ++ ++ retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); ++ if (retry_count >= 0) { ++ g_opts.retry_count = retry_count; ++ } ++ if (retry_count > 255) { ++ SPDK_WARNLOG("RetryCount:%d should not be greater than 255, set it to 255 this time\n", ++ retry_count); ++ retry_count = 255; ++ } ++ syslog(LOG_INFO, "RetryCount is set to %d\n", retry_count); ++ ++ val = spdk_conf_section_get_val(sp, "TimeoutUsec"); ++ if (val != NULL) { ++ intval = spdk_strtoll(val, 10); ++ if (intval < 0) { ++ SPDK_ERRLOG("Invalid TimeoutUsec value\n"); ++ rc = -1; ++ goto end; ++ } ++ } ++ syslog(LOG_INFO, "TimeoutUsec is set to %ld\n", intval); ++ g_opts.timeout_us = intval; ++ ++ if (g_opts.timeout_us > 0) { ++ val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); ++ if (val != NULL) { ++ if (!strcasecmp(val, "Reset")) { ++ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET; ++ } else if (!strcasecmp(val, "Abort")) { ++ g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT; ++ } ++ } ++ } ++ ++ intval = spdk_conf_section_get_intval(sp, "AdminPollRate"); ++ if (intval > 0) { ++ g_opts.nvme_adminq_poll_period_us = intval; ++ } ++ syslog(LOG_INFO, "AdminPollRate is set to %lu\n", g_opts.nvme_adminq_poll_period_us); ++ intval = spdk_conf_section_get_intval(sp, "IOPollRate"); ++ if (intval > 0) { ++ g_opts.nvme_ioq_poll_period_us = intval; ++ } ++ ++ g_opts.delay_cmd_submit = spdk_conf_section_get_boolval(sp, "DelayCmdSubmit", ++ SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT); ++ ++ for (i = 0; i < NVME_MAX_CONTROLLERS; i++) { ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0); ++ if (val == NULL) { ++ break; ++ } ++ ++ rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse TransportID: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ ++ rc = 
spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse HostID: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1); ++ if (val == NULL) { ++ SPDK_ERRLOG("No name provided for TransportID\n"); ++ rc = -1; ++ goto end; ++ } ++ ++ probe_ctx->names[i] = val; ++ ++ val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2); ++ if (val != NULL) { ++ rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val); ++ if (rc < 0) { ++ SPDK_ERRLOG("Unable to parse prchk: %s\n", val); ++ rc = -1; ++ goto end; ++ } ++ } ++ ++ probe_ctx->count++; ++ ++ if (probe_ctx->trids[i].trtype == SPDK_NVME_TRANSPORT_PCIE) { ++ local_nvme_num++; ++ } ++ } ++ ++ if (local_nvme_num > 0) { ++ /* used to probe local NVMe device */ ++ if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, remove_cb)) { ++ rc = -1; ++ goto end; ++ } ++ ++ for (i = 0; i < probe_ctx->count; i++) { ++ if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) { ++ continue; ++ } ++ ++ if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { ++ SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); ++ SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); ++ } ++ } ++ } ++end: ++ free(probe_ctx); ++ return rc; ++} ++#endif ++ + SPDK_LOG_REGISTER_COMPONENT(bdev_nvme) + + SPDK_TRACE_REGISTER_FN(bdev_nvme_trace, "bdev_nvme", TRACE_GROUP_BDEV_NVME) +diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h +index 7487765..7aca15e 100644 +--- a/module/bdev/nvme/bdev_nvme.h ++++ b/module/bdev/nvme/bdev_nvme.h +@@ -262,6 +262,9 @@ struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid); + struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr); + struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns); + ++struct nvme_bdev_io; ++struct nvme_probe_ctx; ++ + enum spdk_bdev_timeout_action { + SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, + SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, +@@ -420,4 +423,43 @@ void bdev_nvme_set_multipath_policy(const char *name, + bdev_nvme_set_multipath_policy_cb cb_fn, + void *cb_arg); + ++#ifdef SPDK_CONFIG_APP_RW ++void ++bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl); ++ ++void ++bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset); ++ ++int ++bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length); ++ ++bool ++probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); ++ ++void ++nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, ++ struct nvme_async_probe_ctx *ctx); ++ ++void ++nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns); ++ ++int ++bdev_probe_ctrlr(void); ++ ++struct nvme_bdev_io * ++nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt); ++ ++struct nvme_probe_ctx * ++bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, const char *base_name, ++ const char *hostnqn); ++#endif ++ + #endif /* SPDK_BDEV_NVME_H */ +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +new file mode 100644 +index 0000000..7371ecb +--- /dev/null ++++ 
b/module/bdev/nvme/bdev_nvme_self.c +@@ -0,0 +1,661 @@ ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++#include "bdev_nvme.h" ++ ++#include "spdk/json.h" ++#include "spdk/likely.h" ++#include "spdk/bdev_module.h" ++#include "spdk/nvme_ocssd.h" ++#include "spdk/nvme.h" ++ ++#include "spdk_internal/bdev_stat.h" ++#include "bdev_nvme_self.h" ++#include "common.h" ++#include ++ ++enum data_direction { ++ BDEV_DISK_READ = 0, ++ BDEV_DISK_WRITE = 1 ++}; ++ ++void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) ++{ ++ uint64_t current_ticks = 0; ++ uint64_t poll_ticks = 0; ++ int64_t poll_time = 0; ++ ++ current_ticks = spdk_get_ticks(); ++ ++ if (spdk_unlikely(g_polltime_threshold)) { ++ if (group->save_start_ticks) { ++ poll_ticks = current_ticks - group->save_start_ticks; ++ poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz(); ++ if (poll_time >= g_polltime_threshold) { ++ group->num_poll_timeout++; ++ SPDK_WARNLOG("group[%p] poll timeout in %ldms", group, poll_time); ++ } ++ } ++ group->save_start_ticks = current_ticks; ++ } ++} ++ ++int ++_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ ++ if (nvme_ch->qpair == NULL) { ++ /* The device is currently resetting */ ++ return -1; ++ } ++ ++ switch (bdev_io->type) { ++ case SPDK_BDEV_IO_TYPE_READ_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "read %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, ++ bdev_io->u.contig.offset_blocks); ++ return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, bdev_io->u.contig.buf, ++ bdev_io->u.contig.md_buf, BDEV_DISK_READ, ++ bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_WRITE_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, ++ bdev_io->u.contig.offset_blocks); ++ return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, bdev_io->u.contig.buf, ++ bdev_io->u.contig.md_buf, BDEV_DISK_WRITE, ++ bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_READV_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, ++ bdev_io->u.bdev.offset_blocks); ++ return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, BDEV_DISK_READ, ++ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, ++ bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); ++ case SPDK_BDEV_IO_TYPE_WRITEV_NVME: ++ SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, ++ bdev_io->u.bdev.offset_blocks); ++ return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, ++ bdev_io->driver_ctx, BDEV_DISK_WRITE, ++ bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, ++ bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); 
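++	/* For the unmap case below, HSAK reuses the contiguous-buffer fields:
++	 * u.contig.buf carries the spdk_nvme_dsm_range array and
++	 * u.contig.num_blocks carries the range count rather than an LBA count. */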
++ case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS: ++ return bdev_nvme_unmap_blocks((struct nvme_bdev *)bdev_io->bdev->ctxt, ++ ch, ++ (void *)bdev_io->driver_ctx, ++ (struct spdk_nvme_dsm_range *)bdev_io->u.contig.buf, ++ bdev_io->u.contig.num_blocks); ++ default: ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int ++bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) ++{ ++ return 0; ++} ++ ++uint16_t ++bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ uint16_t channel_id; ++ struct spdk_nvme_qpair *qpair = nvme_ch->qpair; ++ channel_id = spdk_nvme_get_qpair_id(qpair); ++ return channel_id; ++} ++ ++uint64_t ++bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ return nvme_ch->group->num_poll_timeout; ++} ++ ++int32_t ++nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) ++{ ++ uint32_t num_ctrlr = 0, i = 0; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ struct nvme_ctrlr_info *pCtrlrInfo = NULL; ++ const struct spdk_nvme_ctrlr_data *cdata = NULL; ++ struct spdk_nvme_ctrlr_opts *opts = NULL; ++ ++ struct spdk_pci_device *pci_dev = NULL; ++ int rc; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ num_ctrlr++; ++ } ++ if (num_ctrlr == 0) { ++ SPDK_NOTICELOG("No any nvme controller.\n"); ++ return 0; ++ } ++ if (ctrlName != NULL) { ++ num_ctrlr = 1; ++ } ++ pCtrlrInfo = calloc(num_ctrlr, sizeof(struct nvme_ctrlr_info)); ++ if (pCtrlrInfo == NULL) { ++ SPDK_ERRLOG("Failed to alloc memory for getting controller infomation.\n"); ++ return -1; ++ } ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (i >= num_ctrlr) { /* prevent having controllers be added or deleted */ ++ i++; ++ continue; ++ } ++ if (ctrlName != NULL) { ++ if (strcmp(nvme_bdev_ctrlr->name, ctrlName) != 0) { ++ continue; ++ } ++ } ++ cdata = spdk_nvme_ctrlr_get_data(nvme_bdev_ctrlr->ctrlr); ++ opts = spdk_nvme_ctrlr_get_opts(nvme_bdev_ctrlr->ctrlr); ++ pci_dev = spdk_nvme_ctrlr_get_pci_device(nvme_bdev_ctrlr->ctrlr); ++ if (pci_dev == NULL) { ++ SPDK_ERRLOG("Failed to get pci device\n"); ++ break; ++ } ++ rc = strcpy_s(pCtrlrInfo[i].ctrlName, sizeof(pCtrlrInfo[i].ctrlName), nvme_bdev_ctrlr->name); ++ if (rc != 0) { ++ SPDK_ERRLOG("String copy failed\n"); ++ } ++ rc = strcpy_s(pCtrlrInfo[i].pciAddr, sizeof(pCtrlrInfo[i].pciAddr), ++ nvme_bdev_ctrlr->connected_trid->traddr); ++ if (rc != 0) { ++ SPDK_ERRLOG("String copy failed\n"); ++ } ++ ++ rc = memcpy_s(pCtrlrInfo[i].sn, sizeof(pCtrlrInfo[i].sn), cdata->sn, 20); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ rc = memcpy_s(pCtrlrInfo[i].fr, sizeof(pCtrlrInfo[i].fr), cdata->fr, 8); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ rc = memcpy_s(pCtrlrInfo[i].mn, sizeof(pCtrlrInfo[i].mn), cdata->mn, 40); ++ if (rc != 0) { ++ SPDK_ERRLOG("Memory copy failed\n"); ++ } ++ ++ pCtrlrInfo[i].trtype = (uint16_t)nvme_bdev_ctrlr->connected_trid->trtype; ++ pCtrlrInfo[i].tnvmcap = cdata->tnvmcap[0]; ++ pCtrlrInfo[i].unvmcap = cdata->unvmcap[0]; ++ pCtrlrInfo[i].support_ns = cdata->oacs.ns_manage; ++ pCtrlrInfo[i].directives = cdata->oacs.directives; ++ pCtrlrInfo[i].dsm = cdata->oncs.dsm; ++ pCtrlrInfo[i].max_num_ns = cdata->nn; ++ pCtrlrInfo[i].num_io_queues = opts->num_io_queues; ++ pCtrlrInfo[i].io_queue_size = opts->io_queue_size; ++ pCtrlrInfo[i].device_id = spdk_pci_device_get_device_id(pci_dev); ++ 
pCtrlrInfo[i].subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev); ++ pCtrlrInfo[i].vid = cdata->vid; ++ pCtrlrInfo[i].ssvid = cdata->ssvid; ++ pCtrlrInfo[i].ctrlid = cdata->cntlid; ++ pCtrlrInfo[i].version = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev_ctrlr->ctrlr).raw; ++ i++; ++ if (ctrlName != NULL) { ++ break; ++ } ++ } ++ if (i != num_ctrlr) { ++ SPDK_ERRLOG("It has controller been added or deleted when fetched infomation, please try again later.\n"); ++ free(pCtrlrInfo); ++ return -1; ++ } ++ *ppCtrlr = pCtrlrInfo; ++ return num_ctrlr; ++} ++ ++struct nvme_bdev_ctrlr * ++nvme_ctrlr_get_by_name(const char *name) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ ++ if (name == NULL) { ++ return NULL; ++ } ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(name, nvme_bdev_ctrlr->name) == 0) { ++ return nvme_bdev_ctrlr; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct spdk_nvme_ctrlr * ++spdk_nvme_ctrlr_get_by_name(const char *ctrlname) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(nvme_bdev_ctrlr->name, ctrlname) == 0) { ++ return nvme_bdev_ctrlr->ctrlr; ++ } ++ } ++ ++ return NULL; ++} ++ ++struct spdk_nvme_ctrlr * ++spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) ++{ ++ if (nvme_bdev_ctrlr == NULL) { ++ return NULL; ++ } ++ return nvme_bdev_ctrlr->ctrlr; ++} ++ ++void ++nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) ++{ ++ int i; ++ size_t size = strnlen(ctrlname, 24); ++ ++ for (i = 0; i < STAT_MAX_NUM; i++) { ++ if (strncmp(g_io_stat_map[i].bdev_name, ctrlname, size) == 0) { ++ if ((g_io_stat_map[i].bdev_name[size] == 'n') && isdigit(g_io_stat_map[i].bdev_name[size + 1])) { ++ g_io_stat_map[i].channel_id = 0; ++ memset(g_io_stat_map[i].bdev_name, 0, sizeof(g_io_stat_map[i].bdev_name)); ++ g_io_stat_map[i].num_read_ops = 0; ++ g_io_stat_map[i].num_write_ops = 0; ++ g_io_stat_map[i].bytes_read = 0; ++ g_io_stat_map[i].bytes_written = 0; ++ g_io_stat_map[i].io_outstanding = 0; ++ g_io_stat_map[i].read_latency_ticks = 0; ++ g_io_stat_map[i].write_latency_ticks = 0; ++ g_io_stat_map[i].io_ticks = 0; ++ /* used flag set false in last avoid race in channel create */ ++ g_io_stat_map[i].used = false; ++ } ++ } ++ } ++} ++ ++void ++nvme_ctrlr_clear_iostat_all(void) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ nvme_ctrlr_clear_iostat_by_name(nvme_bdev_ctrlr->name); ++ } ++} ++ ++struct spdk_nvme_ns * ++bdev_nvme_get_ns(struct nvme_bdev *nbdev) ++{ ++ return nbdev->nvme_ns->ns; ++} ++ ++void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr) ++{ ++ uint32_t i; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ struct nvme_bdev_ns *ns = NULL; ++ struct nvme_bdev *nvme_bdev = NULL, *tmp = NULL; ++ ++ ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (nvme_bdev_ctrlr->ctrlr != ctrlr) { ++ continue; ++ } ++ ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { ++ ns = nvme_bdev_ctrlr->namespaces[i]; ++ TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { ++ nvme_bdev->disk.blocklen = spdk_nvme_ns_get_sector_size(nvme_bdev->nvme_ns->ns); ++ nvme_bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(nvme_bdev->nvme_ns->ns); ++ } ++ } ++ return; ++ } ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++} ++ ++int ++bdev_nvme_update_ns(struct 
nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
++{
++	struct spdk_nvme_ctrlr *ctrlr = NULL;
++	struct nvme_bdev_ns *ns = NULL;
++
++	/* check the pointer before dereferencing it; valid NSIDs start at 1 */
++	if (nvme_bdev_ctrlr == NULL) {
++		SPDK_ERRLOG("Parameter error. nvme_bdev_ctrlr is NULL\n");
++		return -1;
++	}
++	if (nsid == 0 || nsid > nvme_bdev_ctrlr->num_ns) {
++		SPDK_ERRLOG("Parameter error. nsid[%u], the max nsid is[%u]\n", nsid, nvme_bdev_ctrlr->num_ns);
++		return -1;
++	}
++
++	ctrlr = nvme_bdev_ctrlr->ctrlr;
++	ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
++
++	if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
++		ns->type = NVME_BDEV_NS_OCSSD;
++	} else {
++		ns->type = NVME_BDEV_NS_STANDARD;
++	}
++
++	if (!ns->populated && spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) {
++		SPDK_NOTICELOG("NSID %u to be added\n", nsid);
++		ns->id = nsid;
++		ns->ctrlr = nvme_bdev_ctrlr;
++		TAILQ_INIT(&ns->bdevs);
++		/* add a new bdev device in this ns */
++		nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, NULL);
++		return 0;
++	}
++
++	if (ns->populated && !spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) {
++		SPDK_NOTICELOG("NSID %u is removed\n", nsid);
++		nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns);
++		return 0;
++	}
++	return -1;
++}
++
++bool
++spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
++{
++	struct nvme_bdev_ns *ns = NULL;
++	struct nvme_bdev *bdev = NULL, *tmp = NULL;
++	bool empty = false;
++
++	ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
++	if (ns == NULL) {
++		return true;
++	}
++
++	TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) {
++		pthread_mutex_lock(&bdev->disk.internal.mutex);
++		empty = TAILQ_EMPTY(&bdev->disk.internal.open_descs);
++		/* for each bdev in the ns, check whether any open descriptor is still in the tailq */
++		if (empty) {
++			/* this bdev is idle; mark it and check the next one until all bdevs are checked */
++			bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_REMOVING;
++			pthread_mutex_unlock(&bdev->disk.internal.mutex);
++		} else {
++			/* at least one bdev is still in use, so quit the scan and report false;
++			   bdevs already marked REMOVING can be restored later via spdk_bdev_set_ns_normal().
*/
++ pthread_mutex_unlock(&bdev->disk.internal.mutex);
++ break;
++ }
++ }
++ return empty;
++}
++
++void
++spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
++{
++ struct nvme_bdev_ns *ns = NULL;
++ struct nvme_bdev *bdev = NULL, *tmp = NULL;
++
++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
++ if (ns == NULL) {
++ return;
++ }
++
++ TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) {
++ pthread_mutex_lock(&bdev->disk.internal.mutex);
++ /* Set ns_status back to ready in case the namespace deletion failed. */
++ if (bdev->disk.internal.ns_status == SPDK_BDEV_NS_STATUS_REMOVING) {
++ bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_READY;
++ }
++ pthread_mutex_unlock(&bdev->disk.internal.mutex);
++ }
++}
++
++int
++bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx,
++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba)
++{
++ int rc;
++ uint32_t io_flags = 0;
++ uint8_t *bdev_io_action = (uint8_t *)driver_ctx;
++ /* Filter bits 0 and 1 of io->pi_action to get pi_action. */
++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03;
++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI];
++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA];
++ uint32_t pi_type;
++
++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME);
++
++ if (pi_action > IO_NO_PROTECTION) {
++ pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns);
++ if (dif_flag & FLAG_PRCHK) {
++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
++ }
++ /* Protection type 3 does not support the reference tag. */
++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) {
++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
++ }
++ if (pi_action == IO_HALF_WAY_PROTECTION) {
++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT;
++ }
++ }
++
++ if (fua) {
++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS;
++ }
++
++ if (direction == BDEV_DISK_READ) {
++ rc = spdk_nvme_ns_cmd_read_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba,
++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0);
++ } else {
++ rc = spdk_nvme_ns_cmd_write_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba,
++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0);
++ }
++
++ if (rc != 0) {
++ if (rc == -ENOMEM) {
++ SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "read" : "write", rc);
++ } else {
++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ?
"read" : "write", ++ rc, qpair); ++ } ++ } ++ return rc; ++} ++ ++int ++bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) ++{ ++ int rc; ++ struct nvme_bdev_io *bio = NULL; ++ uint32_t io_flags = 0; ++ uint8_t *bdev_io_action = (uint8_t *)driver_ctx; ++ /* filter bit 0&1 of io->pi_action to get pi_action */ ++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; ++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; ++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; ++ uint32_t pi_type; ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ ++ if (pi_action > IO_NO_PROTECTION) { ++ pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns); ++ if (dif_flag & FLAG_PRCHK) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; ++ } ++ /* type3 not support ref tag */ ++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; ++ } ++ if (pi_action == IO_HALF_WAY_PROTECTION) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT; ++ } ++ } ++ ++ if (fua) { ++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; ++ } ++ ++ bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); ++ ++ if (direction == BDEV_DISK_READ) { ++ rc = spdk_nvme_ns_cmd_readv(bdev->nvme_ns->ns, qpair, lba, ++ lba_count, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } else { ++ rc = spdk_nvme_ns_cmd_writev(bdev->nvme_ns->ns, qpair, lba, lba_count, ++ 0, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } ++ ++ if (rc != 0) { ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc); ++ } else { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? 
"read" : "write", rc, ++ qpair); ++ } ++ } ++ return rc; ++} ++ ++struct nvme_bdev_ctrlr * ++bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) ++{ ++ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); ++ struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; ++ if (nbdev == NULL) { ++ return NULL; ++ } ++ return nbdev->nvme_ns->ctrlr; ++} ++ ++int ++bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count) ++{ ++ struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); ++ int i; ++ ++ if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { ++ SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); ++ return -EINVAL; ++ } ++ ++ if (unmap_d == NULL) { ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < unmap_count; i++) { ++ if (unmap_d[i].length > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { ++ SPDK_ERRLOG("Invalid parameter, unmap block count: %u\n", unmap_d[i].length); ++ return -EINVAL; ++ } ++ unmap_d[i].attributes.raw = 0; ++ } ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ return spdk_nvme_ns_cmd_unmap_blocks(nbdev->nvme_ns->ns, nvme_ch->qpair, ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); ++} ++ ++void ++spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) ++{ ++ remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); ++} ++ ++void spdk_bdev_fail_ctrlr(const char *traddr) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, traddr) == 0) { ++ spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); ++ remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); ++ return; ++ } ++ } ++} ++ ++int ++spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, size_t *count, ++ const char *hostnqn) ++{ ++ struct nvme_probe_ctx *probe_ctx; ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ struct nvme_bdev_ns *ns; ++ struct nvme_bdev *nvme_bdev; ++ struct nvme_bdev *tmp = NULL; ++ uint32_t i, nsid; ++ size_t j; ++ ++ if (nvme_bdev_ctrlr_get(trid) != NULL) { ++ SPDK_ERRLOG("A controller with the trid (traddr: %s) already exists.\n", trid->traddr); ++ return -1; ++ } ++ ++ probe_ctx = bdev_nvme_create_probe_ctx(trid, base_name, hostnqn); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to create probe_ctx\n"); ++ return -1; ++ } ++ ++ if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { ++ SPDK_ERRLOG("Failed to probe for new devices\n"); ++ free(probe_ctx); ++ return -1; ++ } ++ ++ nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); ++ if (!nvme_bdev_ctrlr) { ++ SPDK_ERRLOG("Failed to find new NVMe controller\n"); ++ free(probe_ctx); ++ return -1; ++ } ++ ++ /* ++ * Report the new bdevs that were created in this call. ++ * There can be more than one bdev per NVMe controller since one bdev is created per namespace. 
++ */
++ j = 0;
++ for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
++ nsid = i + 1;
++ ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
++
++ if (!ns->populated) {
++ continue;
++ }
++ assert(ns->id == nsid);
++ TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) {
++ if (j < *count) {
++ names[j] = nvme_bdev->disk.name;
++ j++;
++ } else {
++ SPDK_ERRLOG("Maximum number of namespaces is %zu.\n", *count);
++ free(probe_ctx);
++ return -1;
++ }
++ }
++ }
++
++ *count = j;
++
++ free(probe_ctx);
++ return 0;
++}
+diff --git a/module/bdev/nvme/bdev_nvme_self.h b/module/bdev/nvme/bdev_nvme_self.h
+new file mode 100644
+index 0000000..d7cc587
+--- /dev/null
++++ b/module/bdev/nvme/bdev_nvme_self.h
+@@ -0,0 +1,43 @@
++/*
++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 and
++ * only version 2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++
++void
++bdev_update_ch_timeout(struct nvme_bdev_poll_group *group);
++
++int
++_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
++
++int
++bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w);
++
++uint16_t
++bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch);
++
++uint64_t
++bdev_nvme_get_timeout_count(struct spdk_io_channel *ch);
++
++int
++bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx,
++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba);
++
++int
++bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair,
++ void *driver_ctx,
++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba);
++
++struct nvme_bdev_ctrlr *
++bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc);
++
++int
++bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx,
++ struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count);
+diff --git a/scripts/setup_self.sh b/scripts/setup_self.sh
+new file mode 100755
+index 0000000..9e77c29
+--- /dev/null
++++ b/scripts/setup_self.sh
+@@ -0,0 +1,347 @@
++#!/usr/bin/env bash
++
++set -e
++
++rootdir=$(readlink -f $(dirname $0))/..
++
++function linux_iter_pci {
++ # Argument is the class code
++ # TODO: More specifically match against only class codes in the grep
++ # step.
++ lspci -mm -n -D | grep $1 | tr -d '"' | awk -F " " '{print $1}'
++}
++
++function linux_bind_driver() {
++ bdf="$1"
++ driver_name="$2"
++ old_driver_name="no driver"
++ ven_dev_id=$(lspci -n -s $bdf | cut -d' ' -f3 | sed 's/:/ /')
++
++ if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
++ old_driver_name=$(basename $(readlink /sys/bus/pci/devices/$bdf/driver))
++
++ if [ "$driver_name" = "$old_driver_name" ]; then
++ return 0
++ fi
++
++ echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
++ echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
++ fi
++
++ echo "$bdf ($ven_dev_id): $old_driver_name -> $driver_name"
++
++ echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
++ echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true
++}
++
++function linux_hugetlbfs_mount() {
++ mount | grep ' type hugetlbfs ' | awk '{ print $3 }'
++}
++
++function is_device_in_except_device_list() {
++ exists_flag=0
++ if [ $# -gt 1 ]; then
++ except_dev_list=$2
++ fi
++
++ for dev in ${except_dev_list[@]}
++ do
++ if [ "$dev" == "$1" ]; then
++ exists_flag=1
++ fi
++ done
++ echo ${exists_flag}
++}
++
++function config_linux_device {
++ if [ $# -gt 0 ]; then
++ configlist=$*
++ echo configure devices $configlist
++ else
++ echo "need to specify at least one device to bind to the uio driver."
++ exit 1
++ fi
++ driver_name=uio_pci_generic
++
++ # NVMe
++ modprobe $driver_name || true
++ for bdf in ${configlist[@]}; do
++ existflag=0
++ for confbdf in $(linux_iter_pci 0108); do
++ if [ "$bdf" == "$confbdf" ]; then
++ linux_bind_driver "$bdf" "$driver_name"
++ existflag=1
++ break
++ fi
++ done
++ if [ $existflag -eq 0 ]; then
++ echo "nvme device \"$bdf\" is not present"
++ fi
++ done
++ config_linux_hugepage
++}
++
++function configure_linux {
++ if [ $# -gt 0 ]; then
++ exceptdevlist=$*
++ echo configure devices except $exceptdevlist
++ fi
++ # Use uio, not the IOMMU.
++ driver_name=uio_pci_generic
++
++ # NVMe
++ modprobe $driver_name || true
++ for bdf in $(linux_iter_pci 0108); do
++ need_configure=`is_device_in_except_device_list ${bdf} "${exceptdevlist}"`
++ if [ $need_configure -ne 0 ]; then
++ continue
++ fi
++ linux_bind_driver "$bdf" "$driver_name"
++ done
++
++ echo "1" > "/sys/bus/pci/rescan"
++
++ config_linux_hugepage
++}
++
++function config_linux_hugepage {
++ hugetlbfs_mount=$(linux_hugetlbfs_mount)
++
++ if [ -z "$hugetlbfs_mount" ]; then
++ hugetlbfs_mount=/mnt/huge
++ echo "Mounting hugetlbfs at $hugetlbfs_mount"
++ mkdir -p "$hugetlbfs_mount"
++ mount -t hugetlbfs nodev "$hugetlbfs_mount"
++ fi
++ echo "$NRHUGE" > /proc/sys/vm/nr_hugepages
++}
++
++function reset_linux {
++ # NVMe
++ modprobe nvme || true
++ for bdf in $(linux_iter_pci 0108); do
++ linux_bind_driver "$bdf" nvme
++ done
++
++ echo "1" > "/sys/bus/pci/rescan"
++
++ hugetlbfs_mount=$(linux_hugetlbfs_mount)
++ rm -f "$hugetlbfs_mount"/spdk*map_*
++}
++
++function status_linux {
++ echo "NVMe devices"
++
++ echo -e "BDF\t\tNuma Node\tDriver name\t\tDevice name"
++ for bdf in $(linux_iter_pci 0108); do
++ driver=`grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}'`
++ node=`cat /sys/bus/pci/devices/$bdf/numa_node`;
++ if [ "$driver" = "nvme" ]; then
++ if [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
++ name="\t"`ls /sys/bus/pci/devices/$bdf/nvme`;
++ else
++ name="\t"`ls /sys/bus/pci/devices/$bdf/misc`;
++ fi
++ else
++ name="-";
++ fi
++ echo -e "$bdf\t$node\t\t$driver\t\t$name";
++ done
++}
++
++function reset_device_linux {
++ #NVMe
++ if [ $# -gt 0 ]; then
++ resetdevlist=$*
++ echo reset nvme devices $resetdevlist
++ else
++ echo no devices to reset
++ return
++ fi
++
++ for bdf in ${resetdevlist[@]}; do
++ exist=0
++ for existbdf in $(linux_iter_pci 0108); do
++ if [[ "$existbdf" == "$bdf" ]]; then
++ exist=1
++ fi
++ done
++
++ if [ $exist -eq 0 ]; then
++ echo nvme device \"$bdf\" is not present
++ continue
++ fi
++
++ linux_bind_driver "$bdf" nvme
++ done
++}
++
++function reset_all_linux {
++ # NVMe
++ echo "1" > "/sys/bus/pci/rescan"
++ reset_device_linux $(linux_iter_pci 0108)
++
++ hugetlbfs_mount=$(linux_hugetlbfs_mount)
++ rm -f "$hugetlbfs_mount"/spdk*map_*
++}
++
++function help_linux {
++ # NVMe
++ echo ""
++ echo "setup.sh"
++ echo "setup.sh config"
++ echo "setup.sh status"
++ echo "setup.sh reset"
++ echo "setup.sh hugepage"
++ echo "setup.sh config except_device=\"pci_addr\""
++ echo "setup.sh config except_device=\"pci_addr1,pci_addr2,pci_addr3,...\""
++ echo "setup.sh config_device \"pci_addr\""
++ echo "setup.sh config_device \"pci_addr1,pci_addr2,pci_addr3,...\""
++ echo "setup.sh reset_device \"pci_addr\""
++ echo "setup.sh reset_device \"pci_addr1,pci_addr2,pci_addr3,...\""
++ echo "setup.sh reset_all"
++ echo ""
++}
++
++function configure_freebsd {
++ TMP=`mktemp`
++
++ # NVMe
++ GREP_STR="class=0x010802"
++
++ AWK_PROG="{if (count > 0) printf \",\"; printf \"%s:%s:%s\",\$2,\$3,\$4; count++}"
++ echo $AWK_PROG > $TMP
++
++ BDFS=`pciconf -l | grep "${GREP_STR}" | awk -F: -f $TMP`
++
++ kldunload nic_uio.ko || true
++ kenv hw.nic_uio.bdfs=$BDFS
++ kldload nic_uio.ko
++ rm $TMP
++
++ kldunload contigmem.ko || true
++ kenv hw.contigmem.num_buffers=$((NRHUGE * 2 / 256))
++ kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
++ kldload contigmem.ko
++}
++
++function reset_freebsd {
++ kldunload contigmem.ko || true
++ kldunload nic_uio.ko || true
++}
++
++function get_slot_id {
++ pciaddr=$1
++
++ return_msg=`lspci -vvv -xxx -s
"$pciaddr" | grep -i "Slot:"` ++ slot_id=${return_msg##* } ++ ++ echo $slot_id ++} ++ ++function get_except_device_linux { ++ param=$1 ++ if [[ $param == except_device=* ]]; then ++ devstr=${param#*=} ++ OLD_IFS="$IFS" ++ IFS="," ++ expdev=($devstr) ++ IFS=$OLD_IFS ++ fi ++ if [ ${#expdev[@]} -ne 0 ]; then ++ echo ${expdev[@]} ++ fi ++} ++ ++function get_device_linux { ++ devstr=$1 ++ OLD_IFS="$IFS" ++ IFS="," ++ resetdev=($devstr) ++ IFS=$OLD_IFS ++ ++ if [ ${#resetdev[@]} -ne 0 ]; then ++ echo ${resetdev[@]} ++ fi ++} ++ ++: ${NRHUGE:=1024} ++ ++username=$1 ++mode=$2 ++ ++if [ "$username" = "reset" -o "$username" = "config" -o "$username" = "status" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$username" = "reset_device" -o "$username" = "reset_all" -o "$username" = "help" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$username" = "config_device" -o "$username" = "hugepage" ]; then ++ mode="$username" ++ username="" ++fi ++ ++if [ "$mode" == "" ]; then ++ mode="config" ++fi ++ ++if [ "$username" = "" ]; then ++ username="$SUDO_USER" ++ if [ "$username" = "" ]; then ++ username=`logname 2>/dev/null` || true ++ fi ++fi ++ ++if [ "$mode" == "config" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ exceptdev=`get_except_device_linux $paramstr` ++ fi ++fi ++ ++if [ "$mode" == "reset_device" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ resetdev=`get_device_linux $paramstr` ++ fi ++fi ++ ++if [ "$mode" == "config_device" ]; then ++ paramcnt=$# ++ if [ $paramcnt -eq 2 ]; then ++ paramstr=$2 ++ configdev=`get_device_linux $paramstr` ++ fi ++fi ++ ++if [ `uname` = Linux ]; then ++ if [ "$mode" == "config" ]; then ++ configure_linux $exceptdev ++ elif [ "$mode" == "reset" ]; then ++ reset_linux ++ elif [ "$mode" == "status" ]; then ++ status_linux ++ elif [ "$mode" == "reset_device" ]; then ++ reset_device_linux $resetdev ++ elif [ "$mode" == "reset_all" ]; then ++ reset_all_linux ++ elif [ "$mode" == "help" ]; then ++ help_linux ++ elif [ "$mode" == "config_device" ]; then ++ config_linux_device $configdev ++ elif [ "$mode" == "hugepage" ]; then ++ config_linux_hugepage ++ fi ++else ++ if [ "$mode" == "config" ]; then ++ configure_freebsd ++ elif [ "$mode" == "reset" ]; then ++ reset_freebsd ++ fi ++fi +-- +2.43.0 + diff --git a/0008-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch b/0008-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch new file mode 100644 index 0000000..af2574a --- /dev/null +++ b/0008-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch @@ -0,0 +1,122 @@ +From 64d0c5c6fd788d3b84726d15c08ffb8d908b5427 Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Mon, 1 Mar 2021 09:20:10 +0800 +Subject: [PATCH 08/12] use spdk_nvme_ns_cmd_dataset_management and delete + spdk_nvme_ns_cmd_unmap_blocks + +Signed-off-by: sunshihao520 +--- + include/spdk/nvme.h | 33 ----------------------------- + lib/nvme/nvme_ns_cmd.c | 35 ------------------------------- + module/bdev/nvme/bdev_nvme_self.c | 8 +++---- + 3 files changed, 4 insertions(+), 72 deletions(-) + +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 507e139..ad5da75 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -4356,39 +4356,6 @@ bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + +-/** +- * Submit a data set management request to the specified NVMe namespace. 
Data set +- * management operations are designed to optimize interaction with the block +- * translation layer inside the device. The most common type of operation is +- * deallocate, which is often referred to as TRIM or UNMAP. +- * +- * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). +- * The user must ensure that only one thread submits I/O on a given qpair at any +- * given time. +- * +- * This is a convenience wrapper that will automatically allocate and construct +- * the correct data buffers. Therefore, ranges does not need to be allocated from +- * pinned memory and can be placed on the stack. If a higher performance, zero-copy +- * version of DSM is required, simply build and submit a raw command using +- * spdk_nvme_ctrlr_cmd_io_raw(). +- * +- * \param ns NVMe namespace to submit the DSM request +- * \param type A bit field constructed from \ref spdk_nvme_dsm_attribute. +- * \param qpair I/O queue pair to submit the request +- * \param ranges An array of \ref spdk_nvme_dsm_range elements describing the LBAs +- * to operate on. +- * \param num_ranges The number of elements in the ranges array. +- * \param cb_fn Callback function to invoke when the I/O is completed +- * \param cb_arg Argument to pass to the callback function +- * +- * \return 0 if successfully submitted, negated POSIX errno values otherwise. +- */ +-int spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint32_t type, +- const struct spdk_nvme_dsm_range *ranges, +- uint16_t num_ranges, +- spdk_nvme_cmd_cb cb_fn, +- void *cb_arg); + /** + * \brief Submits a write I/O to the specified NVMe namespace. + * +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index b324b4b..3f37cd7 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -1556,38 +1556,3 @@ spdk_nvme_ns_cmd_io_mgmt_send(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q + + return nvme_qpair_submit_request(qpair, req); + } +- +-#ifdef SPDK_CONFIG_APP_RW +-int +-spdk_nvme_ns_cmd_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint32_t type, +- const struct spdk_nvme_dsm_range *ranges, uint16_t num_ranges, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg) +-{ +- struct nvme_request *req = NULL; +- struct spdk_nvme_cmd *cmd = NULL; +- struct nvme_payload payload; +- +- if (ranges == NULL) { +- return -EINVAL; +- } +- +- payload = NVME_PAYLOAD_CONTIG((void *)ranges, NULL); +- +- req = nvme_allocate_request(qpair, &payload, num_ranges * sizeof(struct spdk_nvme_dsm_range), +- 0, cb_fn, cb_arg); +- if (req == NULL) { +- return -ENOMEM; +- } +- +- req->user_cb_arg = cb_arg; +- +- cmd = &req->cmd; +- cmd->opc = SPDK_NVME_OPC_DATASET_MANAGEMENT; +- cmd->nsid = ns->id; +- +- cmd->cdw10 = num_ranges - 1; +- cmd->cdw11 = type; +- +- return nvme_qpair_submit_request(qpair, req); +-} +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index 7371ecb..1419b1f 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -565,10 +565,10 @@ bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void + } + + spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); +- return spdk_nvme_ns_cmd_unmap_blocks(nbdev->nvme_ns->ns, nvme_ch->qpair, +- SPDK_NVME_DSM_ATTR_DEALLOCATE, +- unmap_d, unmap_count, +- bdev_nvme_queued_done, driver_ctx); ++ return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, 
unmap_count, ++ bdev_nvme_queued_done, driver_ctx); + } + + void +-- +2.43.0 + diff --git a/0009-spdk-add-nvme-support-for-HSAK.patch b/0009-spdk-add-nvme-support-for-HSAK.patch new file mode 100644 index 0000000..66b56e5 --- /dev/null +++ b/0009-spdk-add-nvme-support-for-HSAK.patch @@ -0,0 +1,1158 @@ +From 3b1affa5c3211579fe5161d85bff0f36dff2cf6d Mon Sep 17 00:00:00 2001 +From: sunshihao +Date: Mon, 1 Mar 2021 10:59:02 +0800 +Subject: [PATCH 09/12] spdk: add nvme support for HSAK + +Signed-off-by: sunshihao +--- + include/spdk/bdev.h | 5 +- + include/spdk/bdev_module.h | 4 +- + include/spdk/nvme.h | 51 +-------- + include/spdk/thread.h | 2 + + lib/bdev/bdev.c | 13 ++- + lib/bdev/bdev_self.c | 36 +++---- + lib/env_dpdk/env.mk | 2 +- + lib/env_dpdk/init.c | 2 +- + lib/event/reactor.c | 11 +- + lib/nvme/nvme.c | 35 +++---- + lib/nvme/nvme_ctrlr.c | 20 +++- + lib/nvme/nvme_ctrlr_cmd.c | 19 ++-- + lib/nvme/nvme_ctrlr_self.c | 3 + + lib/nvme/nvme_internal.h | 3 +- + lib/nvme/nvme_ns_cmd.c | 1 - + lib/nvme/nvme_pcie.c | 2 - + lib/nvme/nvme_pcie_common.c | 10 +- + lib/nvme/nvme_qpair.c | 12 +-- + lib/thread/thread.c | 6 ++ + mk/spdk.common.mk | 2 +- + module/bdev/nvme/bdev_nvme.c | 9 +- + module/bdev/nvme/bdev_nvme_self.c | 167 ++++++++++++------------------ + module/bdev/nvme/bdev_nvme_self.h | 2 +- + scripts/setup_self.sh | 11 +- + 24 files changed, 196 insertions(+), 232 deletions(-) + +diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h +index 55b91f9..aae319e 100644 +--- a/include/spdk/bdev.h ++++ b/include/spdk/bdev.h +@@ -1642,12 +1642,13 @@ int spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + * negated errno on failure, in which case the callback will not be called. + */ + int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, ++ void *unmap_d, uint32_t unmap_count, + spdk_bdev_io_completion_cb cb, void *cb_arg); + + void *spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch); + void *spdk_bdev_io_get_pool(size_t nbytes); +-bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *bdevIoCh); ++bool spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch); ++int spdk_bdev_get_channel_state(struct spdk_io_channel *io_ch); + #endif + + /** +diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h +index 7cb0da3..ff7e219 100644 +--- a/include/spdk/bdev_module.h ++++ b/include/spdk/bdev_module.h +@@ -412,8 +412,10 @@ enum reqLocation_E { + }; + + void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr); +-void spdk_bdev_fail_ctrlr(const char *traddr); ++void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr); ++struct spdk_nvme_ctrlr *spdk_nvme_bdev_ctrlr_get(char *trid); + void *nvme_channel_get_group(void *io_ch); ++int nvme_channel_get_state(void *io_ch); + #endif + + /** bdev I/O completion status */ +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index ad5da75..6c20c29 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -4302,7 +4302,7 @@ bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); +-int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, ++int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, + void *payload); + int32_t 
spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); + int32_t spdk_nvme_ctrlr_ret_streams_param(struct spdk_nvme_ctrlr *ctrlr, void *payload); +@@ -4330,58 +4330,13 @@ int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, const char * + const char **names, size_t *count, const char *hostnqn); + + int spdk_nvme_detach_ublock(struct spdk_nvme_ctrlr *ctrlr); +-void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); +- +-#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1 +-#define SPDK_NVME_UEVENT_SUBSYSTEM_NVME 2 +- +-enum spdk_nvme_uevent_action { +- SPDK_NVME_UEVENT_ADD = 0, +- SPDK_NVME_UEVENT_REMOVE = 1, +-}; +- +-struct spdk_uevent { +- /* remove or add */ +- enum spdk_nvme_uevent_action action; +- int subsystem; +- /* pci address of device */ +- char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1]; +-}; +- +-int nvme_uevent_connect(void); +-int nvme_get_uevent(int fd, struct spdk_uevent *uevent); +-int nvme_get_uevent_block(int fd, struct spdk_uevent *uevent); + int32_t spdk_rebind_driver(char *pci_addr, char *driver_name); ++void spdk_nvme_ctrlr_update_unvmcap(struct spdk_nvme_ctrlr *ctrlr); ++void spdk_nvme_ctrlr_fail_hotplug(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ns_pi_md_start(struct spdk_nvme_ns *ns); + bool spdk_nvme_ns_is_dataset_mng_supported(struct spdk_nvme_ns *ns); + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair); + +-/** +- * \brief Submits a write I/O to the specified NVMe namespace. +- * +- * \param ns NVMe namespace to submit the write I/O +- * \param qpair I/O queue pair to submit the request +- * \param lba starting LBA to write the data +- * \param lba_count length (in sectors) for the write operation +- * \param streamId The stream id for write I/O +- * \param cb_fn callback function to invoke when the I/O is completed +- * \param cb_arg argument to pass to the callback function +- * \param io_flags set flags, defined in nvme_spec.h, for this I/O +- * \param reset_sgl_fn callback function to reset scattered payload +- * \param next_sge_fn callback function to iterate each scattered +- * payload memory segment +- * +- * \return 0 if successfully submitted, ENOMEM if an nvme_request +- * structure cannot be allocated for the I/O request +- * +- * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). +- * The user must ensure that only one thread submits I/O on a given qpair at any given time. 
+- */ +-int spdk_nvme_ns_cmd_writev_stream(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, +- uint64_t lba, uint32_t lba_count, uint16_t streamId, +- spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, +- spdk_nvme_req_reset_sgl_cb reset_sgl_fn, +- spdk_nvme_req_next_sge_cb next_sge_fn); + #endif + + /* +diff --git a/include/spdk/thread.h b/include/spdk/thread.h +index 5f239a5..68ddf21 100644 +--- a/include/spdk/thread.h ++++ b/include/spdk/thread.h +@@ -57,6 +57,8 @@ void spdk_reactors_use(bool useOrNot); + bool spdk_get_reactor_type(void); + + void spdk_set_thread_exited(struct spdk_thread *thread); ++ ++uint32_t spdk_get_channel_ref(void *io_ch); + #endif + + /** +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 88a33e4..8a43fbd 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -4532,10 +4532,12 @@ bdev_channel_destroy(void *io_device, void *ctx_buf) + spdk_trace_record(TRACE_BDEV_IOCH_DESTROY, 0, 0, 0, ch->bdev->name, + spdk_thread_get_id(spdk_io_channel_get_thread(ch->channel))); + ++#ifndef SPDK_CONFIG_APP_RW + /* This channel is going away, so add its statistics into the bdev so that they don't get lost. */ + spdk_spin_lock(&ch->bdev->internal.spinlock); + spdk_bdev_add_io_stat(ch->bdev->internal.stat, ch->stat); + spdk_spin_unlock(&ch->bdev->internal.spinlock); ++#endif + + bdev_abort_all_queued_io(&ch->queued_resets, ch); + +@@ -7938,7 +7940,6 @@ static int + bdev_start_qos(struct spdk_bdev *bdev) + { + struct set_qos_limit_ctx *ctx; +- + /* Enable QoS */ + if (bdev->internal.qos && bdev->internal.qos->thread == NULL) { + ctx = calloc(1, sizeof(*ctx)); +@@ -10422,6 +10423,16 @@ spdk_bdev_get_channel_group(struct spdk_io_channel *io_ch) + return nvme_channel_get_group(nvme_io_ch); + } + ++int ++spdk_bdev_get_channel_state(struct spdk_io_channel *io_ch) ++{ ++ struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(io_ch); ++ struct spdk_io_channel *under_io_ch = ch->channel; ++ void *nvme_io_ch = spdk_io_channel_get_ctx(under_io_ch); ++ ++ return nvme_channel_get_state(nvme_io_ch); ++} ++ + bool + spdk_bdev_have_io_in_channel(struct spdk_io_channel *io_ch) + { +diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c +index 7050c30..c5b92a3 100644 +--- a/lib/bdev/bdev_self.c ++++ b/lib/bdev/bdev_self.c +@@ -30,10 +30,8 @@ struct libstorage_bdev_io_stat *g_io_stat_map; + int32_t g_libstorage_iostat = 0; + int32_t g_polltime_threshold = 0; + +-void +-spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, +- struct spdk_io_channel *io_ch, +- struct spdk_bdev_io_stat *stat) ++void spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch, struct spdk_bdev_io_stat *stat) + { + int i = 0; + bool find = false; +@@ -95,9 +93,8 @@ spdk_bdev_init_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, + stat->interval_tsc = spdk_get_ticks_hz() / 10; + } + +-void +-spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, +- struct spdk_io_channel *io_ch) ++void spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, ++ struct spdk_io_channel *io_ch) + { + int i = 0; + uint16_t channel_id; +@@ -127,10 +124,9 @@ spdk_bdev_destroy_iostat(struct spdk_bdev_channel *ch, struct spdk_bdev *bdev, + } + } + +-int +-spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +- void *unmap_d, uint16_t unmap_count, +- spdk_bdev_io_completion_cb cb, void *cb_arg) ++int spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct 
spdk_io_channel *ch, ++ void *unmap_d, uint32_t unmap_count, ++ spdk_bdev_io_completion_cb cb, void *cb_arg) + { + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_io *bdev_io = NULL; +@@ -152,38 +148,38 @@ spdk_bdev_unmap_multiblocks(struct spdk_bdev_desc *desc, struct spdk_io_channel + return 0; + } + +-void +-bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat) ++void bdev_io_stat_update(struct spdk_bdev_io *bdev_io, uint64_t tsc_diff, ++ struct spdk_bdev_io_stat *stat) + { + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ_NVME: + stat->bytes_read += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; + stat->num_read_ops++; +- stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->read_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_WRITE_NVME: + stat->bytes_written += bdev_io->u.contig.nbytes + bdev_io->u.contig.md_len; + stat->num_write_ops++; +- stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->write_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_READV_NVME: + stat->bytes_read += bdev_io->u.bdev.nbytes; + stat->num_read_ops++; +- stat->read_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->read_latency_ticks += tsc_diff; + break; + case SPDK_BDEV_IO_TYPE_WRITEV_NVME: + stat->bytes_written += bdev_io->u.bdev.nbytes; + stat->num_write_ops++; +- stat->write_latency_ticks += (tsc - bdev_io->internal.submit_tsc); ++ stat->write_latency_ticks += tsc_diff; + break; + default: + break; + } + } + +-void +-bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, struct spdk_bdev_io_stat *stat, +- struct spdk_io_channel *channel, uint64_t io_outstanding) ++void bdev_update_iostat_map(struct spdk_bdev_io *bdev_io, uint64_t tsc, ++ struct spdk_bdev_io_stat *stat, ++ struct spdk_io_channel *channel, uint64_t io_outstanding) + { + uint64_t num_poll_timeout; + +diff --git a/lib/env_dpdk/env.mk b/lib/env_dpdk/env.mk +index 4f45c29..3569368 100644 +--- a/lib/env_dpdk/env.mk ++++ b/lib/env_dpdk/env.mk +@@ -126,7 +126,7 @@ endif + DPDK_LIB_LIST_SORTED = $(sort $(DPDK_LIB_LIST)) + + DPDK_SHARED_LIB = $(DPDK_LIB_LIST_SORTED:%=$(DPDK_LIB_DIR)/lib%.so) +-DPDK_STATIC_LIB = $(DPDK_LIB_LIST_SORTED:%=$(DPDK_LIB_DIR)/lib%.a) ++DPDK_STATIC_LIB = $(DPDK_LIB_LIST_SORTED:%=/usr/lib64/lib%.a) + DPDK_SHARED_LIB_LINKER_ARGS = $(call add_no_as_needed,$(DPDK_SHARED_LIB)) -Wl,-rpath=$(DPDK_LIB_DIR) + DPDK_STATIC_LIB_LINKER_ARGS = $(call add_whole_archive,$(DPDK_STATIC_LIB)) + +diff --git a/lib/env_dpdk/init.c b/lib/env_dpdk/init.c +index e94486c..fa32810 100644 +--- a/lib/env_dpdk/init.c ++++ b/lib/env_dpdk/init.c +@@ -637,7 +637,7 @@ spdk_env_init(const struct spdk_env_opts *opts) + return -EINVAL; + } + +- SPDK_PRINTF("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); ++ printf("Starting %s / %s initialization...\n", SPDK_VERSION_STRING, rte_version()); + + args_print = _sprintf_alloc("[ DPDK EAL parameters: "); + if (args_print == NULL) { +diff --git a/lib/event/reactor.c b/lib/event/reactor.c +index 6f604e1..9a8e39a 100644 +--- a/lib/event/reactor.c ++++ b/lib/event/reactor.c +@@ -29,6 +29,7 @@ + #endif + + #define SPDK_EVENT_BATCH_SIZE 8 ++#define SPDK_EVENT_MAX_BATCH_SIZE 32 + + #ifdef SPDK_CONFIG_APP_RW + struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; +@@ -225,7 +226,7 @@ spdk_reactors_init(size_t msg_mempool_size) + sp = spdk_conf_find_section(NULL, "Reactor"); + if (sp != 0) { + g_reactor_batch_size 
= spdk_conf_section_get_intval(sp, "BatchSize");
+- if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_BATCH_SIZE) {
++ if (g_reactor_batch_size <= 0 || g_reactor_batch_size > SPDK_EVENT_MAX_BATCH_SIZE) {
+ g_reactor_batch_size = SPDK_EVENT_BATCH_SIZE;
+ }
+ syslog(LOG_INFO, "BatchSize is set to %d\n", g_reactor_batch_size);
+@@ -990,9 +991,6 @@ reactor_run(void *arg)
+ }
+
+ if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
+-#ifdef SPDK_CONFIG_APP_RW
+- lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED;
+-#endif
+ break;
+ }
+ }
+@@ -1029,7 +1027,10 @@ reactor_run(void *arg)
+ }
+ }
+ }
+-
+#ifdef SPDK_CONFIG_APP_RW
++ /* When all threads in the reactor have finished, inform libstorage to release its resources. */
++ lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED;
++#endif
+ return 0;
+ }
+
+diff --git a/lib/nvme/nvme.c b/lib/nvme/nvme.c
+index 28a48c3..32f46d1 100644
+--- a/lib/nvme/nvme.c
++++ b/lib/nvme/nvme.c
+@@ -12,6 +12,7 @@
+ #include "spdk/nvme.h"
+ #include "spdk_internal/debug.h"
+ #include "spdk/bdev_module.h"
++#include <sys/prctl.h>
+
+ #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"
+
+@@ -73,6 +74,9 @@ static void admin_timer_timeout(void)
+
+ static void *nvme_ctrlr_run_admin_timer(void *arg)
+ {
++#if defined(__linux__)
++ prctl(PR_SET_NAME, "nvme-admin", 0, 0, 0);
++#endif
+ sleep(20);
+
+ while (1) {
+@@ -566,6 +570,11 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
+ struct spdk_nvme_ctrlr_process *active_proc,
+ uint64_t now_tick)
+ {
++ if (req == NULL) {
++ SPDK_WARNLOG("Got an invalid req from the tracker!\n");
++ return 1;
++ }
++
+ struct spdk_nvme_qpair *qpair = req->qpair;
+ struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
+ uint64_t timeout_ticks = nvme_qpair_is_admin_queue(qpair) ?
+@@ -606,26 +615,16 @@ nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
+ nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
+ cid);
+ #else
+- if (!nvme_qpair_is_admin_queue(qpair) && (req->cmd.opc == SPDK_NVME_OPC_WRITE ||
+- req->cmd.opc == SPDK_NVME_OPC_READ)) {
+- SPDK_WARNLOG("IO timeout, OP[%u] NS[%u] LBA[%lu].\n", req->cmd.opc, req->cmd.nsid,
+- *(uint64_t *)&req->cmd.cdw10);
+- } else {
+- SPDK_WARNLOG("%s Command[%u] timeout.\n", nvme_qpair_is_admin_queue(qpair) ?
+- "Admin" : "IO", req->cmd.opc);
+- }
+- if (req->timed_out) {
+- /* Reset the controller if the command was already timed out. */
+- SPDK_WARNLOG("IO Command[%u] timeout again, reset controller.\n", cid);
+- active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr, NULL, cid);
+- } else {
++ if (!req->timed_out) {
+ req->timed_out = true;
+- active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
+- nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
+- cid);
+- /* Timing again. Reset the controller if it times out again */
+- req->submit_tick = spdk_get_ticks();
++ SPDK_WARNLOG("%s Command[%u] timeout. ctrlr=%p qpair=%p cid=%u\n",
++ nvme_qpair_is_admin_queue(qpair) ? "Admin" : "IO",
++ req->cmd.opc, ctrlr, qpair, cid);
+ }
++ active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
++ nvme_qpair_is_admin_queue(qpair) ? NULL : qpair, cid);
++ /* Update the submit tick so the same request is not immediately reported as timed out again.
*/
++ req->submit_tick = spdk_get_ticks();
+ #endif
+ return 0;
+ }
+diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c
+index 4143c15..f935318 100644
+--- a/lib/nvme/nvme_ctrlr.c
++++ b/lib/nvme/nvme_ctrlr.c
+@@ -7,6 +7,7 @@
+ #include "spdk/stdinc.h"
+
+ #include "nvme_internal.h"
++#include "nvme_pcie_internal.h"
+ #include "nvme_io_msg.h"
+
+ #include "spdk/env.h"
+@@ -591,7 +592,9 @@ spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
+ }
+
+ ctrlr = qpair->ctrlr;
+-
++ if (ctrlr == NULL) {
++ return 0;
++ }
+ if (qpair->in_completion_context) {
+ /*
+ * There are many cases where it is convenient to delete an io qpair in the context
+@@ -1114,6 +1117,16 @@ nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nv
+ }
+ }
+
++#ifdef SPDK_CONFIG_APP_RW
++void
++spdk_nvme_ctrlr_fail_hotplug(struct spdk_nvme_ctrlr *ctrlr)
++{
++ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
++ nvme_ctrlr_fail(ctrlr, true);
++ nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
++}
++#endif
++
+ static void
+ nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
+ struct nvme_ctrlr_detach_ctx *ctx)
+@@ -3311,7 +3324,6 @@ struct spdk_nvme_ctrlr_process *
+ nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
+ {
+ struct spdk_nvme_ctrlr_process *active_proc;
+-
+ TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
+ if (active_proc->pid == pid) {
+ return active_proc;
+@@ -5326,6 +5338,10 @@ spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr)
+ void
+ spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid)
+ {
++ if (ctrlr->free_io_qids == NULL) {
++ return;
++ }
++
+ assert(qid <= ctrlr->opts.num_io_queues);
+
+ nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
+diff --git a/lib/nvme/nvme_ctrlr_cmd.c b/lib/nvme/nvme_ctrlr_cmd.c
+index 416f099..f83ad92 100644
+--- a/lib/nvme/nvme_ctrlr_cmd.c
++++ b/lib/nvme/nvme_ctrlr_cmd.c
+@@ -611,17 +611,13 @@ nvme_ctrlr_retry_queued_abort(struct spdk_nvme_ctrlr *ctrlr)
+ }
+ }
+
++ return;
+ #ifdef SPDK_CONFIG_APP_RW
+- nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
+- if (rc < 0) {
+- /* If abort fail, free all of the queued abort requests */
+- STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) {
+- STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
+- nvme_free_request(next);
+- ctrlr->outstanding_aborts--;
+- }
++free:
++ STAILQ_FOREACH_SAFE(next, &ctrlr->queued_aborts, stailq, tmp) {
++ STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
++ nvme_free_request(next);
+ }
+- nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
+ #endif
+
+ }
+@@ -650,9 +646,10 @@ nvme_ctrlr_cmd_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
+
+ assert(ctrlr->outstanding_aborts > 0);
+ ctrlr->outstanding_aborts--;
+- nvme_ctrlr_retry_queued_abort(ctrlr);
+-
++ /* If the abort failed, just reset the controller.
*/ + req->user_cb_fn(req->user_cb_arg, cpl); ++ ++ nvme_ctrlr_retry_queued_abort(ctrlr); + } + + int +diff --git a/lib/nvme/nvme_ctrlr_self.c b/lib/nvme/nvme_ctrlr_self.c +index 6ad8bc9..e3f1d0c 100644 +--- a/lib/nvme/nvme_ctrlr_self.c ++++ b/lib/nvme/nvme_ctrlr_self.c +@@ -219,5 +219,8 @@ int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint1 + + uint16_t spdk_nvme_get_qpair_id(struct spdk_nvme_qpair *qpair) + { ++ if (qpair == NULL) { ++ return -1; ++ } + return qpair->id; + } +diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h +index a18fb78..ca64907 100644 +--- a/lib/nvme/nvme_internal.h ++++ b/lib/nvme/nvme_internal.h +@@ -492,6 +492,7 @@ struct spdk_nvme_qpair { + STAILQ_HEAD(, nvme_request) aborting_queued_req; + + void *req_buf; ++ uint32_t disconnected_time; + }; + + struct spdk_nvme_poll_group { +@@ -1288,8 +1289,8 @@ int nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, + } while (0); + + #ifdef SPDK_CONFIG_APP_RW ++bool nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair); + void nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr); +-void nvme_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); + #endif + + static inline struct nvme_request * +diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c +index 3f37cd7..9f88b4c 100644 +--- a/lib/nvme/nvme_ns_cmd.c ++++ b/lib/nvme/nvme_ns_cmd.c +@@ -494,7 +494,6 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint32_t sector_size = _nvme_get_host_buffer_sector_size(ns, io_flags); + uint32_t sectors_per_max_io = _nvme_get_sectors_per_max_io(ns, io_flags); + uint32_t sectors_per_stripe = ns->sectors_per_stripe; +- int rc; + + assert(rc != NULL); + assert(*rc == 0); +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index b4d2923..9e55427 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -1030,9 +1030,7 @@ nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr) + spdk_pci_device_unclaim(devhandle); + spdk_pci_device_detach(devhandle); + } +- + spdk_free(pctrlr); +- + return 0; + } + +diff --git a/lib/nvme/nvme_pcie_common.c b/lib/nvme/nvme_pcie_common.c +index 669f66f..c268793 100644 +--- a/lib/nvme/nvme_pcie_common.c ++++ b/lib/nvme/nvme_pcie_common.c +@@ -937,7 +937,7 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_ + __builtin_prefetch(&tr->req->stailq); + nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true); + } else { +- SPDK_ERRLOG("cpl does not map to outstanding cmd\n"); ++ SPDK_NOTICELOG("cpl does not map to outstanding cmd\n"); + spdk_nvme_qpair_print_completion(qpair, cpl); + assert(0); + } +@@ -1038,7 +1038,6 @@ nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair) + } + + spdk_free(pqpair); +- + return 0; + } + +@@ -1086,8 +1085,6 @@ nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ + struct nvme_completion_poll_status *status; + int rc; + +- assert(ctrlr != NULL); +- + if (ctrlr->is_removed) { + goto free; + } +@@ -1773,6 +1770,10 @@ nvme_pcie_poll_group_process_completions(struct spdk_nvme_transport_poll_group * + if (spdk_unlikely(local_completions < 0)) { + disconnected_qpair_cb(qpair, tgroup->group->ctx); + total_completions = -ENXIO; ++ qpair->disconnected_time++; ++ if (qpair->disconnected_time > 50) { ++ qpair->delete_after_completion_context = 1; ++ } + } else if (spdk_likely(total_completions >= 0)) { + total_completions += local_completions; + } +@@ -1787,7 +1788,6 @@ nvme_pcie_poll_group_destroy(struct 
spdk_nvme_transport_poll_group *tgroup) + if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) { + return -EBUSY; + } +- + free(tgroup); + + return 0; +diff --git a/lib/nvme/nvme_qpair.c b/lib/nvme/nvme_qpair.c +index 5e0aa73..11e49dd 100644 +--- a/lib/nvme/nvme_qpair.c ++++ b/lib/nvme/nvme_qpair.c +@@ -632,7 +632,7 @@ nvme_qpair_abort_queued_reqs_with_cbarg(struct spdk_nvme_qpair *qpair, void *cmd + return aborting; + } + +-static inline bool ++bool + nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + { + struct nvme_request *req; +@@ -645,7 +645,7 @@ nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair) + * to submit any queued requests that built up while we were in the connected or enabling state. + */ + if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTED && +- !qpair->ctrlr->is_resetting && !qpair->ctrlr->is_removed && !qpair->ctrlr->is_destructed)) { ++ !qpair->ctrlr->is_resetting)) { + nvme_qpair_set_state(qpair, NVME_QPAIR_ENABLING); + /* + * PCIe is special, for fabrics transports, we can abort requests before disconnect during reset +@@ -972,13 +972,7 @@ _nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *r + rc = nvme_qpair_submit_request(qpair, child_req); + if (spdk_unlikely(rc != 0)) { + child_req_failed = true; +-#ifdef SPDK_CONFIG_APP_RW +- if (rc == -ENXIO && child_req->num_children == 0) { +- SPDK_WARNLOG("Warning: child req submit failed.\n"); +- nvme_request_remove_child(req, child_req); +- nvme_free_request(child_req); +- } +-#endif ++ SPDK_WARNLOG("Warning: child req submit failed.\n"); + } + } else { /* free remaining child_reqs since one child_req fails */ + nvme_request_remove_child(req, child_req); +diff --git a/lib/thread/thread.c b/lib/thread/thread.c +index 344869d..7d3169e 100644 +--- a/lib/thread/thread.c ++++ b/lib/thread/thread.c +@@ -2782,6 +2782,12 @@ thread_interrupt_create(struct spdk_thread *thread) + { + return -ENOTSUP; + } ++ ++uint32_t spdk_get_channel_ref(void *io_ch) ++{ ++ struct spdk_io_channel *ch = io_ch; ++ return ch->ref; ++} + #endif + + static int +diff --git a/mk/spdk.common.mk b/mk/spdk.common.mk +index a9cef8f..2765c04 100644 +--- a/mk/spdk.common.mk ++++ b/mk/spdk.common.mk +@@ -340,7 +340,7 @@ endif + SYS_LIBS += -lssl + SYS_LIBS += -lcrypto + SYS_LIBS += -lm +-SYS_LIBS += -lsecurec ++SYS_LIBS += -lboundscheck + + PKGCONF ?= pkg-config + ifneq ($(strip $(CONFIG_OPENSSL_PATH)),) +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index 6cc2628..28c0eaf 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -8319,12 +8319,17 @@ bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w) + } + + #ifdef SPDK_CONFIG_APP_RW +-void * +-nvme_channel_get_group(void *io_ch) ++void *nvme_channel_get_group(void *io_ch) + { + struct nvme_io_channel *nvme_io_ch = io_ch; + return nvme_io_ch->group; + } ++ ++int nvme_channel_get_state(void *io_ch) ++{ ++ struct nvme_io_channel *nvme_ch = io_ch; ++ return nvme_ch->state; ++} + struct nvme_bdev_io *nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, + int iovcnt) + { +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index 1419b1f..dc480ff 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -15,7 +15,6 @@ + #include "spdk/json.h" + #include "spdk/likely.h" + #include "spdk/bdev_module.h" +-#include "spdk/nvme_ocssd.h" + #include "spdk/nvme.h" + + #include 
"spdk_internal/bdev_stat.h" +@@ -23,11 +22,6 @@ + #include "common.h" + #include + +-enum data_direction { +- BDEV_DISK_READ = 0, +- BDEV_DISK_WRITE = 1 +-}; +- + void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) + { + uint64_t current_ticks = 0; +@@ -49,8 +43,7 @@ void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) + } + } + +-int +-_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) ++int _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + +@@ -65,27 +58,27 @@ _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io * + bdev_io->u.contig.offset_blocks); + return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, + bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, BDEV_DISK_READ, ++ bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_READ, + bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); + case SPDK_BDEV_IO_TYPE_WRITE_NVME: + SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, + bdev_io->u.contig.offset_blocks); + return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, + bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, BDEV_DISK_WRITE, ++ bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_WRITE, + bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); + case SPDK_BDEV_IO_TYPE_READV_NVME: + SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, BDEV_DISK_READ, ++ bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_READ, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); + case SPDK_BDEV_IO_TYPE_WRITEV_NVME: + SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, + bdev_io->u.bdev.offset_blocks); + return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, BDEV_DISK_WRITE, ++ bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_WRITE, + bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); + case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS: +@@ -100,14 +93,12 @@ _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io * + return 0; + } + +-int +-bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) ++int bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) + { + return 0; + } + +-uint16_t +-bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) ++uint16_t bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + uint16_t channel_id; +@@ -116,15 +107,13 @@ bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) + return channel_id; + } + +-uint64_t +-bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) ++uint64_t bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); + return nvme_ch->group->num_poll_timeout; + } + +-int32_t +-nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) ++int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) + 
{ + uint32_t num_ctrlr = 0, i = 0; + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +@@ -221,8 +210,7 @@ nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) + return num_ctrlr; + } + +-struct nvme_bdev_ctrlr * +-nvme_ctrlr_get_by_name(const char *name) ++struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *name) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -239,8 +227,7 @@ nvme_ctrlr_get_by_name(const char *name) + return NULL; + } + +-struct spdk_nvme_ctrlr * +-spdk_nvme_ctrlr_get_by_name(const char *ctrlname) ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -253,8 +240,7 @@ spdk_nvme_ctrlr_get_by_name(const char *ctrlname) + return NULL; + } + +-struct spdk_nvme_ctrlr * +-spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) ++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) + { + if (nvme_bdev_ctrlr == NULL) { + return NULL; +@@ -262,8 +248,7 @@ spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) + return nvme_bdev_ctrlr->ctrlr; + } + +-void +-nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) ++void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) + { + int i; + size_t size = strnlen(ctrlname, 24); +@@ -288,8 +273,7 @@ nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) + } + } + +-void +-nvme_ctrlr_clear_iostat_all(void) ++void nvme_ctrlr_clear_iostat_all(void) + { + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; + +@@ -298,8 +282,7 @@ nvme_ctrlr_clear_iostat_all(void) + } + } + +-struct spdk_nvme_ns * +-bdev_nvme_get_ns(struct nvme_bdev *nbdev) ++struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev) + { + return nbdev->nvme_ns->ns; + } +@@ -331,11 +314,10 @@ void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr) + pthread_mutex_unlock(&g_bdev_nvme_mutex); + } + +-int +-bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { +- struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr; + struct nvme_bdev_ns *ns = NULL; ++ bool ns_active = spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid); + + if (nvme_bdev_ctrlr == NULL || nsid > nvme_bdev_ctrlr->num_ns) { + SPDK_ERRLOG("Parameter error. 
nsid[%u], the max nsid is[%u]\n", nsid, nvme_bdev_ctrlr->num_ns); +@@ -343,14 +325,9 @@ bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + } + + ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; ++ ns->type = NVME_BDEV_NS_STANDARD; + +- if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) { +- ns->type = NVME_BDEV_NS_OCSSD; +- } else { +- ns->type = NVME_BDEV_NS_STANDARD; +- } +- +- if (!ns->populated && spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ if (!ns->populated && ns_active) { + SPDK_NOTICELOG("NSID %u to be added\n", nsid); + ns->id = nsid; + ns->ctrlr = nvme_bdev_ctrlr; +@@ -360,16 +337,16 @@ bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + return 0; + } + +- if (ns->populated && !spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) { ++ if (ns->populated && !ns_active) { + SPDK_NOTICELOG("NSID %u is removed\n", nsid); + nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); + return 0; + } +- return -1; ++ ++ return 0; + } + +-bool +-spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { + struct nvme_bdev_ns *ns = NULL; + struct nvme_bdev *bdev = NULL, *tmp = NULL; +@@ -398,8 +375,7 @@ spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + return empty; + } + +-void +-spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + { + struct nvme_bdev_ns *ns = NULL; + struct nvme_bdev *bdev = NULL, *tmp = NULL; +@@ -419,9 +395,18 @@ spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) + } + } + +-int +-bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx, +- void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) ++static void check_error_type(int rc, bool read, void *qpair) ++{ ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", read ? "read" : "write", rc); ++ } else if (rc < 0) { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", read ? "read" : "write", rc, qpair); ++ } ++} ++ ++int bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) + { + int rc; + uint32_t io_flags = 0; +@@ -452,7 +437,7 @@ bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpai + io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; + } + +- if (direction == BDEV_DISK_READ) { ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { + rc = spdk_nvme_ns_cmd_read_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, + lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); + } else { +@@ -460,21 +445,13 @@ bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpai + lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); + } + +- if (rc != 0) { +- if (rc == -ENOMEM) { +- SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "read" : "write", rc); +- } else { +- SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? 
"read" : "write", +- rc, qpair); +- } +- } ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); + return rc; + } + +-int +-bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, +- void *driver_ctx, +- int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) ++int bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) + { + int rc; + struct nvme_bdev_io *bio = NULL; +@@ -508,29 +485,21 @@ bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qp + + bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); + +- if (direction == BDEV_DISK_READ) { ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { + rc = spdk_nvme_ns_cmd_readv(bdev->nvme_ns->ns, qpair, lba, + lba_count, bdev_nvme_queued_done, bio, io_flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } else { + rc = spdk_nvme_ns_cmd_writev(bdev->nvme_ns->ns, qpair, lba, lba_count, +- 0, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_done, bio, io_flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); + } + +- if (rc != 0) { +- if (rc == -ENOMEM) { +- SPDK_NOTICELOG("%s failed: rc = %d\n", direction == BDEV_DISK_READ ? "readv" : "writev", rc); +- } else { +- SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", direction == BDEV_DISK_READ ? "read" : "write", rc, +- qpair); +- } +- } ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); + return rc; + } + +-struct nvme_bdev_ctrlr * +-bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) ++struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) + { + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); + struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; +@@ -540,12 +509,11 @@ bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) + return nbdev->nvme_ns->ctrlr; + } + +-int +-bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count) ++int bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count) + { + struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- int i; ++ uint32_t i; + + if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { + SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); +@@ -566,36 +534,39 @@ bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void + + spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); + return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, +- SPDK_NVME_DSM_ATTR_DEALLOCATE, +- unmap_d, unmap_count, +- bdev_nvme_queued_done, driver_ctx); ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); + } + +-void +-spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) ++void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) + { + remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); + } + +-void spdk_bdev_fail_ctrlr(const char *traddr) ++void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr) + { +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; ++ spdk_nvme_ctrlr_fail_hotplug((struct spdk_nvme_ctrlr *)ctrlr); ++ remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); ++} ++ ++struct spdk_nvme_ctrlr 
*spdk_nvme_bdev_ctrlr_get(char *pci_trid) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; + +- pthread_mutex_lock(&g_bdev_nvme_mutex); + TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, traddr) == 0) { +- spdk_nvme_ctrlr_fail(nvme_bdev_ctrlr->ctrlr); +- remove_cb(NULL, nvme_bdev_ctrlr->ctrlr); +- return; ++ if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, pci_trid) == 0) { ++ return nvme_bdev_ctrlr->ctrlr; + } + } ++ ++ return NULL; + } + +-int +-spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, +- const char *base_name, +- const char **names, size_t *count, +- const char *hostnqn) ++int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, size_t *count, ++ const char *hostnqn) + { + struct nvme_probe_ctx *probe_ctx; + struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; +@@ -616,7 +587,7 @@ spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, + return -1; + } + +- if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, NULL)) { ++ if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, remove_cb)) { + SPDK_ERRLOG("Failed to probe for new devices\n"); + free(probe_ctx); + return -1; +diff --git a/module/bdev/nvme/bdev_nvme_self.h b/module/bdev/nvme/bdev_nvme_self.h +index d7cc587..43ad7ee 100644 +--- a/module/bdev/nvme/bdev_nvme_self.h ++++ b/module/bdev/nvme/bdev_nvme_self.h +@@ -40,4 +40,4 @@ bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc); + + int + bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint16_t unmap_count); ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count); +diff --git a/scripts/setup_self.sh b/scripts/setup_self.sh +index 9e77c29..90b7f86 100755 +--- a/scripts/setup_self.sh ++++ b/scripts/setup_self.sh +@@ -30,8 +30,15 @@ function linux_bind_driver() { + + echo "$bdf ($ven_dev_id): $old_driver_name -> $driver_name" + +- echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true +- echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true ++ if [ "$driver_name" = "nvme" ] ++ then ++ echo 1 > /sys/bus/pci/devices/$bdf/remove ++ sleep 1 ++ echo 1 > /sys/bus/pci/rescan ++ else ++ echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true ++ echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true ++ fi + } + + function linux_hugetlbfs_mount() { +-- +2.43.0 + diff --git a/0010-Add-CUSE-switch-for-nvme-ctrlr.patch b/0010-Add-CUSE-switch-for-nvme-ctrlr.patch new file mode 100644 index 0000000..bc797e6 --- /dev/null +++ b/0010-Add-CUSE-switch-for-nvme-ctrlr.patch @@ -0,0 +1,54 @@ +From da6b7b063272297ea72de9c1e0e18915be5d945d Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Mon, 17 May 2021 16:05:40 +0800 +Subject: [PATCH 10/12] Add CUSE switch for nvme ctrlr + +Signed-off-by: suweifeng +--- + module/bdev/nvme/bdev_nvme.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index 28c0eaf..aacdbf1 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -147,6 +147,9 @@ struct spdk_thread *g_bdev_nvme_init_thread; + static struct spdk_poller *g_hotplug_poller; + static struct spdk_poller *g_hotplug_probe_poller; + static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; ++#ifdef SPDK_CONFIG_APP_RW ++bool g_useCUSE = false; ++#endif + + static void 
nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, + struct nvme_async_probe_ctx *ctx); +@@ -5424,6 +5427,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + } else { + SPDK_ERRLOG("Failed to attach to %s (%s)\n", trid->traddr, name); + } ++#ifdef SPDK_CONFIG_APP_RW ++ /* register CUSE */ ++ if (g_useCUSE) { ++ spdk_nvme_cuse_register(ctrlr); ++ } ++#endif + + free(name); + } +@@ -5484,6 +5493,13 @@ remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) + { + struct nvme_ctrlr *nvme_ctrlr = cb_ctx; + ++#ifdef SPDK_CONFIG_APP_RW ++ /* remove CUSE */ ++ if (g_useCUSE) { ++ spdk_nvme_cuse_unregister(ctrlr); ++ } ++#endif ++ + bdev_nvme_delete_ctrlr(nvme_ctrlr, true); + } + +-- +2.43.0 + diff --git a/0011-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch b/0011-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch new file mode 100644 index 0000000..bff0bd8 --- /dev/null +++ b/0011-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch @@ -0,0 +1,30 @@ +From 346ba737a1442109535678c0a23e5bc37952af48 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Thu, 20 May 2021 16:41:01 +0800 +Subject: [PATCH 11/12] Adapt for ES3000 serial vendor special opcode in CUSE + +With Huawei ES3000 serial NVMe PCIe SSD, Will send special opcode 0xC0 +to get self-define vendor logs, the data transfer field of opcode didn't +follow NVMe 1.3/1.4 spec, So treat the opcode as bidirectional. +All self-define opcode start with 0xC0. + +Signed-off-by: suweifeng +--- + include/spdk/nvme_spec.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/spdk/nvme_spec.h b/include/spdk/nvme_spec.h +index 18c80f8..95ec6d4 100644 +--- a/include/spdk/nvme_spec.h ++++ b/include/spdk/nvme_spec.h +@@ -1661,6 +1661,7 @@ enum spdk_nvme_admin_opcode { + SPDK_NVME_OPC_SANITIZE = 0x84, + + SPDK_NVME_OPC_GET_LBA_STATUS = 0x86, ++ SPDK_NVME_OPC_VENDOR = 0xC0, + }; + + /** +-- +2.43.0 + diff --git a/0012-adapt-for-spdk-24.01.patch b/0012-adapt-for-spdk-24.01.patch new file mode 100644 index 0000000..ed4cf73 --- /dev/null +++ b/0012-adapt-for-spdk-24.01.patch @@ -0,0 +1,2883 @@ +From a5caef861e2e693cee635af2cdfd35bc6f782da5 Mon Sep 17 00:00:00 2001 +From: suweifeng +Date: Mon, 19 Aug 2024 08:13:04 +0000 +Subject: [PATCH 12/12] adapt for spdk-24.01 + +Adapt for spdk-24.01: +1, fix compile error +2, adapt for spdk change in bdev_nvme_self.c + +Signed-off-by: suweifeng +--- + include/spdk/nvme.h | 2 - + include/spdk/thread.h | 16 +- + lib/bdev/bdev.c | 36 +- + lib/bdev/bdev_self.c | 3 +- + lib/event/reactor.c | 5 - + lib/nvme/nvme_ctrlr.c | 7 +- + lib/nvme/nvme_pcie.c | 4 +- + lib/thread/thread.c | 3 + + module/bdev/nvme/bdev_nvme.c | 51 +- + module/bdev/nvme/bdev_nvme.h | 996 +++++++++++----------- + module/bdev/nvme/bdev_nvme_self.c | 1282 +++++++++++++++-------------- + module/bdev/nvme/bdev_nvme_self.h | 86 +- + 12 files changed, 1305 insertions(+), 1186 deletions(-) + +diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h +index 6c20c29..5a78ba9 100644 +--- a/include/spdk/nvme.h ++++ b/include/spdk/nvme.h +@@ -4280,7 +4280,6 @@ struct spdk_nvme_qpair; + int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr); + struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname); + struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr); +-struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *ctrlname); + void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname); + void 
nvme_ctrlr_clear_iostat_all(void); + struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev); +@@ -4301,7 +4300,6 @@ bool spdk_nvme_ctrlr_is_ns_manage_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_supported(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_format_all_ns(struct spdk_nvme_ctrlr *ctrlr); + bool spdk_nvme_ctrlr_is_directive_supported(struct spdk_nvme_ctrlr *ctrlr); +-bool spdk_nvme_ctrlr_is_streams_supported(struct spdk_nvme_ctrlr *ctrlr); + int32_t spdk_nvme_ctrlr_identify_directives(struct spdk_nvme_ctrlr *ctrlr, uint16_t nsid, + void *payload); + int32_t spdk_nvme_ctrlr_enable_streams(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid); +diff --git a/include/spdk/thread.h b/include/spdk/thread.h +index 68ddf21..bbc0698 100644 +--- a/include/spdk/thread.h ++++ b/include/spdk/thread.h +@@ -45,6 +45,20 @@ enum spdk_thread_poller_rc { + struct spdk_thread; + + #ifdef SPDK_CONFIG_APP_RW ++enum spdk_thread_state { ++ SPDK_THREAD_STATE_INITIALIZED, ++ /* The thread is processing poller and message by spdk_thread_poll(). */ ++ SPDK_THREAD_STATE_RUNNING, ++ ++ /* The thread is in the process of termination. It reaps unregistering ++ * * poller are releasing I/O channel. ++ * */ ++ SPDK_THREAD_STATE_EXITING, ++ ++ /* The thread is exited. It is ready to call spdk_thread_destroy(). */ ++ SPDK_THREAD_STATE_EXITED, ++}; ++ + struct spdk_iodev_thread_info { + struct spdk_thread *thread; + volatile int32_t state; +@@ -54,8 +68,6 @@ extern struct spdk_iodev_thread_info lcore_thread_info[RTE_MAX_LCORE]; + + void spdk_reactors_use(bool useOrNot); + +-bool spdk_get_reactor_type(void); +- + void spdk_set_thread_exited(struct spdk_thread *thread); + + uint32_t spdk_get_channel_ref(void *io_ch); +diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c +index 8a43fbd..d4b98b0 100644 +--- a/lib/bdev/bdev.c ++++ b/lib/bdev/bdev.c +@@ -4,6 +4,11 @@ + * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ */ + ++#ifdef SPDK_CONFIG_APP_RW ++#define SECUREC_NEED_ERRNO_TYPE ++#include ++#endif ++ + #include "spdk/stdinc.h" + + #include "spdk/bdev.h" +@@ -28,7 +33,6 @@ + #ifdef SPDK_CONFIG_APP_RW + #include "spdk/stdinc.h" + #include "spdk/barrier.h" +-#include + #include "spdk_internal/bdev_stat.h" + #endif + +@@ -3453,10 +3457,10 @@ _bdev_io_submit(void *ctx) + struct spdk_bdev_io_stat *stat = &bdev_ch->stat; + if (bdev_ch->io_outstanding > 0) { + stat->pre_ticks = stat->cur_ticks; +- stat->cur_ticks = tsc; ++ stat->cur_ticks = bdev_io->internal.submit_tsc; + stat->io_ticks += stat->cur_ticks - stat->pre_ticks; + } else { +- stat->cur_ticks = tsc; ++ stat->cur_ticks = bdev_io->internal.submit_tsc; + } + #endif + +@@ -5320,6 +5324,12 @@ bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch + bdev_io->u.bdev.memory_domain = NULL; + bdev_io->u.bdev.memory_domain_ctx = NULL; + bdev_io->u.bdev.accel_sequence = NULL; ++#else ++ struct libstorage_io *io = (struct libstorage_io *)cb_arg; ++ bdev_build_contig_io(SPDK_BDEV_IO_TYPE_READ_NVME, buf, md_buf, offset_blocks, num_blocks, ++ io, bdev_io); ++ cb_arg = &io->location; ++#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + + bdev_io_submit(bdev_io); +@@ -5415,10 +5425,12 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; +-#else ++#ifdef SPDK_CONFIG_APP_RW + struct libstorage_io *io = (struct libstorage_io *)cb_arg; +- bdev_build_contig_io(SPDK_BDEV_IO_TYPE_READ_NVME, buf, md_buf, offset_blocks, num_blocks, +- io, bdev_io); ++ bdev_io->type = SPDK_BDEV_IO_TYPE_READV_NVME; ++ bdev_io->u.bdev.nbytes = io->nbytes; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_PI] = io->pi_action; ++ bdev_io->driver_ctx[SPDK_BDEV_IO_ACTION_FUA] = io->fua; + cb_arg = &io->location; + #endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); +@@ -6057,11 +6069,9 @@ spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch); + +-#ifndef SPDK_CONFIG_APP_RW + if (!desc->write) { + return -EBADF; + } +-#endif + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; +@@ -6086,12 +6096,6 @@ spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io + bdev_io->u.bdev.md_buf = NULL; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; +-#else +- LIBSTORAGE_IO_T *io = (struct libstorage_io *)cb_arg; +- bdev_build_contig_io(SPDK_BDEV_IO_TYPE_WRITE_NVME, buf, md_buf, offset_blocks, num_blocks, +- io, bdev_io); +- cb_arg = &io->location; +-#endif + bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_io->u.bdev.memory_domain = NULL; + bdev_io->u.bdev.memory_domain_ctx = NULL; +@@ -10405,9 +10409,9 @@ spdk_bdev_io_get_pool(size_t nbytes) + } + + if (nbytes <= SPDK_BDEV_SMALL_BUF_MAX_SIZE + SPDK_BDEV_SMALL_BUF_WITH_MAX_MD) { +- pool = g_bdev_mgr.buf_small_pool; ++ pool = NULL; + } else { +- pool = g_bdev_mgr.buf_large_pool; ++ pool = NULL; + } + + return pool; +diff --git a/lib/bdev/bdev_self.c b/lib/bdev/bdev_self.c +index c5b92a3..490b747 100644 +--- a/lib/bdev/bdev_self.c ++++ b/lib/bdev/bdev_self.c +@@ -11,6 +11,8 @@ + * GNU General Public License for more details. 
+ */ + ++#define SECUREC_NEED_ERRNO_TYPE ++#include + #include "bdev_internal.h" + + #include "spdk/stdinc.h" +@@ -20,7 +22,6 @@ + #include "spdk/nvme_spec.h" + #include "spdk/log.h" + +-#include + #include "spdk_internal/bdev_stat.h" + + pthread_mutex_t *g_io_stat_map_mutex = NULL; +diff --git a/lib/event/reactor.c b/lib/event/reactor.c +index 9a8e39a..ea53c2a 100644 +--- a/lib/event/reactor.c ++++ b/lib/event/reactor.c +@@ -1027,10 +1027,6 @@ reactor_run(void *arg) + } + } + } +-#ifdef SPDK_CONFIG_APP_RW +- /* When all thread in reactor is finish, inform libstorage to release resource. */ +- lcore_thread_info[reactor->lcore].state = SPDK_THREAD_STATE_EXITED; +-#endif + return 0; + } + +@@ -1095,7 +1091,6 @@ spdk_reactors_start(void) + spdk_env_thread_wait_all(); + + g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN; +-#endif + } + + static void +diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c +index f935318..936e197 100644 +--- a/lib/nvme/nvme_ctrlr.c ++++ b/lib/nvme/nvme_ctrlr.c +@@ -5366,6 +5366,7 @@ nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr) + { + struct spdk_nvme_qpair *qpair = NULL; + struct spdk_nvme_qpair *tmp = NULL; ++ struct spdk_nvme_ns *ns, *tmp_ns; + + SPDK_DEBUGLOG(nvme, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr); + TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) { +@@ -5373,7 +5374,11 @@ nvme_ctrlr_destruct_ublock(struct spdk_nvme_ctrlr *ctrlr) + } + + nvme_ctrlr_free_doorbell_buffer(ctrlr); +- nvme_ctrlr_destruct_namespaces(ctrlr); ++ RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) { ++ nvme_ctrlr_destruct_namespace(ctrlr, ns->id); ++ RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns); ++ spdk_free(ns); ++ } + spdk_bit_array_free(&ctrlr->free_io_qids); + nvme_transport_ctrlr_destruct(ctrlr); + } +diff --git a/lib/nvme/nvme_pcie.c b/lib/nvme/nvme_pcie.c +index 9e55427..7ee20a7 100644 +--- a/lib/nvme/nvme_pcie.c ++++ b/lib/nvme/nvme_pcie.c +@@ -606,7 +606,6 @@ nvme_pcie_ctrlr_unmap_pmr(struct nvme_pcie_ctrlr *pctrlr) + return rc; + } + +-#ifndef SPDK_CONFIG_APP_RW + static int + nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable) + { +@@ -678,7 +677,6 @@ nvme_pcie_ctrlr_config_pmr(struct spdk_nvme_ctrlr *ctrlr, bool enable) + + return 0; + } +-#endif + + static int + nvme_pcie_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr) +@@ -872,6 +870,7 @@ nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, + enum_ctx.has_pci_addr = true; + } + ++#ifndef SPDK_CONFIG_APP_RW + /* Only the primary process can monitor hotplug. */ + if (spdk_process_is_primary()) { + if (_nvme_pcie_hotplug_monitor(probe_ctx) > 0) { +@@ -881,6 +880,7 @@ nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, + return 0; + } + } ++#endif + + if (enum_ctx.has_pci_addr == false) { + return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), +diff --git a/lib/thread/thread.c b/lib/thread/thread.c +index 7d3169e..d8e1b05 100644 +--- a/lib/thread/thread.c ++++ b/lib/thread/thread.c +@@ -90,7 +90,9 @@ struct spdk_poller { + char name[SPDK_MAX_POLLER_NAME_LEN + 1]; + }; + ++#ifndef SPDK_CONFIG_APP_RW + enum spdk_thread_state { ++ SPDK_THREAD_STATE_INITIALIZED, + /* The thread is processing poller and message by spdk_thread_poll(). */ + SPDK_THREAD_STATE_RUNNING, + +@@ -102,6 +104,7 @@ enum spdk_thread_state { + /* The thread is exited. It is ready to call spdk_thread_destroy(). 
*/ + SPDK_THREAD_STATE_EXITED, + }; ++#endif + + struct spdk_thread { + uint64_t tsc_last; +diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c +index aacdbf1..a777926 100644 +--- a/module/bdev/nvme/bdev_nvme.c ++++ b/module/bdev/nvme/bdev_nvme.c +@@ -44,6 +44,7 @@ + + static int bdev_nvme_config_json(struct spdk_json_write_ctx *w); + ++#ifndef SPDK_CONFIG_APP_RW + struct nvme_bdev_io { + /** array of iovecs to transfer. */ + struct iovec *iovs; +@@ -101,6 +102,7 @@ struct nvme_bdev_io { + /* Current tsc at submit time. */ + uint64_t submit_tsc; + }; ++#endif + + struct nvme_probe_skip_entry { + struct spdk_nvme_transport_id trid; +@@ -149,10 +151,16 @@ static struct spdk_poller *g_hotplug_probe_poller; + static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx; + #ifdef SPDK_CONFIG_APP_RW + bool g_useCUSE = false; ++struct nvme_probe_ctx { ++ size_t count; ++ struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS]; ++ struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS]; ++ const char *names[NVME_MAX_CONTROLLERS]; ++ uint32_t prchk_flags[NVME_MAX_CONTROLLERS]; ++ const char *hostnqn; ++}; + #endif + +-static void nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, +- struct nvme_async_probe_ctx *ctx); + static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr, + struct nvme_async_probe_ctx *ctx); + static int bdev_nvme_library_init(void); +@@ -203,7 +211,7 @@ static int bdev_nvme_failover_ctrlr(struct nvme_ctrlr *nvme_ctrlr); + void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); + static int nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr); + +-static struct nvme_ns *nvme_ns_alloc(void); ++struct nvme_ns *nvme_ns_alloc(void); + static void nvme_ns_free(struct nvme_ns *ns); + + static int +@@ -336,7 +344,7 @@ nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns) + return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns); + } + +-static struct nvme_ctrlr * ++struct nvme_ctrlr * + nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid) + { + struct nvme_bdev_ctrlr *nbdev_ctrlr; +@@ -1644,6 +1652,7 @@ bdev_nvme_poll(void *arg) + return num_completions > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; + } + ++#ifndef SPDK_CONFIG_APP_RW + static int bdev_nvme_poll_adminq(void *arg); + + static void +@@ -1654,7 +1663,6 @@ bdev_nvme_change_adminq_poll_period(struct nvme_ctrlr *nvme_ctrlr, uint64_t new_ + nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, + nvme_ctrlr, new_period_us); + } +-#endif + + static int + bdev_nvme_poll_adminq(void *arg) +@@ -1684,6 +1692,7 @@ bdev_nvme_poll_adminq(void *arg) + + return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; + } ++#endif + + static void + nvme_bdev_free(void *io_device) +@@ -1931,8 +1940,10 @@ nvme_ctrlr_disconnect(struct nvme_ctrlr *nvme_ctrlr, nvme_ctrlr_disconnected_cb + assert(nvme_ctrlr->disconnected_cb == NULL); + nvme_ctrlr->disconnected_cb = cb_fn; + ++#ifndef SPDK_CONFIG_APP_RW + /* During disconnection, reduce the period to poll adminq more often. 
*/ + bdev_nvme_change_adminq_poll_period(nvme_ctrlr, 0); ++#endif + } + + enum bdev_nvme_op_after_reset { +@@ -3415,7 +3426,7 @@ bdev_nvme_create_poll_group_cb(void *io_device, void *ctx_buf) + + group->poller = SPDK_POLLER_REGISTER(bdev_nvme_poll, group, g_opts.nvme_ioq_poll_period_us); + +- if (group->poller == NULL && spdk_get_reactor_type()) { ++ if (group->poller == NULL) { + spdk_nvme_poll_group_destroy(group->group); + return -1; + } +@@ -4344,7 +4355,7 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, + } + } + +-static struct nvme_ns * ++struct nvme_ns * + nvme_ns_alloc(void) + { + struct nvme_ns *nvme_ns; +@@ -4492,7 +4503,7 @@ nvme_bdev_add_ns(struct nvme_bdev *bdev, struct nvme_ns *nvme_ns) + return 0; + } + +-static void ++void + nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) + { + struct spdk_nvme_ns *ns; +@@ -5358,9 +5369,10 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, + } else { + bdev_nvme_get_default_ctrlr_opts(&nvme_ctrlr->opts); + } +- ++#ifndef SPDK_CONFIG_APP_RW + nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr, + g_opts.nvme_adminq_poll_period_us); ++#endif + + if (g_opts.timeout_us > 0) { + /* Register timeout callback. Timeout values for IO vs. admin reqs can be different. */ +@@ -8337,15 +8349,20 @@ bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w) + #ifdef SPDK_CONFIG_APP_RW + void *nvme_channel_get_group(void *io_ch) + { +- struct nvme_io_channel *nvme_io_ch = io_ch; +- return nvme_io_ch->group; ++ struct nvme_bdev_channel *bdev_ch = io_ch; ++ struct nvme_io_path *io_path; ++ io_path = STAILQ_FIRST(&bdev_ch->io_path_list); ++ ++ return io_path->qpair->group; + } + + int nvme_channel_get_state(void *io_ch) + { +- struct nvme_io_channel *nvme_ch = io_ch; +- return nvme_ch->state; ++ struct nvme_bdev_channel *bdev_ch = io_ch; ++ ++ return bdev_ch->state; + } ++ + struct nvme_bdev_io *nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, + int iovcnt) + { +@@ -8361,7 +8378,7 @@ struct nvme_probe_ctx *bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id + { + struct nvme_probe_ctx *probe_ctx = calloc(1, sizeof(*probe_ctx)); + if (probe_ctx == NULL) { +- SPDK_ERRLOG("Failed to allocate probe_ctx\n"); ++ SPDK_ERRLOG("Failed to allocate probe_ctx"); + return NULL; + } + +@@ -8380,7 +8397,7 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, + + SPDK_DEBUGLOG(nvme, "Probing device %s\n", trid->traddr); + +- if (nvme_bdev_ctrlr_get(trid)) { ++ if (nvme_ctrlr_get(trid)) { + SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", + trid->traddr); + return false; +@@ -8441,7 +8458,7 @@ int bdev_probe_ctrlr(void) + + retry_count = spdk_conf_section_get_intval(sp, "RetryCount"); + if (retry_count >= 0) { +- g_opts.retry_count = retry_count; ++ g_opts.transport_retry_count = retry_count; + } + if (retry_count > 255) { + SPDK_WARNLOG("RetryCount:%d should not be greater than 255, set it to 255 this time\n", +@@ -8544,7 +8561,7 @@ int bdev_probe_ctrlr(void) + continue; + } + +- if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) { ++ if (!nvme_ctrlr_get(&probe_ctx->trids[i])) { + SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr); + SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n"); + } +diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h +index 7aca15e..54554df 100644 +--- a/module/bdev/nvme/bdev_nvme.h ++++ b/module/bdev/nvme/bdev_nvme.h 
+@@ -1,465 +1,531 @@ +-/* SPDX-License-Identifier: BSD-3-Clause +- * Copyright (C) 2016 Intel Corporation. All rights reserved. +- * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. +- * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +- * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved. +- */ +- +-#ifndef SPDK_BDEV_NVME_H +-#define SPDK_BDEV_NVME_H +- +-#include "spdk/stdinc.h" +- +-#include "spdk/queue.h" +-#include "spdk/nvme.h" +-#include "spdk/bdev_module.h" +-#include "spdk/jsonrpc.h" +- +-TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr); +-extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs; +-extern pthread_mutex_t g_bdev_nvme_mutex; +-extern bool g_bdev_nvme_module_finish; +-extern struct spdk_thread *g_bdev_nvme_init_thread; +- +-#define NVME_MAX_CONTROLLERS 1024 +- +-enum bdev_nvme_multipath_policy { +- BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE, +- BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE, +-}; +- +-enum bdev_nvme_multipath_selector { +- BDEV_NVME_MP_SELECTOR_ROUND_ROBIN = 1, +- BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH, +-}; +- +-typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc); +-typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int status); +-typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx); +- +-struct nvme_ctrlr_opts { +- uint32_t prchk_flags; +- int32_t ctrlr_loss_timeout_sec; +- uint32_t reconnect_delay_sec; +- uint32_t fast_io_fail_timeout_sec; +- bool from_discovery_service; +- /* Path to the file containing PSK, used for dumping configuration. */ +- char psk_path[PATH_MAX]; +-}; +- +-struct nvme_async_probe_ctx { +- struct spdk_nvme_probe_ctx *probe_ctx; +- const char *base_name; +- const char **names; +- uint32_t max_bdevs; +- uint32_t reported_bdevs; +- struct spdk_poller *poller; +- struct spdk_nvme_transport_id trid; +- struct nvme_ctrlr_opts bdev_opts; +- struct spdk_nvme_ctrlr_opts drv_opts; +- spdk_bdev_create_nvme_fn cb_fn; +- void *cb_ctx; +- uint32_t populates_in_progress; +- bool ctrlr_attached; +- bool probe_done; +- bool namespaces_populated; +-}; +- +-struct nvme_ns { +- uint32_t id; +- struct spdk_nvme_ns *ns; +- struct nvme_ctrlr *ctrlr; +- struct nvme_bdev *bdev; +- uint32_t ana_group_id; +- enum spdk_nvme_ana_state ana_state; +- bool ana_state_updating; +- bool ana_transition_timedout; +- struct spdk_poller *anatt_timer; +- struct nvme_async_probe_ctx *probe_ctx; +- TAILQ_ENTRY(nvme_ns) tailq; +- RB_ENTRY(nvme_ns) node; +- +- /** +- * record io path stat before destroyed. 
Allocation of stat is +- * decided by option io_path_stat of RPC +- * bdev_nvme_set_options +- */ +- struct spdk_bdev_io_stat *stat; +-}; +- +-struct nvme_bdev_io; +-struct nvme_bdev_ctrlr; +-struct nvme_bdev; +-struct nvme_io_path; +- +-struct nvme_path_id { +- struct spdk_nvme_transport_id trid; +- struct spdk_nvme_host_id hostid; +- TAILQ_ENTRY(nvme_path_id) link; +- uint64_t last_failed_tsc; +-}; +- +-typedef void (*bdev_nvme_ctrlr_op_cb)(void *cb_arg, int rc); +-typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr); +- +-struct nvme_ctrlr { +- /** +- * points to pinned, physically contiguous memory region; +- * contains 4KB IDENTIFY structure for controller which is +- * target for CONTROLLER IDENTIFY command during initialization +- */ +- struct spdk_nvme_ctrlr *ctrlr; +- struct nvme_path_id *active_path_id; +- int ref; +- +- uint32_t resetting : 1; +- uint32_t reconnect_is_delayed : 1; +- uint32_t in_failover : 1; +- uint32_t pending_failover : 1; +- uint32_t fast_io_fail_timedout : 1; +- uint32_t destruct : 1; +- uint32_t ana_log_page_updating : 1; +- uint32_t io_path_cache_clearing : 1; +- uint32_t dont_retry : 1; +- uint32_t disabled : 1; +- +- struct nvme_ctrlr_opts opts; +- +- RB_HEAD(nvme_ns_tree, nvme_ns) namespaces; +- +- struct spdk_opal_dev *opal_dev; +- +- struct spdk_poller *adminq_timer_poller; +- struct spdk_thread *thread; +- +- bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn; +- void *ctrlr_op_cb_arg; +- /* Poller used to check for reset/detach completion */ +- struct spdk_poller *reset_detach_poller; +- struct spdk_nvme_detach_ctx *detach_ctx; +- +- uint64_t reset_start_tsc; +- struct spdk_poller *reconnect_delay_timer; +- +- nvme_ctrlr_disconnected_cb disconnected_cb; +- +- /** linked list pointer for device list */ +- TAILQ_ENTRY(nvme_ctrlr) tailq; +- struct nvme_bdev_ctrlr *nbdev_ctrlr; +- +- TAILQ_HEAD(nvme_paths, nvme_path_id) trids; +- +- uint32_t max_ana_log_page_size; +- struct spdk_nvme_ana_page *ana_log_page; +- struct spdk_nvme_ana_group_descriptor *copied_ana_desc; +- +- struct nvme_async_probe_ctx *probe_ctx; +- +- pthread_mutex_t mutex; +-}; +- +-struct nvme_bdev_ctrlr { +- char *name; +- TAILQ_HEAD(, nvme_ctrlr) ctrlrs; +- TAILQ_HEAD(, nvme_bdev) bdevs; +- TAILQ_ENTRY(nvme_bdev_ctrlr) tailq; +-}; +- +-struct nvme_error_stat { +- uint32_t status_type[8]; +- uint32_t status[4][256]; +-}; +- +-struct nvme_bdev { +- struct spdk_bdev disk; +- uint32_t nsid; +- struct nvme_bdev_ctrlr *nbdev_ctrlr; +- pthread_mutex_t mutex; +- int ref; +- enum bdev_nvme_multipath_policy mp_policy; +- enum bdev_nvme_multipath_selector mp_selector; +- uint32_t rr_min_io; +- TAILQ_HEAD(, nvme_ns) nvme_ns_list; +- bool opal; +- TAILQ_ENTRY(nvme_bdev) tailq; +- struct nvme_error_stat *err_stat; +-}; +- +-struct nvme_qpair { +- struct nvme_ctrlr *ctrlr; +- struct spdk_nvme_qpair *qpair; +- struct nvme_poll_group *group; +- struct nvme_ctrlr_channel *ctrlr_ch; +- +- /* The following is used to update io_path cache of nvme_bdev_channels. */ +- TAILQ_HEAD(, nvme_io_path) io_path_list; +- +- TAILQ_ENTRY(nvme_qpair) tailq; +-}; +- +-struct nvme_ctrlr_channel { +- struct nvme_qpair *qpair; +- TAILQ_HEAD(, spdk_bdev_io) pending_resets; +- +- struct spdk_io_channel_iter *reset_iter; +- struct spdk_poller *connect_poller; +-}; +- +-struct nvme_io_path { +- struct nvme_ns *nvme_ns; +- struct nvme_qpair *qpair; +- STAILQ_ENTRY(nvme_io_path) stailq; +- +- /* The following are used to update io_path cache of the nvme_bdev_channel. 
*/ +- struct nvme_bdev_channel *nbdev_ch; +- TAILQ_ENTRY(nvme_io_path) tailq; +- +- /* allocation of stat is decided by option io_path_stat of RPC bdev_nvme_set_options */ +- struct spdk_bdev_io_stat *stat; +-}; +- +-struct nvme_bdev_channel { +- struct nvme_io_path *current_io_path; +- enum bdev_nvme_multipath_policy mp_policy; +- enum bdev_nvme_multipath_selector mp_selector; +- uint32_t rr_min_io; +- uint32_t rr_counter; +- STAILQ_HEAD(, nvme_io_path) io_path_list; +- TAILQ_HEAD(retry_io_head, spdk_bdev_io) retry_io_list; +- struct spdk_poller *retry_io_poller; +-}; +- +-struct nvme_poll_group { +- struct spdk_nvme_poll_group *group; +- struct spdk_io_channel *accel_channel; +- struct spdk_poller *poller; +- bool collect_spin_stat; +- uint64_t spin_ticks; +- uint64_t start_ticks; +- uint64_t end_ticks; +- TAILQ_HEAD(, nvme_qpair) qpair_list; +-}; +- +-void nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path); +- +-struct nvme_ctrlr *nvme_ctrlr_get_by_name(const char *name); +- +-struct nvme_ctrlr *nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr, +- uint16_t cntlid); +- +-struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name); +- +-typedef void (*nvme_bdev_ctrlr_for_each_fn)(struct nvme_bdev_ctrlr *nbdev_ctrlr, void *ctx); +- +-void nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx); +- +-void nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, +- struct spdk_json_write_ctx *w); +- +-void nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr); +- +-struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid); +-struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr); +-struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns); +- +-struct nvme_bdev_io; +-struct nvme_probe_ctx; +- +-enum spdk_bdev_timeout_action { +- SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, +- SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, +- SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT, +-}; +- +-struct spdk_bdev_nvme_opts { +- enum spdk_bdev_timeout_action action_on_timeout; +- uint64_t timeout_us; +- uint64_t timeout_admin_us; +- uint32_t keep_alive_timeout_ms; +- /* The number of attempts per I/O in the transport layer before an I/O fails. */ +- uint32_t transport_retry_count; +- uint32_t arbitration_burst; +- uint32_t low_priority_weight; +- uint32_t medium_priority_weight; +- uint32_t high_priority_weight; +- uint64_t nvme_adminq_poll_period_us; +- uint64_t nvme_ioq_poll_period_us; +- uint32_t io_queue_requests; +- bool delay_cmd_submit; +- /* The number of attempts per I/O in the bdev layer before an I/O fails. 
*/ +- int32_t bdev_retry_count; +- uint8_t transport_ack_timeout; +- int32_t ctrlr_loss_timeout_sec; +- uint32_t reconnect_delay_sec; +- uint32_t fast_io_fail_timeout_sec; +- bool disable_auto_failback; +- bool generate_uuids; +- /* Type of Service - RDMA only */ +- uint8_t transport_tos; +- bool nvme_error_stat; +- uint32_t rdma_srq_size; +- bool io_path_stat; +- bool allow_accel_sequence; +- uint32_t rdma_max_cq_size; +-}; +- +-struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); +-void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts); +-int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts); +-int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx); +- +-void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts); +- +-int bdev_nvme_create(struct spdk_nvme_transport_id *trid, +- const char *base_name, +- const char **names, +- uint32_t count, +- spdk_bdev_create_nvme_fn cb_fn, +- void *cb_ctx, +- struct spdk_nvme_ctrlr_opts *drv_opts, +- struct nvme_ctrlr_opts *bdev_opts, +- bool multipath); +- +-int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name, +- struct spdk_nvme_ctrlr_opts *drv_opts, struct nvme_ctrlr_opts *bdev_opts, +- uint64_t timeout, bool from_mdns, +- spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx); +-int bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, +- void *cb_ctx); +-void bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w); +- +-int bdev_nvme_start_mdns_discovery(const char *base_name, +- const char *svcname, +- struct spdk_nvme_ctrlr_opts *drv_opts, +- struct nvme_ctrlr_opts *bdev_opts); +-int bdev_nvme_stop_mdns_discovery(const char *name); +-void bdev_nvme_get_mdns_discovery_info(struct spdk_jsonrpc_request *request); +-void bdev_nvme_mdns_discovery_config_json(struct spdk_json_write_ctx *w); +- +-struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev); +- +-typedef void (*bdev_nvme_delete_done_fn)(void *ctx, int rc); +- +-/** +- * Delete NVMe controller with all bdevs on top of it, or delete the specified path +- * if there is any alternative path. Requires to pass name of NVMe controller. +- * +- * \param name NVMe controller name +- * \param path_id The specified path to remove (optional) +- * \param delete_done Callback function on delete complete (optional) +- * \param delete_done_ctx Context passed to callback (optional) +- * \return zero on success, +- * -EINVAL on wrong parameters or +- * -ENODEV if controller is not found or +- * -ENOMEM on no memory +- */ +-int bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id, +- bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx); +- +-enum nvme_ctrlr_op { +- NVME_CTRLR_OP_RESET = 1, +- NVME_CTRLR_OP_ENABLE, +- NVME_CTRLR_OP_DISABLE, +-}; +- +-/** +- * Perform specified operation on an NVMe controller. +- * +- * NOTE: The callback function is always called after this function returns except for +- * out of memory cases. +- * +- * \param nvme_ctrlr The specified NVMe controller to operate +- * \param op Operation code +- * \param cb_fn Function to be called back after operation completes +- * \param cb_arg Argument for callback function +- */ +-void nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op, +- bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg); +- +-/** +- * Perform specified operation on all NVMe controllers in an NVMe bdev controller. 
+- * +- * NOTE: The callback function is always called after this function returns except for +- * out of memory cases. +- * +- * \param nbdev_ctrlr The specified NVMe bdev controller to operate +- * \param op Operation code +- * \param cb_fn Function to be called back after operation completes +- * \param cb_arg Argument for callback function +- */ +-void nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op, +- bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg); +- +-typedef void (*bdev_nvme_set_preferred_path_cb)(void *cb_arg, int rc); +- +-/** +- * Set the preferred I/O path for an NVMe bdev in multipath mode. +- * +- * NOTE: This function does not support NVMe bdevs in failover mode. +- * +- * \param name NVMe bdev name +- * \param cntlid NVMe-oF controller ID +- * \param cb_fn Function to be called back after completion. +- * \param cb_arg Argument for callback function. +- */ +-void bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid, +- bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg); +- +-typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc); +- +-/** +- * Set multipath policy of the NVMe bdev. +- * +- * \param name NVMe bdev name +- * \param policy Multipath policy (active-passive or active-active) +- * \param selector Multipath selector (round_robin, queue_depth) +- * \param rr_min_io Number of IO to route to a path before switching to another for round-robin +- * \param cb_fn Function to be called back after completion. +- */ +-void bdev_nvme_set_multipath_policy(const char *name, +- enum bdev_nvme_multipath_policy policy, +- enum bdev_nvme_multipath_selector selector, +- uint32_t rr_min_io, +- bdev_nvme_set_multipath_policy_cb cb_fn, +- void *cb_arg); +- +-#ifdef SPDK_CONFIG_APP_RW +-void +-bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl); +- +-void +-bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset); +- +-int +-bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length); +- +-bool +-probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, +- struct spdk_nvme_ctrlr_opts *opts); +- +-void +-attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, +- struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts); +- +-void +-remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); +- +-void +-nvme_ctrlr_populate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns, +- struct nvme_async_probe_ctx *ctx); +- +-void +-nvme_ctrlr_depopulate_namespace(struct nvme_bdev_ctrlr *ctrlr, struct nvme_bdev_ns *ns); +- +-int +-bdev_probe_ctrlr(void); +- +-struct nvme_bdev_io * +-nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt); +- +-struct nvme_probe_ctx * +-bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, const char *base_name, +- const char *hostnqn); +-#endif +- +-#endif /* SPDK_BDEV_NVME_H */ ++/* SPDX-License-Identifier: BSD-3-Clause ++ * Copyright (C) 2016 Intel Corporation. All rights reserved. ++ * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. ++ * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. ++ * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved. 
++ */ ++ ++#ifndef SPDK_BDEV_NVME_H ++#define SPDK_BDEV_NVME_H ++ ++#include "spdk/stdinc.h" ++ ++#include "spdk/queue.h" ++#include "spdk/nvme.h" ++#include "spdk/bdev_module.h" ++#include "spdk/jsonrpc.h" ++ ++struct nvme_bdev_io; ++struct nvme_probe_ctx; ++ ++TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr); ++extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs; ++extern pthread_mutex_t g_bdev_nvme_mutex; ++extern bool g_bdev_nvme_module_finish; ++extern struct spdk_thread *g_bdev_nvme_init_thread; ++ ++enum bdev_nvme_multipath_policy { ++ BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE, ++ BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE, ++}; ++ ++enum bdev_nvme_multipath_selector { ++ BDEV_NVME_MP_SELECTOR_ROUND_ROBIN = 1, ++ BDEV_NVME_MP_SELECTOR_QUEUE_DEPTH, ++}; ++ ++typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc); ++typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int status); ++typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx); ++ ++struct nvme_ctrlr_opts { ++ uint32_t prchk_flags; ++ int32_t ctrlr_loss_timeout_sec; ++ uint32_t reconnect_delay_sec; ++ uint32_t fast_io_fail_timeout_sec; ++ bool from_discovery_service; ++ /* Path to the file containing PSK, used for dumping configuration. */ ++ char psk_path[PATH_MAX]; ++}; ++ ++struct nvme_async_probe_ctx { ++ struct spdk_nvme_probe_ctx *probe_ctx; ++ const char *base_name; ++ const char **names; ++ uint32_t max_bdevs; ++ uint32_t reported_bdevs; ++ struct spdk_poller *poller; ++ struct spdk_nvme_transport_id trid; ++ struct nvme_ctrlr_opts bdev_opts; ++ struct spdk_nvme_ctrlr_opts drv_opts; ++ spdk_bdev_create_nvme_fn cb_fn; ++ void *cb_ctx; ++ uint32_t populates_in_progress; ++ bool ctrlr_attached; ++ bool probe_done; ++ bool namespaces_populated; ++}; ++ ++struct nvme_ns { ++ uint32_t id; ++ struct spdk_nvme_ns *ns; ++ struct nvme_ctrlr *ctrlr; ++ struct nvme_bdev *bdev; ++ uint32_t ana_group_id; ++ enum spdk_nvme_ana_state ana_state; ++ bool ana_state_updating; ++ bool ana_transition_timedout; ++ struct spdk_poller *anatt_timer; ++ struct nvme_async_probe_ctx *probe_ctx; ++ TAILQ_ENTRY(nvme_ns) tailq; ++ RB_ENTRY(nvme_ns) node; ++ ++ /** ++ * record io path stat before destroyed. 
Allocation of stat is ++ * decided by option io_path_stat of RPC ++ * bdev_nvme_set_options ++ */ ++ struct spdk_bdev_io_stat *stat; ++}; ++ ++struct nvme_bdev_io; ++struct nvme_bdev_ctrlr; ++struct nvme_bdev; ++struct nvme_io_path; ++ ++struct nvme_path_id { ++ struct spdk_nvme_transport_id trid; ++ struct spdk_nvme_host_id hostid; ++ TAILQ_ENTRY(nvme_path_id) link; ++ uint64_t last_failed_tsc; ++}; ++ ++typedef void (*bdev_nvme_ctrlr_op_cb)(void *cb_arg, int rc); ++typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr); ++ ++struct nvme_ctrlr { ++ /** ++ * points to pinned, physically contiguous memory region; ++ * contains 4KB IDENTIFY structure for controller which is ++ * target for CONTROLLER IDENTIFY command during initialization ++ */ ++ struct spdk_nvme_ctrlr *ctrlr; ++ struct nvme_path_id *active_path_id; ++ int ref; ++ ++ uint32_t resetting : 1; ++ uint32_t reconnect_is_delayed : 1; ++ uint32_t in_failover : 1; ++ uint32_t pending_failover : 1; ++ uint32_t fast_io_fail_timedout : 1; ++ uint32_t destruct : 1; ++ uint32_t ana_log_page_updating : 1; ++ uint32_t io_path_cache_clearing : 1; ++ uint32_t dont_retry : 1; ++ uint32_t disabled : 1; ++ ++ struct nvme_ctrlr_opts opts; ++ ++ RB_HEAD(nvme_ns_tree, nvme_ns) namespaces; ++ ++ struct spdk_opal_dev *opal_dev; ++ ++ struct spdk_poller *adminq_timer_poller; ++ struct spdk_thread *thread; ++ ++ bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn; ++ void *ctrlr_op_cb_arg; ++ /* Poller used to check for reset/detach completion */ ++ struct spdk_poller *reset_detach_poller; ++ struct spdk_nvme_detach_ctx *detach_ctx; ++ ++ uint64_t reset_start_tsc; ++ struct spdk_poller *reconnect_delay_timer; ++ ++ nvme_ctrlr_disconnected_cb disconnected_cb; ++ ++ /** linked list pointer for device list */ ++ TAILQ_ENTRY(nvme_ctrlr) tailq; ++ struct nvme_bdev_ctrlr *nbdev_ctrlr; ++ ++ TAILQ_HEAD(nvme_paths, nvme_path_id) trids; ++ ++ uint32_t max_ana_log_page_size; ++ struct spdk_nvme_ana_page *ana_log_page; ++ struct spdk_nvme_ana_group_descriptor *copied_ana_desc; ++ ++ struct nvme_async_probe_ctx *probe_ctx; ++ ++ pthread_mutex_t mutex; ++}; ++ ++struct nvme_bdev_ctrlr { ++ char *name; ++ TAILQ_HEAD(, nvme_ctrlr) ctrlrs; ++ TAILQ_HEAD(, nvme_bdev) bdevs; ++ TAILQ_ENTRY(nvme_bdev_ctrlr) tailq; ++}; ++ ++struct nvme_error_stat { ++ uint32_t status_type[8]; ++ uint32_t status[4][256]; ++}; ++ ++struct nvme_bdev { ++ struct spdk_bdev disk; ++ uint32_t nsid; ++ struct nvme_bdev_ctrlr *nbdev_ctrlr; ++ pthread_mutex_t mutex; ++ int ref; ++ enum bdev_nvme_multipath_policy mp_policy; ++ enum bdev_nvme_multipath_selector mp_selector; ++ uint32_t rr_min_io; ++ TAILQ_HEAD(, nvme_ns) nvme_ns_list; ++ bool opal; ++ TAILQ_ENTRY(nvme_bdev) tailq; ++ struct nvme_error_stat *err_stat; ++}; ++ ++struct nvme_qpair { ++ struct nvme_ctrlr *ctrlr; ++ struct spdk_nvme_qpair *qpair; ++ struct nvme_poll_group *group; ++ struct nvme_ctrlr_channel *ctrlr_ch; ++ ++ /* The following is used to update io_path cache of nvme_bdev_channels. */ ++ TAILQ_HEAD(, nvme_io_path) io_path_list; ++ ++ TAILQ_ENTRY(nvme_qpair) tailq; ++}; ++ ++struct nvme_ctrlr_channel { ++ struct nvme_qpair *qpair; ++ TAILQ_HEAD(, spdk_bdev_io) pending_resets; ++ ++ struct spdk_io_channel_iter *reset_iter; ++ struct spdk_poller *connect_poller; ++}; ++ ++struct nvme_io_path { ++ struct nvme_ns *nvme_ns; ++ struct nvme_qpair *qpair; ++ STAILQ_ENTRY(nvme_io_path) stailq; ++ ++ /* The following are used to update io_path cache of the nvme_bdev_channel. 
*/ ++ struct nvme_bdev_channel *nbdev_ch; ++ TAILQ_ENTRY(nvme_io_path) tailq; ++ ++ /* allocation of stat is decided by option io_path_stat of RPC bdev_nvme_set_options */ ++ struct spdk_bdev_io_stat *stat; ++}; ++ ++struct nvme_bdev_channel { ++ struct nvme_io_path *current_io_path; ++ enum bdev_nvme_multipath_policy mp_policy; ++ enum bdev_nvme_multipath_selector mp_selector; ++ uint32_t rr_min_io; ++ uint32_t rr_counter; ++ STAILQ_HEAD(, nvme_io_path) io_path_list; ++ TAILQ_HEAD(retry_io_head, spdk_bdev_io) retry_io_list; ++ struct spdk_poller *retry_io_poller; ++ int state; ++}; ++ ++struct nvme_poll_group { ++ struct spdk_nvme_poll_group *group; ++ struct spdk_io_channel *accel_channel; ++ struct spdk_poller *poller; ++ bool collect_spin_stat; ++ uint64_t spin_ticks; ++ uint64_t start_ticks; ++ uint64_t end_ticks; ++#ifdef SPDK_CONFIG_APP_RW ++ uint64_t save_start_ticks; ++ uint64_t num_poll_timeout; ++#endif ++ TAILQ_HEAD(, nvme_qpair) qpair_list; ++}; ++ ++void nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path); ++ ++struct nvme_ctrlr *nvme_ctrlr_get_by_name(const char *name); ++ ++struct nvme_ctrlr *nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr, ++ uint16_t cntlid); ++ ++struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name); ++ ++typedef void (*nvme_bdev_ctrlr_for_each_fn)(struct nvme_bdev_ctrlr *nbdev_ctrlr, void *ctx); ++ ++void nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx); ++ ++void nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid, ++ struct spdk_json_write_ctx *w); ++ ++void nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr); ++ ++struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid); ++struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr); ++struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns); ++ ++enum spdk_bdev_timeout_action { ++ SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0, ++ SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET, ++ SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT, ++}; ++ ++struct spdk_bdev_nvme_opts { ++ enum spdk_bdev_timeout_action action_on_timeout; ++ uint64_t timeout_us; ++ uint64_t timeout_admin_us; ++ uint32_t keep_alive_timeout_ms; ++ /* The number of attempts per I/O in the transport layer before an I/O fails. */ ++ uint32_t transport_retry_count; ++ uint32_t arbitration_burst; ++ uint32_t low_priority_weight; ++ uint32_t medium_priority_weight; ++ uint32_t high_priority_weight; ++ uint64_t nvme_adminq_poll_period_us; ++ uint64_t nvme_ioq_poll_period_us; ++ uint32_t io_queue_requests; ++ bool delay_cmd_submit; ++ /* The number of attempts per I/O in the bdev layer before an I/O fails. 
*/ ++ int32_t bdev_retry_count; ++ uint8_t transport_ack_timeout; ++ int32_t ctrlr_loss_timeout_sec; ++ uint32_t reconnect_delay_sec; ++ uint32_t fast_io_fail_timeout_sec; ++ bool disable_auto_failback; ++ bool generate_uuids; ++ /* Type of Service - RDMA only */ ++ uint8_t transport_tos; ++ bool nvme_error_stat; ++ uint32_t rdma_srq_size; ++ bool io_path_stat; ++ bool allow_accel_sequence; ++ uint32_t rdma_max_cq_size; ++}; ++ ++struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); ++void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts); ++int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts); ++int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx); ++ ++void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts); ++ ++int bdev_nvme_create(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, ++ uint32_t count, ++ spdk_bdev_create_nvme_fn cb_fn, ++ void *cb_ctx, ++ struct spdk_nvme_ctrlr_opts *drv_opts, ++ struct nvme_ctrlr_opts *bdev_opts, ++ bool multipath); ++ ++int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name, ++ struct spdk_nvme_ctrlr_opts *drv_opts, struct nvme_ctrlr_opts *bdev_opts, ++ uint64_t timeout, bool from_mdns, ++ spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx); ++int bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn, ++ void *cb_ctx); ++void bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w); ++ ++int bdev_nvme_start_mdns_discovery(const char *base_name, ++ const char *svcname, ++ struct spdk_nvme_ctrlr_opts *drv_opts, ++ struct nvme_ctrlr_opts *bdev_opts); ++int bdev_nvme_stop_mdns_discovery(const char *name); ++void bdev_nvme_get_mdns_discovery_info(struct spdk_jsonrpc_request *request); ++void bdev_nvme_mdns_discovery_config_json(struct spdk_json_write_ctx *w); ++ ++struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev); ++ ++typedef void (*bdev_nvme_delete_done_fn)(void *ctx, int rc); ++ ++/** ++ * Delete NVMe controller with all bdevs on top of it, or delete the specified path ++ * if there is any alternative path. Requires to pass name of NVMe controller. ++ * ++ * \param name NVMe controller name ++ * \param path_id The specified path to remove (optional) ++ * \param delete_done Callback function on delete complete (optional) ++ * \param delete_done_ctx Context passed to callback (optional) ++ * \return zero on success, ++ * -EINVAL on wrong parameters or ++ * -ENODEV if controller is not found or ++ * -ENOMEM on no memory ++ */ ++int bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id, ++ bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx); ++ ++enum nvme_ctrlr_op { ++ NVME_CTRLR_OP_RESET = 1, ++ NVME_CTRLR_OP_ENABLE, ++ NVME_CTRLR_OP_DISABLE, ++}; ++ ++/** ++ * Perform specified operation on an NVMe controller. ++ * ++ * NOTE: The callback function is always called after this function returns except for ++ * out of memory cases. ++ * ++ * \param nvme_ctrlr The specified NVMe controller to operate ++ * \param op Operation code ++ * \param cb_fn Function to be called back after operation completes ++ * \param cb_arg Argument for callback function ++ */ ++void nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op, ++ bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg); ++ ++/** ++ * Perform specified operation on all NVMe controllers in an NVMe bdev controller. 
++ * ++ * NOTE: The callback function is always called after this function returns except for ++ * out of memory cases. ++ * ++ * \param nbdev_ctrlr The specified NVMe bdev controller to operate ++ * \param op Operation code ++ * \param cb_fn Function to be called back after operation completes ++ * \param cb_arg Argument for callback function ++ */ ++void nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op, ++ bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg); ++ ++typedef void (*bdev_nvme_set_preferred_path_cb)(void *cb_arg, int rc); ++ ++/** ++ * Set the preferred I/O path for an NVMe bdev in multipath mode. ++ * ++ * NOTE: This function does not support NVMe bdevs in failover mode. ++ * ++ * \param name NVMe bdev name ++ * \param cntlid NVMe-oF controller ID ++ * \param cb_fn Function to be called back after completion. ++ * \param cb_arg Argument for callback function. ++ */ ++void bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid, ++ bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg); ++ ++typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc); ++ ++/** ++ * Set multipath policy of the NVMe bdev. ++ * ++ * \param name NVMe bdev name ++ * \param policy Multipath policy (active-passive or active-active) ++ * \param selector Multipath selector (round_robin, queue_depth) ++ * \param rr_min_io Number of IO to route to a path before switching to another for round-robin ++ * \param cb_fn Function to be called back after completion. ++ */ ++void bdev_nvme_set_multipath_policy(const char *name, ++ enum bdev_nvme_multipath_policy policy, ++ enum bdev_nvme_multipath_selector selector, ++ uint32_t rr_min_io, ++ bdev_nvme_set_multipath_policy_cb cb_fn, ++ void *cb_arg); ++ ++#ifdef SPDK_CONFIG_APP_RW ++ ++struct nvme_bdev_io { ++ /** array of iovecs to transfer. */ ++ struct iovec *iovs; ++ ++ /** Number of iovecs in iovs array. */ ++ int iovcnt; ++ ++ /** Current iovec position. */ ++ int iovpos; ++ ++ /** Offset in current iovec. */ ++ uint32_t iov_offset; ++ ++ /** I/O path the current I/O or admin passthrough is submitted on, or the I/O path ++ * being reset in a reset I/O. ++ */ ++ struct nvme_io_path *io_path; ++ ++ /** array of iovecs to transfer. */ ++ struct iovec *fused_iovs; ++ ++ /** Number of iovecs in iovs array. */ ++ int fused_iovcnt; ++ ++ /** Current iovec position. */ ++ int fused_iovpos; ++ ++ /** Offset in current iovec. */ ++ uint32_t fused_iov_offset; ++ ++ /** Saved status for admin passthru completion event, PI error verification, or intermediate compare-and-write status */ ++ struct spdk_nvme_cpl cpl; ++ ++ /** Extended IO opts passed by the user to bdev layer and mapped to NVME format */ ++ struct spdk_nvme_ns_cmd_ext_io_opts ext_opts; ++ ++ /** Keeps track if first of fused commands was submitted */ ++ bool first_fused_submitted; ++ ++ /** Keeps track if first of fused commands was completed */ ++ bool first_fused_completed; ++ ++ /** Temporary pointer to zone report buffer */ ++ struct spdk_nvme_zns_zone_report *zone_report_buf; ++ ++ /** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */ ++ uint64_t handled_zones; ++ ++ /** Expiration value in ticks to retry the current I/O. */ ++ uint64_t retry_ticks; ++ ++ /* How many times the current I/O was retried. */ ++ int32_t retry_count; ++ ++ /* Current tsc at submit time. 
*/ ++ uint64_t submit_tsc; ++}; ++ ++void ++bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl); ++ ++void ++bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset); ++ ++int ++bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length); ++ ++bool ++probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, ++ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts); ++ ++void ++remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); ++ ++ ++void ++nvme_ctrlr_populate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns); ++ ++void ++nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns); ++ ++int ++bdev_probe_ctrlr(void); ++ ++struct nvme_bdev_io * ++nvme_bdev_io_update_args(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt); ++ ++struct nvme_probe_ctx * ++bdev_nvme_create_probe_ctx(struct spdk_nvme_transport_id *trid, const char *base_name, ++ const char *hostnqn); ++ ++struct nvme_ctrlr *nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid); ++ ++struct nvme_ns *nvme_ns_alloc(void); ++#endif ++ ++#endif /* SPDK_BDEV_NVME_H */ +diff --git a/module/bdev/nvme/bdev_nvme_self.c b/module/bdev/nvme/bdev_nvme_self.c +index dc480ff..781eb58 100644 +--- a/module/bdev/nvme/bdev_nvme_self.c ++++ b/module/bdev/nvme/bdev_nvme_self.c +@@ -1,632 +1,650 @@ +-/* +- * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 and +- * only version 2 as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. 
+- */ +-#include "bdev_nvme.h" +- +-#include "spdk/json.h" +-#include "spdk/likely.h" +-#include "spdk/bdev_module.h" +-#include "spdk/nvme.h" +- +-#include "spdk_internal/bdev_stat.h" +-#include "bdev_nvme_self.h" +-#include "common.h" +-#include +- +-void bdev_update_ch_timeout(struct nvme_bdev_poll_group *group) +-{ +- uint64_t current_ticks = 0; +- uint64_t poll_ticks = 0; +- int64_t poll_time = 0; +- +- current_ticks = spdk_get_ticks(); +- +- if (spdk_unlikely(g_polltime_threshold)) { +- if (group->save_start_ticks) { +- poll_ticks = current_ticks - group->save_start_ticks; +- poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz(); +- if (poll_time >= g_polltime_threshold) { +- group->num_poll_timeout++; +- SPDK_WARNLOG("group[%p] poll timeout in %ldms", group, poll_time); +- } +- } +- group->save_start_ticks = current_ticks; +- } +-} +- +-int _bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +-{ +- struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- +- if (nvme_ch->qpair == NULL) { +- /* The device is currently resetting */ +- return -1; +- } +- +- switch (bdev_io->type) { +- case SPDK_BDEV_IO_TYPE_READ_NVME: +- SPDK_DEBUGLOG(bdev_nvme, "read %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, +- bdev_io->u.contig.offset_blocks); +- return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_READ, +- bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); +- case SPDK_BDEV_IO_TYPE_WRITE_NVME: +- SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks, +- bdev_io->u.contig.offset_blocks); +- return bdev_nvme_queue_cmd_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, bdev_io->u.contig.buf, +- bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_WRITE, +- bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks); +- case SPDK_BDEV_IO_TYPE_READV_NVME: +- SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, +- bdev_io->u.bdev.offset_blocks); +- return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_READ, +- bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, +- bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); +- case SPDK_BDEV_IO_TYPE_WRITEV_NVME: +- SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks, +- bdev_io->u.bdev.offset_blocks); +- return bdev_nvme_queue_cmd_v_with_md((struct nvme_bdev *)bdev_io->bdev->ctxt, nvme_ch->qpair, +- bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_WRITE, +- bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, +- bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks); +- case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS: +- return bdev_nvme_unmap_blocks((struct nvme_bdev *)bdev_io->bdev->ctxt, +- ch, +- (void *)bdev_io->driver_ctx, +- (struct spdk_nvme_dsm_range *)bdev_io->u.contig.buf, +- bdev_io->u.contig.num_blocks); +- default: +- return -EINVAL; +- } +- return 0; +-} +- +-int bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w) +-{ +- return 0; +-} +- +-uint16_t bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch) +-{ +- struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- uint16_t channel_id; +- struct spdk_nvme_qpair *qpair = nvme_ch->qpair; +- channel_id = spdk_nvme_get_qpair_id(qpair); +- return channel_id; +-} +- 
+-uint64_t bdev_nvme_get_timeout_count(struct spdk_io_channel *ch) +-{ +- struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- return nvme_ch->group->num_poll_timeout; +-} +- +-int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr) +-{ +- uint32_t num_ctrlr = 0, i = 0; +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +- struct nvme_ctrlr_info *pCtrlrInfo = NULL; +- const struct spdk_nvme_ctrlr_data *cdata = NULL; +- struct spdk_nvme_ctrlr_opts *opts = NULL; +- +- struct spdk_pci_device *pci_dev = NULL; +- int rc; +- +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- num_ctrlr++; +- } +- if (num_ctrlr == 0) { +- SPDK_NOTICELOG("No any nvme controller.\n"); +- return 0; +- } +- if (ctrlName != NULL) { +- num_ctrlr = 1; +- } +- pCtrlrInfo = calloc(num_ctrlr, sizeof(struct nvme_ctrlr_info)); +- if (pCtrlrInfo == NULL) { +- SPDK_ERRLOG("Failed to alloc memory for getting controller infomation.\n"); +- return -1; +- } +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (i >= num_ctrlr) { /* prevent having controllers be added or deleted */ +- i++; +- continue; +- } +- if (ctrlName != NULL) { +- if (strcmp(nvme_bdev_ctrlr->name, ctrlName) != 0) { +- continue; +- } +- } +- cdata = spdk_nvme_ctrlr_get_data(nvme_bdev_ctrlr->ctrlr); +- opts = spdk_nvme_ctrlr_get_opts(nvme_bdev_ctrlr->ctrlr); +- pci_dev = spdk_nvme_ctrlr_get_pci_device(nvme_bdev_ctrlr->ctrlr); +- if (pci_dev == NULL) { +- SPDK_ERRLOG("Failed to get pci device\n"); +- break; +- } +- rc = strcpy_s(pCtrlrInfo[i].ctrlName, sizeof(pCtrlrInfo[i].ctrlName), nvme_bdev_ctrlr->name); +- if (rc != 0) { +- SPDK_ERRLOG("String copy failed\n"); +- } +- rc = strcpy_s(pCtrlrInfo[i].pciAddr, sizeof(pCtrlrInfo[i].pciAddr), +- nvme_bdev_ctrlr->connected_trid->traddr); +- if (rc != 0) { +- SPDK_ERRLOG("String copy failed\n"); +- } +- +- rc = memcpy_s(pCtrlrInfo[i].sn, sizeof(pCtrlrInfo[i].sn), cdata->sn, 20); +- if (rc != 0) { +- SPDK_ERRLOG("Memory copy failed\n"); +- } +- +- rc = memcpy_s(pCtrlrInfo[i].fr, sizeof(pCtrlrInfo[i].fr), cdata->fr, 8); +- if (rc != 0) { +- SPDK_ERRLOG("Memory copy failed\n"); +- } +- +- rc = memcpy_s(pCtrlrInfo[i].mn, sizeof(pCtrlrInfo[i].mn), cdata->mn, 40); +- if (rc != 0) { +- SPDK_ERRLOG("Memory copy failed\n"); +- } +- +- pCtrlrInfo[i].trtype = (uint16_t)nvme_bdev_ctrlr->connected_trid->trtype; +- pCtrlrInfo[i].tnvmcap = cdata->tnvmcap[0]; +- pCtrlrInfo[i].unvmcap = cdata->unvmcap[0]; +- pCtrlrInfo[i].support_ns = cdata->oacs.ns_manage; +- pCtrlrInfo[i].directives = cdata->oacs.directives; +- pCtrlrInfo[i].dsm = cdata->oncs.dsm; +- pCtrlrInfo[i].max_num_ns = cdata->nn; +- pCtrlrInfo[i].num_io_queues = opts->num_io_queues; +- pCtrlrInfo[i].io_queue_size = opts->io_queue_size; +- pCtrlrInfo[i].device_id = spdk_pci_device_get_device_id(pci_dev); +- pCtrlrInfo[i].subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev); +- pCtrlrInfo[i].vid = cdata->vid; +- pCtrlrInfo[i].ssvid = cdata->ssvid; +- pCtrlrInfo[i].ctrlid = cdata->cntlid; +- pCtrlrInfo[i].version = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev_ctrlr->ctrlr).raw; +- i++; +- if (ctrlName != NULL) { +- break; +- } +- } +- if (i != num_ctrlr) { +- SPDK_ERRLOG("It has controller been added or deleted when fetched infomation, please try again later.\n"); +- free(pCtrlrInfo); +- return -1; +- } +- *ppCtrlr = pCtrlrInfo; +- return num_ctrlr; +-} +- +-struct nvme_bdev_ctrlr *nvme_ctrlr_get_by_name(const char *name) +-{ +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +- +- if (name == NULL) { +- 
return NULL; +- } +- +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (strcmp(name, nvme_bdev_ctrlr->name) == 0) { +- return nvme_bdev_ctrlr; +- } +- } +- +- return NULL; +-} +- +-struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname) +-{ +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +- +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (strcmp(nvme_bdev_ctrlr->name, ctrlname) == 0) { +- return nvme_bdev_ctrlr->ctrlr; +- } +- } +- +- return NULL; +-} +- +-struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nvme_bdev_ctrlr) +-{ +- if (nvme_bdev_ctrlr == NULL) { +- return NULL; +- } +- return nvme_bdev_ctrlr->ctrlr; +-} +- +-void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname) +-{ +- int i; +- size_t size = strnlen(ctrlname, 24); +- +- for (i = 0; i < STAT_MAX_NUM; i++) { +- if (strncmp(g_io_stat_map[i].bdev_name, ctrlname, size) == 0) { +- if ((g_io_stat_map[i].bdev_name[size] == 'n') && isdigit(g_io_stat_map[i].bdev_name[size + 1])) { +- g_io_stat_map[i].channel_id = 0; +- memset(g_io_stat_map[i].bdev_name, 0, sizeof(g_io_stat_map[i].bdev_name)); +- g_io_stat_map[i].num_read_ops = 0; +- g_io_stat_map[i].num_write_ops = 0; +- g_io_stat_map[i].bytes_read = 0; +- g_io_stat_map[i].bytes_written = 0; +- g_io_stat_map[i].io_outstanding = 0; +- g_io_stat_map[i].read_latency_ticks = 0; +- g_io_stat_map[i].write_latency_ticks = 0; +- g_io_stat_map[i].io_ticks = 0; +- /* used flag set false in last avoid race in channel create */ +- g_io_stat_map[i].used = false; +- } +- } +- } +-} +- +-void nvme_ctrlr_clear_iostat_all(void) +-{ +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +- +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- nvme_ctrlr_clear_iostat_by_name(nvme_bdev_ctrlr->name); +- } +-} +- +-struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev) +-{ +- return nbdev->nvme_ns->ns; +-} +- +-void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr) +-{ +- uint32_t i; +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; +- struct nvme_bdev_ns *ns = NULL; +- struct nvme_bdev *nvme_bdev = NULL, *tmp = NULL; +- +- +- pthread_mutex_lock(&g_bdev_nvme_mutex); +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (nvme_bdev_ctrlr->ctrlr != ctrlr) { +- continue; +- } +- +- pthread_mutex_unlock(&g_bdev_nvme_mutex); +- for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) { +- ns = nvme_bdev_ctrlr->namespaces[i]; +- TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) { +- nvme_bdev->disk.blocklen = spdk_nvme_ns_get_sector_size(nvme_bdev->nvme_ns->ns); +- nvme_bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(nvme_bdev->nvme_ns->ns); +- } +- } +- return; +- } +- pthread_mutex_unlock(&g_bdev_nvme_mutex); +-} +- +-int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) +-{ +- struct nvme_bdev_ns *ns = NULL; +- bool ns_active = spdk_nvme_ctrlr_is_active_ns(nvme_bdev_ctrlr->ctrlr, nsid); +- +- if (nvme_bdev_ctrlr == NULL || nsid > nvme_bdev_ctrlr->num_ns) { +- SPDK_ERRLOG("Parameter error. 
nsid[%u], the max nsid is[%u]\n", nsid, nvme_bdev_ctrlr->num_ns); +- return -1; +- } +- +- ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; +- ns->type = NVME_BDEV_NS_STANDARD; +- +- if (!ns->populated && ns_active) { +- SPDK_NOTICELOG("NSID %u to be added\n", nsid); +- ns->id = nsid; +- ns->ctrlr = nvme_bdev_ctrlr; +- TAILQ_INIT(&ns->bdevs); +- /* add a new bdev device in this ns */ +- nvme_ctrlr_populate_namespace(nvme_bdev_ctrlr, ns, NULL); +- return 0; +- } +- +- if (ns->populated && !ns_active) { +- SPDK_NOTICELOG("NSID %u is removed\n", nsid); +- nvme_ctrlr_depopulate_namespace(nvme_bdev_ctrlr, ns); +- return 0; +- } +- +- return 0; +-} +- +-bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) +-{ +- struct nvme_bdev_ns *ns = NULL; +- struct nvme_bdev *bdev = NULL, *tmp = NULL; +- bool empty = false; +- +- ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; +- if (ns == NULL) { +- return true; +- } +- +- TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { +- pthread_mutex_lock(&bdev->disk.internal.mutex); +- empty = TAILQ_EMPTY(&bdev->disk.internal.open_descs); +- /* for each bdev in ns, we need to check if any descs is in tailq */ +- if (empty) { +- /* one bdev is empty, check next until all bdev is checked */ +- bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_REMOVING; +- pthread_mutex_unlock(&bdev->disk.internal.mutex); +- } else { +- /* means at least one bdev is used, so we just quit this process +- and mark the status is false. */ +- pthread_mutex_unlock(&bdev->disk.internal.mutex); +- break; +- } +- } +- return empty; +-} +- +-void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) +-{ +- struct nvme_bdev_ns *ns = NULL; +- struct nvme_bdev *bdev = NULL, *tmp = NULL; +- +- ns = nvme_bdev_ctrlr->namespaces[nsid - 1]; +- if (ns == NULL) { +- return; +- } +- +- TAILQ_FOREACH_SAFE(bdev, &ns->bdevs, tailq, tmp) { +- pthread_mutex_lock(&bdev->disk.internal.mutex); +- /* set the ns_status to ready case ns delete fail */ +- if (bdev->disk.internal.ns_status == SPDK_BDEV_NS_STATUS_REMOVING) { +- bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_READY; +- } +- pthread_mutex_unlock(&bdev->disk.internal.mutex); +- } +-} +- +-static void check_error_type(int rc, bool read, void *qpair) +-{ +- if (rc == -ENOMEM) { +- SPDK_NOTICELOG("%s failed: rc = %d\n", read ? "read" : "write", rc); +- } else if (rc < 0) { +- SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", read ? 
"read" : "write", rc, qpair); +- } +-} +- +-int bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, +- void *driver_ctx, +- void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) +-{ +- int rc; +- uint32_t io_flags = 0; +- uint8_t *bdev_io_action = (uint8_t *)driver_ctx; +- /* filter bit 0&1 of io->pi_action to get pi_action */ +- uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; +- uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; +- uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; +- uint32_t pi_type; +- +- spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); +- +- if (pi_action > IO_NO_PROTECTION) { +- pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns); +- if (dif_flag & FLAG_PRCHK) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; +- } +- /* type3 not support ref tag */ +- if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; +- } +- if (pi_action == IO_HALF_WAY_PROTECTION) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRACT; +- } +- } +- +- if (fua) { +- io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; +- } +- +- if (direction == SPDK_BDEV_IO_TYPE_READ) { +- rc = spdk_nvme_ns_cmd_read_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, +- lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); +- } else { +- rc = spdk_nvme_ns_cmd_write_with_md(bdev->nvme_ns->ns, qpair, buffer, metadata, lba, +- lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); +- } +- +- check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); +- return rc; +-} +- +-int bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, +- void *driver_ctx, +- int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) +-{ +- int rc; +- struct nvme_bdev_io *bio = NULL; +- uint32_t io_flags = 0; +- uint8_t *bdev_io_action = (uint8_t *)driver_ctx; +- /* filter bit 0&1 of io->pi_action to get pi_action */ +- uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; +- uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; +- uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; +- uint32_t pi_type; +- +- spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); +- +- if (pi_action > IO_NO_PROTECTION) { +- pi_type = spdk_nvme_ns_get_pi_type(bdev->nvme_ns->ns); +- if (dif_flag & FLAG_PRCHK) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; +- } +- /* type3 not support ref tag */ +- if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; +- } +- if (pi_action == IO_HALF_WAY_PROTECTION) { +- io_flags |= SPDK_NVME_IO_FLAGS_PRACT; +- } +- } +- +- if (fua) { +- io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; +- } +- +- bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); +- +- if (direction == SPDK_BDEV_IO_TYPE_READ) { +- rc = spdk_nvme_ns_cmd_readv(bdev->nvme_ns->ns, qpair, lba, +- lba_count, bdev_nvme_queued_done, bio, io_flags, +- bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); +- } else { +- rc = spdk_nvme_ns_cmd_writev(bdev->nvme_ns->ns, qpair, lba, lba_count, +- bdev_nvme_queued_done, bio, io_flags, +- bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); +- } +- +- check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); +- return rc; +-} +- +-struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) +-{ 
+- struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); +- struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; +- if (nbdev == NULL) { +- return NULL; +- } +- return nbdev->nvme_ns->ctrlr; +-} +- +-int bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count) +-{ +- struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch); +- uint32_t i; +- +- if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { +- SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); +- return -EINVAL; +- } +- +- if (unmap_d == NULL) { +- return -EINVAL; +- } +- +- for (i = 0; i < unmap_count; i++) { +- if (unmap_d[i].length > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) { +- SPDK_ERRLOG("Invalid parameter, unmap block count: %u\n", unmap_d[i].length); +- return -EINVAL; +- } +- unmap_d[i].attributes.raw = 0; +- } +- +- spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); +- return spdk_nvme_ns_cmd_dataset_management(nbdev->nvme_ns->ns, nvme_ch->qpair, +- SPDK_NVME_DSM_ATTR_DEALLOCATE, +- unmap_d, unmap_count, +- bdev_nvme_queued_done, driver_ctx); +-} +- +-void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) +-{ +- remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); +-} +- +-void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr) +-{ +- spdk_nvme_ctrlr_fail_hotplug((struct spdk_nvme_ctrlr *)ctrlr); +- remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); +-} +- +-struct spdk_nvme_ctrlr *spdk_nvme_bdev_ctrlr_get(char *pci_trid) +-{ +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; +- +- TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { +- if (strcmp(nvme_bdev_ctrlr->connected_trid->traddr, pci_trid) == 0) { +- return nvme_bdev_ctrlr->ctrlr; +- } +- } +- +- return NULL; +-} +- +-int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, +- const char *base_name, +- const char **names, size_t *count, +- const char *hostnqn) +-{ +- struct nvme_probe_ctx *probe_ctx; +- struct nvme_bdev_ctrlr *nvme_bdev_ctrlr; +- struct nvme_bdev_ns *ns; +- struct nvme_bdev *nvme_bdev; +- struct nvme_bdev *tmp = NULL; +- uint32_t i, nsid; +- size_t j; +- +- if (nvme_bdev_ctrlr_get(trid) != NULL) { +- SPDK_ERRLOG("A controller with the trid (traddr: %s) already exists.\n", trid->traddr); +- return -1; +- } +- +- probe_ctx = bdev_nvme_create_probe_ctx(trid, base_name, hostnqn); +- if (probe_ctx == NULL) { +- SPDK_ERRLOG("Failed to create probe_ctx\n"); +- return -1; +- } +- +- if (spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, remove_cb)) { +- SPDK_ERRLOG("Failed to probe for new devices\n"); +- free(probe_ctx); +- return -1; +- } +- +- nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid); +- if (!nvme_bdev_ctrlr) { +- SPDK_ERRLOG("Failed to find new NVMe controller\n"); +- free(probe_ctx); +- return -1; +- } +- +- /* +- * Report the new bdevs that were created in this call. +- * There can be more than one bdev per NVMe controller since one bdev is created per namespace. 
+- */
+-	j = 0;
+-	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
+-		nsid = i + 1;
+-		ns = nvme_bdev_ctrlr->namespaces[nsid - 1];
+-
+-		if (!ns->populated) {
+-			continue;
+-		}
+-		assert(ns->id == nsid);
+-		TAILQ_FOREACH_SAFE(nvme_bdev, &ns->bdevs, tailq, tmp) {
+-			if (j < *count) {
+-				j++;
+-				names[j] = nvme_bdev->disk.name;
+-			} else {
+-				SPDK_ERRLOG("Maximum number of namespaces is %zu.", *count);
+-				free(probe_ctx);
+-				return -1;
+-			}
+-		}
+-	}
+-
+-	*count = j;
+-
+-	free(probe_ctx);
+-	return 0;
+-}
++/*
++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 and
++ * only version 2 as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ */
++
++#define SECUREC_NEED_ERRNO_TYPE
++#include <securec.h>
++#include "bdev_nvme.h"
++
++#include "spdk/json.h"
++#include "spdk/likely.h"
++#include "spdk/bdev_module.h"
++#include "bdev_nvme.h"
++#include "spdk/nvme.h"
++#include "spdk/tree.h"
++
++#include "spdk_internal/bdev_stat.h"
++#include "bdev_nvme_self.h"
++
++void bdev_update_ch_timeout(struct nvme_poll_group *group)
++{
++	uint64_t current_ticks = 0;
++	uint64_t poll_ticks = 0;
++	int64_t poll_time = 0;
++
++	current_ticks = spdk_get_ticks();
++
++	if (spdk_unlikely(g_polltime_threshold)) {
++		if (group->save_start_ticks) {
++			poll_ticks = current_ticks - group->save_start_ticks;
++			poll_time = (poll_ticks * 1000ULL) / spdk_get_ticks_hz();
++			if (poll_time >= g_polltime_threshold) {
++				group->num_poll_timeout++;
++				SPDK_NOTICELOG("group[%p] poll timeout in %ldms\n", group, poll_time);
++			}
++		}
++		group->save_start_ticks = current_ticks;
++	}
++}
++
++int _bdev_nvme_submit_request_self(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
++{
++	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
++
++	if (nbdev_io->io_path->qpair->qpair == NULL) {
++		/* The device is currently resetting */
++		return -1;
++	}
++
++	switch (bdev_io->type) {
++	case SPDK_BDEV_IO_TYPE_READ_NVME:
++		SPDK_DEBUGLOG(bdev_nvme, "read %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks,
++			      bdev_io->u.contig.offset_blocks);
++		return bdev_nvme_queue_cmd_with_md(nbdev_io->io_path->nvme_ns->ns, nbdev_io->io_path->qpair->qpair,
++						   bdev_io->driver_ctx, bdev_io->u.contig.buf,
++						   bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_READ,
++						   bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks);
++	case SPDK_BDEV_IO_TYPE_WRITE_NVME:
++		SPDK_DEBUGLOG(bdev_nvme, "write %lu lbas with offset %#lx\n", bdev_io->u.contig.num_blocks,
++			      bdev_io->u.contig.offset_blocks);
++		return bdev_nvme_queue_cmd_with_md(nbdev_io->io_path->nvme_ns->ns, nbdev_io->io_path->qpair->qpair,
++						   bdev_io->driver_ctx, bdev_io->u.contig.buf,
++						   bdev_io->u.contig.md_buf, SPDK_BDEV_IO_TYPE_WRITE,
++						   bdev_io->u.contig.num_blocks, bdev_io->u.contig.offset_blocks);
++	case SPDK_BDEV_IO_TYPE_READV_NVME:
++		SPDK_DEBUGLOG(bdev_nvme, "readv %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks,
++			      bdev_io->u.bdev.offset_blocks);
++		return bdev_nvme_queue_cmd_v_with_md(nbdev_io->io_path->nvme_ns->ns, nbdev_io->io_path->qpair->qpair,
++						     bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_READ,
++						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
++						     bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
++	case SPDK_BDEV_IO_TYPE_WRITEV_NVME:
++		SPDK_DEBUGLOG(bdev_nvme, "writev %lu lbas with offset %#lx\n", bdev_io->u.bdev.num_blocks,
++			      bdev_io->u.bdev.offset_blocks);
++		return bdev_nvme_queue_cmd_v_with_md(nbdev_io->io_path->nvme_ns->ns, nbdev_io->io_path->qpair->qpair,
++						     bdev_io->driver_ctx, SPDK_BDEV_IO_TYPE_WRITE,
++						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
++						     bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.offset_blocks);
++	case SPDK_BDEV_IO_TYPE_UNMAP_BLOCKS:
++		return bdev_nvme_unmap_blocks(nbdev_io->io_path->nvme_ns->ns,
++					      nbdev_io->io_path->qpair->qpair,
++					      (void *)bdev_io->driver_ctx,
++					      (struct spdk_nvme_dsm_range *)bdev_io->u.contig.buf,
++					      bdev_io->u.contig.num_blocks);
++	default:
++		return -EINVAL;
++	}
++}
++
++int bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w)
++{
++	return 0;
++}
++
++int bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch)
++{
++	uint16_t channel_id = -1;
++	struct nvme_io_path *io_path = NULL;
++	struct nvme_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch);
++	io_path = STAILQ_FIRST(&bdev_ch->io_path_list);
++	if (io_path == NULL) {
++		return channel_id;
++	}
++	struct spdk_nvme_qpair *qpair = io_path->qpair->qpair;
++	if (qpair != NULL) {
++		channel_id = spdk_nvme_get_qpair_id(qpair);
++	}
++	return channel_id;
++}
++
++uint64_t bdev_nvme_get_timeout_count(struct spdk_io_channel *ch)
++{
++	struct nvme_ctrlr_channel *ctrlr_ch;
++	if (ch == NULL) {
++		return 0;
++	}
++	ctrlr_ch = spdk_io_channel_get_ctx(ch);
++	if (ctrlr_ch->qpair == NULL || ctrlr_ch->qpair->group == NULL) {
++		return 0;
++	}
++
++	return ctrlr_ch->qpair->group->num_poll_timeout;
++}
++
++int32_t nvme_ctrlr_get_info(const char *ctrlName, struct nvme_ctrlr_info **ppCtrlr)
++{
++	uint32_t num_ctrlr = 0, i = 0;
++	struct nvme_bdev_ctrlr *nbdev_ctrlr = NULL;
++	struct nvme_ctrlr *nvme_ctrlr = NULL;
++	struct nvme_ctrlr_info *pCtrlrInfo = NULL;
++	const struct spdk_nvme_ctrlr_data *cdata = NULL;
++	const struct spdk_nvme_ctrlr_opts *opts = NULL;
++
++	struct spdk_pci_device *pci_dev = NULL;
++	int rc;
++
++	TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
++		num_ctrlr++;
++	}
++	if (num_ctrlr == 0) {
++		SPDK_NOTICELOG("No nvme controller found.\n");
++		return 0;
++	}
++	if (ctrlName != NULL) {
++		num_ctrlr = 1;
++	}
++	pCtrlrInfo = calloc(num_ctrlr, sizeof(struct nvme_ctrlr_info));
++	if (pCtrlrInfo == NULL) {
++		SPDK_ERRLOG("Failed to alloc memory for getting controller information.\n");
++		return -1;
++	}
++	TAILQ_FOREACH(nbdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
++		if (i >= num_ctrlr) { /* prevent having controllers be added or deleted */
++			i++;
++			continue;
++		}
++		if (ctrlName != NULL) {
++			if (strcmp(nbdev_ctrlr->name, ctrlName) != 0) {
++				continue;
++			}
++		}
++		nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
++		cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);
++		opts = spdk_nvme_ctrlr_get_opts(nvme_ctrlr->ctrlr);
++		pci_dev = spdk_nvme_ctrlr_get_pci_device(nvme_ctrlr->ctrlr);
++		if (pci_dev == NULL) {
++			SPDK_ERRLOG("Failed to get pci device\n");
++			break;
++		}
++		rc = strcpy_s(pCtrlrInfo[i].ctrlName, sizeof(pCtrlrInfo[i].ctrlName), nbdev_ctrlr->name);
++		if (rc != 0) {
++			SPDK_ERRLOG("String copy failed\n");
++		}
++		rc = strcpy_s(pCtrlrInfo[i].pciAddr, sizeof(pCtrlrInfo[i].pciAddr),
++			      nvme_ctrlr->active_path_id->trid.traddr);
++		if (rc != 0) {
++			SPDK_ERRLOG("String copy failed\n");
++		}
++
++		rc = memcpy_s(pCtrlrInfo[i].sn, sizeof(pCtrlrInfo[i].sn), cdata->sn, 20);
++		if (rc != 0) {
++			SPDK_ERRLOG("Memory copy failed\n");
++		}
++
++		rc = memcpy_s(pCtrlrInfo[i].fr, sizeof(pCtrlrInfo[i].fr), cdata->fr, 8);
++		if (rc != 0) {
++			SPDK_ERRLOG("Memory copy failed\n");
++		}
++
++		rc = memcpy_s(pCtrlrInfo[i].mn, sizeof(pCtrlrInfo[i].mn), cdata->mn, 40);
++		if (rc != 0) {
++			SPDK_ERRLOG("Memory copy failed\n");
++		}
++
++		pCtrlrInfo[i].trtype = (uint16_t)nvme_ctrlr->active_path_id->trid.trtype;
++		pCtrlrInfo[i].tnvmcap = cdata->tnvmcap[0];
++		pCtrlrInfo[i].unvmcap = cdata->unvmcap[0];
++		pCtrlrInfo[i].support_ns = cdata->oacs.ns_manage;
++		pCtrlrInfo[i].directives = cdata->oacs.directives;
++		pCtrlrInfo[i].dsm = cdata->oncs.dsm;
++		pCtrlrInfo[i].max_num_ns = cdata->nn;
++		pCtrlrInfo[i].num_io_queues = opts->num_io_queues;
++		pCtrlrInfo[i].io_queue_size = opts->io_queue_size;
++		pCtrlrInfo[i].device_id = spdk_pci_device_get_device_id(pci_dev);
++		pCtrlrInfo[i].subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev);
++		pCtrlrInfo[i].vid = cdata->vid;
++		pCtrlrInfo[i].ssvid = cdata->ssvid;
++		pCtrlrInfo[i].ctrlid = cdata->cntlid;
++		pCtrlrInfo[i].version = spdk_nvme_ctrlr_get_regs_vs(nvme_ctrlr->ctrlr).raw;
++		i++;
++		if (ctrlName != NULL) {
++			break;
++		}
++	}
++	if (i != num_ctrlr) {
++		SPDK_ERRLOG("A controller was added or deleted while the information was being fetched, please try again later.\n");
++		free(pCtrlrInfo);
++		return -1;
++	}
++	*ppCtrlr = pCtrlrInfo;
++	return num_ctrlr;
++}
++
++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_name(const char *ctrlname)
++{
++	struct nvme_ctrlr *nvme_ctrlr = NULL;
++
++	nvme_ctrlr = nvme_ctrlr_get_by_name(ctrlname);
++	if (nvme_ctrlr == NULL) {
++		return NULL;
++	}
++
++	return nvme_ctrlr->ctrlr;
++}
++
++struct spdk_nvme_ctrlr *spdk_nvme_ctrlr_get_by_ctrlr(const struct nvme_bdev_ctrlr *nbdev_ctrlr)
++{
++	struct nvme_ctrlr *nvme_ctrlr = NULL;
++
++	if (nbdev_ctrlr == NULL) {
++		return NULL;
++	}
++	nvme_ctrlr = TAILQ_FIRST(&nbdev_ctrlr->ctrlrs);
++
++	return nvme_ctrlr->ctrlr;
++}
++
++void nvme_ctrlr_clear_iostat_by_name(const char *ctrlname)
++{
++	int i;
++	size_t size = strnlen(ctrlname, 24);
++
++	for (i = 0; i < STAT_MAX_NUM; i++) {
++		if (strncmp(g_io_stat_map[i].bdev_name, ctrlname, size) == 0) {
++			if ((g_io_stat_map[i].bdev_name[size] == 'n') && isdigit(g_io_stat_map[i].bdev_name[size + 1])) {
++				g_io_stat_map[i].channel_id = 0;
++				(void)memset_s(g_io_stat_map[i].bdev_name, sizeof(g_io_stat_map[i].bdev_name), 0, sizeof(g_io_stat_map[i].bdev_name));
++				g_io_stat_map[i].num_read_ops = 0;
++				g_io_stat_map[i].num_write_ops = 0;
++				g_io_stat_map[i].bytes_read = 0;
++				g_io_stat_map[i].bytes_written = 0;
++				g_io_stat_map[i].io_outstanding = 0;
++				g_io_stat_map[i].read_latency_ticks = 0;
++				g_io_stat_map[i].write_latency_ticks = 0;
++				g_io_stat_map[i].io_ticks = 0;
++				/* clear the used flag last to avoid a race with channel create */
++				g_io_stat_map[i].used = false;
++			}
++		}
++	}
++}
++
++void nvme_ctrlr_clear_iostat_all(void)
++{
++	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
++
++	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
++		nvme_ctrlr_clear_iostat_by_name(nvme_bdev_ctrlr->name);
++	}
++}
++
++struct spdk_nvme_ns *bdev_nvme_get_ns(struct nvme_bdev *nbdev)
++{
++	struct spdk_nvme_ctrlr *ctrlr;
++	if (nbdev == NULL) {
++		return NULL;
++	}
++
++	ctrlr = spdk_nvme_ctrlr_get_by_ctrlr(nbdev->nbdev_ctrlr);
++	if (ctrlr == NULL) {
++		return NULL;
++	}
++
++	return spdk_nvme_ctrlr_get_ns(ctrlr, nbdev->nsid);
++}
++
++void bdev_nvme_update_block_by_nvme_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
++{
++	uint32_t nsid;
++	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
++	struct nvme_bdev *nvme_bdev = NULL;
++	struct spdk_nvme_ns *ns;
++
++	pthread_mutex_lock(&g_bdev_nvme_mutex);
++	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
++		if (spdk_nvme_ctrlr_get_by_ctrlr(nvme_bdev_ctrlr) != ctrlr) {
++			continue;
++		}
++
++		for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
++		     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
++			ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
++			TAILQ_FOREACH(nvme_bdev, &nvme_bdev_ctrlr->bdevs, tailq) {
++				if (nvme_bdev->nsid == nsid) {
++					nvme_bdev->disk.blocklen = spdk_nvme_ns_get_sector_size(ns);
++					nvme_bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
++				}
++			}
++		}
++
++		pthread_mutex_unlock(&g_bdev_nvme_mutex);
++		return;
++	}
++	pthread_mutex_unlock(&g_bdev_nvme_mutex);
++}
++
++static int
++nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2)
++{
++	return ns1->id < ns2->id ? -1 : ns1->id > ns2->id;
++}
++
++RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp);
++
++int bdev_nvme_update_ns(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
++{
++	struct nvme_ns *ns = NULL;
++	bool ns_active = false;
++	struct spdk_nvme_ctrlr *ctrlr = NULL;
++	struct nvme_ctrlr *nvme_ctrlr = NULL;
++
++
++	ctrlr = spdk_nvme_ctrlr_get_by_ctrlr(nvme_bdev_ctrlr);
++
++	if (nvme_bdev_ctrlr == NULL || ctrlr == NULL || nsid > spdk_nvme_ctrlr_get_num_ns(ctrlr)) {
++		SPDK_ERRLOG("Parameter error. nsid[%u], the max nsid is[%u]\n", nsid,
++			    ctrlr ? spdk_nvme_ctrlr_get_num_ns(ctrlr) : 0);
++		return -1;
++	}
++
++	nvme_ctrlr = TAILQ_FIRST(&nvme_bdev_ctrlr->ctrlrs);
++	if (nvme_ctrlr == NULL) {
++		SPDK_ERRLOG("Cannot get nvme_ctrlr, update aborted.\n");
++		return -1;
++	}
++
++	ns_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid);
++
++	ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid);
++	if (ns == NULL && ns_active) {
++		ns = nvme_ns_alloc();
++		if (ns == NULL) {
++			SPDK_ERRLOG("Failed to allocate namespace\n");
++			return -1;
++		}
++		SPDK_NOTICELOG("NSID %u to be added\n", nsid);
++		ns->id = nsid;
++		ns->ctrlr = nvme_ctrlr;
++		ns->bdev = NULL;
++		RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns);
++		/* add a new bdev device in this ns */
++		nvme_ctrlr_populate_namespace(nvme_ctrlr, ns);
++	}
++
++	if (ns != NULL && !ns_active) {
++		SPDK_NOTICELOG("NSID %u is removed\n", nsid);
++		nvme_ctrlr_depopulate_namespace(nvme_ctrlr, ns);
++		return 0;
++	}
++
++	return 0;
++}
++
++bool spdk_bdev_can_remove(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
++{
++	struct nvme_bdev *bdev = NULL;
++	bool empty = false;
++
++	pthread_mutex_lock(&g_bdev_nvme_mutex);
++	TAILQ_FOREACH(bdev, &nvme_bdev_ctrlr->bdevs, tailq) {
++		if (bdev->nsid == nsid) {
++			break;
++		}
++	}
++	pthread_mutex_unlock(&g_bdev_nvme_mutex);
++
++	if (bdev == NULL) {
++		return true;
++	}
++
++	spdk_spin_lock(&bdev->disk.internal.spinlock);
++	empty = TAILQ_EMPTY(&bdev->disk.internal.open_descs);
++
++	if (empty) {
++		/* no descriptors are open, so the bdev can be marked for removal */
++		bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_REMOVING;
++	}
++	/* otherwise at least one descriptor is still open, so leave the
++	status unchanged and report false.
*/ ++ spdk_spin_unlock(&bdev->disk.internal.spinlock); ++ ++ return empty; ++} ++ ++void spdk_bdev_set_ns_normal(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid) ++{ ++ struct nvme_bdev *bdev = NULL; ++ ++ pthread_mutex_lock(&g_bdev_nvme_mutex); ++ TAILQ_FOREACH(bdev, &nvme_bdev_ctrlr->bdevs, tailq) { ++ if (bdev->nsid == nsid) { ++ break; ++ } ++ } ++ pthread_mutex_unlock(&g_bdev_nvme_mutex); ++ ++ if (bdev == NULL) { ++ return; ++ } ++ ++ spdk_spin_lock(&bdev->disk.internal.spinlock); ++ if (bdev->disk.internal.ns_status == SPDK_BDEV_NS_STATUS_REMOVING) { ++ bdev->disk.internal.ns_status = SPDK_BDEV_NS_STATUS_READY; ++ } ++ spdk_spin_unlock(&bdev->disk.internal.spinlock); ++} ++ ++static void check_error_type(int rc, bool read, void *qpair) ++{ ++ if (rc == -ENOMEM) { ++ SPDK_NOTICELOG("%s failed: rc = %d\n", read ? "read" : "write", rc); ++ } else if (rc < 0) { ++ SPDK_ERRLOG("%s failed: rc = %d, qpair is %p\n", read ? "read" : "write", rc, qpair); ++ } ++} ++ ++int bdev_nvme_queue_cmd_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba) ++{ ++ int rc; ++ uint32_t io_flags = 0; ++ uint8_t* bdev_io_action = (uint8_t *)driver_ctx; ++ /* filter bit 0&1 of io->pi_action to get pi_action */ ++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; ++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; ++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; ++ uint16_t streamId = bdev_io_action[SPDK_BDEV_IO_STREAM_ID_0]; ++ uint32_t pi_type; ++ ++ streamId |= ((uint16_t)bdev_io_action[SPDK_BDEV_IO_STREAM_ID_1] << 8); ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ ++ if (streamId > 0) { ++ io_flags |= SPDK_NVME_IO_FLAGS_STREAMS_DIRECTIVE; ++ } ++ ++ if (pi_action > IO_NO_PROTECTION) { ++ pi_type = spdk_nvme_ns_get_pi_type(ns); ++ if (dif_flag & FLAG_PRCHK) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; ++ } ++ /* type3 not support ref tag */ ++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; ++ } ++ if (pi_action == IO_HALF_WAY_PROTECTION) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT; ++ } ++ } ++ ++ if (fua) { ++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; ++ } ++ ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { ++ rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, buffer, metadata, lba, ++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, 0); ++ } else { ++ rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, buffer, metadata, lba, ++ lba_count, bdev_nvme_queued_done, driver_ctx, io_flags, 0, streamId); ++ } ++ ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); ++ return rc; ++} ++ ++int bdev_nvme_queue_cmd_v_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba) ++{ ++ int rc; ++ struct nvme_bdev_io *bio = NULL; ++ uint32_t io_flags = 0; ++ uint8_t* bdev_io_action = (uint8_t *)driver_ctx; ++ /* filter bit 0&1 of io->pi_action to get pi_action */ ++ uint8_t pi_action = bdev_io_action[SPDK_BDEV_IO_ACTION_PI] & 0x03; ++ uint8_t dif_flag = bdev_io_action[SPDK_BDEV_IO_ACTION_PI]; ++ uint8_t fua = bdev_io_action[SPDK_BDEV_IO_ACTION_FUA]; ++ uint16_t streamId = bdev_io_action[SPDK_BDEV_IO_STREAM_ID_0]; ++ uint32_t pi_type; ++ ++ streamId |= ((uint16_t)bdev_io_action[SPDK_BDEV_IO_STREAM_ID_1] << 8); ++ 
spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ ++ if (streamId > 0) { ++ io_flags |= SPDK_NVME_IO_FLAGS_STREAMS_DIRECTIVE; ++ } ++ ++ if (pi_action > IO_NO_PROTECTION) { ++ pi_type = spdk_nvme_ns_get_pi_type(ns); ++ if (dif_flag & FLAG_PRCHK) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD; ++ } ++ /* type3 not support ref tag */ ++ if (!(dif_flag & FLAG_NO_REF) && (pi_type != SPDK_NVME_FMT_NVM_PROTECTION_TYPE3)) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; ++ } ++ if (pi_action == IO_HALF_WAY_PROTECTION) { ++ io_flags |= SPDK_NVME_IO_FLAGS_PRACT; ++ } ++ } ++ ++ if (fua) { ++ io_flags |= SPDK_NVME_IO_FLAGS_FORCE_UNIT_ACCESS; ++ } ++ ++ bio = nvme_bdev_io_update_args((struct nvme_bdev_io *)driver_ctx, iov, iovcnt); ++ ++ if (direction == SPDK_BDEV_IO_TYPE_READ) { ++ rc = spdk_nvme_ns_cmd_readv(ns, qpair, lba, ++ lba_count, bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } else { ++ rc = spdk_nvme_ns_cmd_writev(ns, qpair, lba, lba_count, ++ bdev_nvme_queued_done, bio, io_flags, ++ bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge); ++ } ++ ++ check_error_type(rc, direction == SPDK_BDEV_IO_TYPE_READ, qpair); ++ return rc; ++} ++ ++struct nvme_bdev_ctrlr *bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc) ++{ ++ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(bdev_desc); ++ struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev->ctxt; ++ if (nbdev == NULL) { ++ return NULL; ++ } ++ return nbdev->nbdev_ctrlr; ++} ++ ++int bdev_nvme_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count) ++{ ++ uint32_t i; ++ ++ if (unmap_count == 0 || unmap_count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) { ++ SPDK_ERRLOG("Invalid parameter, unmap count: %u\n", unmap_count); ++ return -EINVAL; ++ } ++ ++ if (unmap_d == NULL) { ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < unmap_count; i++) { ++ unmap_d[i].attributes.raw = 0; ++ } ++ ++ spdk_bdev_set_io_location(driver_ctx, (uint8_t)LOCAL_LIBSTORAGE_BDEV_NVME); ++ return spdk_nvme_ns_cmd_dataset_management(ns, qpair, ++ SPDK_NVME_DSM_ATTR_DEALLOCATE, ++ unmap_d, unmap_count, ++ bdev_nvme_queued_done, driver_ctx); ++} ++ ++void spdk_bdev_nvme_remove_cb(void *cb_ctx, void *ctrlr) ++{ ++ remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); ++} ++ ++void spdk_bdev_fail_ctrlr(void *cb_ctx, void *ctrlr) ++{ ++ spdk_nvme_ctrlr_fail_hotplug((struct spdk_nvme_ctrlr *)ctrlr); ++ remove_cb(cb_ctx, (struct spdk_nvme_ctrlr *)ctrlr); ++} ++ ++struct spdk_nvme_ctrlr *spdk_nvme_bdev_ctrlr_get(char *pci_trid) ++{ ++ struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL; ++ struct nvme_ctrlr *nvme_ctrlr; ++ ++ TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) { ++ TAILQ_FOREACH(nvme_ctrlr, &nvme_bdev_ctrlr->ctrlrs, tailq) { ++ if (strcmp(nvme_ctrlr->active_path_id->trid.traddr, pci_trid) == 0) { ++ return nvme_ctrlr->ctrlr; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++int spdk_bdev_nvme_create_self(struct spdk_nvme_transport_id *trid, ++ const char *base_name, ++ const char **names, size_t *count, ++ const char *hostnqn) ++{ ++ struct nvme_probe_ctx *probe_ctx = NULL; ++ ++ if (nvme_ctrlr_get(trid) != NULL) { ++ SPDK_ERRLOG("A controller with the trid (traddr: %s) already exists.\n", trid->traddr); ++ return -1; ++ } ++ ++ probe_ctx = bdev_nvme_create_probe_ctx(trid, base_name, hostnqn); ++ if (probe_ctx == NULL) { ++ SPDK_ERRLOG("Failed to create probe_ctx\n"); ++ return -1; ++ } ++ ++ if 
(spdk_nvme_probe(trid, probe_ctx, probe_cb, attach_cb, remove_cb)) { ++ SPDK_ERRLOG("Failed to probe for new devices\n"); ++ free(probe_ctx); ++ return -1; ++ } ++ ++ free(probe_ctx); ++ return 0; ++} +diff --git a/module/bdev/nvme/bdev_nvme_self.h b/module/bdev/nvme/bdev_nvme_self.h +index 43ad7ee..91af571 100644 +--- a/module/bdev/nvme/bdev_nvme_self.h ++++ b/module/bdev/nvme/bdev_nvme_self.h +@@ -1,43 +1,43 @@ +-/* +- * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 and +- * only version 2 as published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- */ +- +-void +-bdev_update_ch_timeout(struct nvme_bdev_poll_group *group); +- +-int +-_bdev_nvme_submit_request_self(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); +- +-int +-bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w); +- +-uint16_t +-bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch); +- +-uint64_t +-bdev_nvme_get_timeout_count(struct spdk_io_channel *ch); +- +-int +-bdev_nvme_queue_cmd_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, void *driver_ctx, +- void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba); +- +-int +-bdev_nvme_queue_cmd_v_with_md(struct nvme_bdev *bdev, struct spdk_nvme_qpair *qpair, +- void *driver_ctx, +- int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba); +- +-struct nvme_bdev_ctrlr * +-bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc); +- +-int +-bdev_nvme_unmap_blocks(struct nvme_bdev *nbdev, struct spdk_io_channel *ch, void *driver_ctx, +- struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count); ++/* ++ * Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 and ++ * only version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++void ++bdev_update_ch_timeout(struct nvme_poll_group *group); ++ ++int ++_bdev_nvme_submit_request_self(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io); ++ ++int ++bdev_nvme_dump_info_json_self(void *ctx, struct spdk_json_write_ctx *w); ++ ++int ++bdev_nvme_get_io_channel_id(struct spdk_io_channel *ch); ++ ++uint64_t ++bdev_nvme_get_timeout_count(struct spdk_io_channel *ch); ++ ++int ++bdev_nvme_queue_cmd_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *driver_ctx, ++ void *buffer, void *metadata, int direction, uint64_t lba_count, uint64_t lba); ++ ++int ++bdev_nvme_queue_cmd_v_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, ++ void *driver_ctx, ++ int direction, struct iovec *iov, int iovcnt, uint64_t lba_count, uint64_t lba); ++ ++struct nvme_bdev_ctrlr * ++bdev_nvme_get_ctrlr_by_bdev_desc(void *bdev_desc); ++ ++int ++bdev_nvme_unmap_blocks(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *driver_ctx, ++ struct spdk_nvme_dsm_range *unmap_d, uint32_t unmap_count); +-- +2.43.0 + diff --git a/spdk.spec b/spdk.spec index bd9bed0..d2871d1 100644 --- a/spdk.spec +++ b/spdk.spec @@ -4,13 +4,23 @@ Name: spdk Version: 24.01 -Release: 6 +Release: 7 Summary: Set of libraries and utilities for high performance user-mode storage License: BSD and MIT URL: http://spdk.io Source0: https://github.com/spdk/spdk/archive/refs/tags/v%{version}.tar.gz Patch1: 0001-Add-without-ISA-L-option-and-disabled-by-default.patch Patch2: 0002-backport-Add-ctrlr_lock-for-cuse-register-and-unregister.patch +Patch3: 0003-add-HSAK-needed-head-file-and-API-to-spdk.patch +Patch4: 0004-lib-bdev-Add-bdev-support-for-HSAK.patch +Patch5: 0005-lib-env_dpdk-Add-config-args-for-HSAK.patch +Patch6: 0006-lib-nvme-Add-nvme-support-for-HSAK.patch +Patch7: 0007-module-bdev-Add-bdev-module-support-for-HSAK.patch +Patch8: 0008-use-spdk_nvme_ns_cmd_dataset_management-and-delete-s.patch +Patch9: 0009-spdk-add-nvme-support-for-HSAK.patch +Patch10: 0010-Add-CUSE-switch-for-nvme-ctrlr.patch +Patch11: 0011-Adapt-for-ES3000-serial-vendor-special-opcode-in-CUS.patch +Patch12: 0012-adapt-for-spdk-24.01.patch %define package_version %{version}-%{release} @@ -94,6 +104,9 @@ BuildArch: noarch %build ./configure --prefix=%{_usr} \ --disable-tests \ + --disable-examples \ + --disable-unit-tests \ + --disable-apps \ --without-crypto \ --without-isal \ --with-dpdk \ @@ -104,6 +117,7 @@ BuildArch: noarch --with-shared \ --with-iscsi-initiator \ --without-vtune \ + --enable-raw \ --with-nvme-cuse \ make -j`nproc` all @@ -112,11 +126,16 @@ make -j`nproc` all make -C doc %endif -%check -test/unit/unittest.sh - %install %make_install -j`nproc` prefix=%{_usr} libdir=%{_libdir} datadir=%{_datadir} +install -d $RPM_BUILD_ROOT%{_sysconfdir}/spdk +install -d $RPM_BUILD_ROOT/opt/spdk +install -d $RPM_BUILD_ROOT/usr/include/spdk_internal +install -m 0744 ./scripts/setup_self.sh $RPM_BUILD_ROOT/opt/spdk/setup.sh +install -m 0644 ./etc/spdk/nvme.conf.in $RPM_BUILD_ROOT%{_sysconfdir}/spdk +install -m 0644 include/spdk_internal/*.h $RPM_BUILD_ROOT/usr/include/spdk_internal +install -m 0644 lib/nvme/nvme_internal.h $RPM_BUILD_ROOT/usr/include/spdk_internal +install -m 0644 lib/env_dpdk/22.11/*.h $RPM_BUILD_ROOT/usr/include/spdk # Install tools mkdir -p %{install_datadir} @@ -150,9 +169,10 @@ mv doc/output/html/ %{install_docdir} %files -%{_bindir}/iscsi_tgt -%{_bindir}/nvmf_tgt -%{_bindir}/vhost +%dir /opt/spdk +/opt/spdk/setup.sh +%dir %{_sysconfdir}/spdk +%{_sysconfdir}/spdk/nvme.conf.in 
/usr/lib/python%{python3_version}/site-packages/spdk*/* %{_bindir}/spdk_* %{_libdir}/*.so.* @@ -163,6 +183,8 @@ mv doc/output/html/ %{install_docdir} %{_includedir}/%{name} %{_libdir}/*.a %{_libdir}/*.so +%dir /usr/include/spdk_internal +/usr/include/spdk_internal/*.h %files tools @@ -177,6 +199,9 @@ mv doc/output/html/ %{install_docdir} %changelog +* Sat Aug 17 2024 Weifeng Su - 24.01-7 +- Adapt for HSAK + * Thu Jun 6 2024 baiguo - 24.01-6 - Add ctrlr_lock for cuse register and unregister -- Gitee
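
For reference, a minimal consumer of the controller-information API reworked by patch 0012 above (nvme_ctrlr_get_info) might look like the sketch below. It is illustrative only and not part of the patch or the RPM payload; the integer field widths and the print format are assumptions inferred from the patch body rather than from a published HSAK header.

/* Sketch: querying controller information through the HSAK API shown above.
 * Assumes the patched spdk/nvme.h declares struct nvme_ctrlr_info and
 * nvme_ctrlr_get_info(); error handling is illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include "spdk/nvme.h"

static void dump_all_ctrlrs(void)
{
	struct nvme_ctrlr_info *info = NULL;
	int32_t i;
	/* Passing NULL for the name queries every attached controller. */
	int32_t n = nvme_ctrlr_get_info(NULL, &info);

	if (n <= 0) {
		fprintf(stderr, "no controller information available (rc=%d)\n", n);
		return;
	}
	for (i = 0; i < n; i++) {
		printf("%s at %s: max namespaces %u, io queues %u\n",
		       info[i].ctrlName, info[i].pciAddr,
		       info[i].max_num_ns, info[i].num_io_queues);
	}
	/* nvme_ctrlr_get_info() allocates the array with calloc();
	 * ownership passes to the caller. */
	free(info);
}

The return value doubles as the element count, so a single check covers both the empty case (0) and the error case (-1).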
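
Likewise, bdev_nvme_unmap_blocks() in bdev_nvme_self.c expects a caller-built array of struct spdk_nvme_dsm_range and rejects empty lists and lists longer than SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES. A sketch of preparing such a list follows; the helper name and the uniform range geometry are invented for illustration, while the range type and the limit come from the standard spdk/nvme_spec.h.

/* Sketch: building a deallocate (trim) range list in the shape
 * bdev_nvme_unmap_blocks() expects. */
#include <errno.h>
#include "spdk/nvme_spec.h"

static int fill_trim_ranges(struct spdk_nvme_dsm_range *ranges, uint32_t count,
			    uint64_t start_lba, uint32_t blocks_per_range)
{
	uint32_t i;

	/* Same bounds the patched function enforces before submitting. */
	if (count == 0 || count > SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES) {
		return -EINVAL;
	}
	for (i = 0; i < count; i++) {
		ranges[i].starting_lba = start_lba + (uint64_t)i * blocks_per_range;
		ranges[i].length = blocks_per_range;
		/* The patched function also clears attributes; zeroing here
		 * keeps the input well defined either way. */
		ranges[i].attributes.raw = 0;
	}
	return 0;
}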