From e2c2fe28f31e27304d7a68782844f0400c26f836 Mon Sep 17 00:00:00 2001 From: LiuYongYang Date: Wed, 21 Jun 2023 09:26:25 +0800 Subject: [PATCH] zlib v2.5 fix --- KAEZlib/gtest/main.cpp | 2 +- KAEZlib/patch/kaezip_for_zlib-1.2.11.patch | 45 +++- KAEZlib/src/kaezip_adapter.c | 62 ++--- KAEZlib/src/kaezip_adapter.h | 11 - KAEZlib/src/v2/kaezip_comp.c | 204 ++++++++-------- KAEZlib/src/v2/kaezip_comp.h | 15 +- KAEZlib/src/v2/kaezip_init.c | 265 +++++++++++++++++++++ KAEZlib/src/v2/kaezip_init.h | 12 + KAEZlib/test/kaezip_perf.c | 71 ++++-- KAEZlib/test/makefile | 2 +- uadk/Makefile.am | 2 +- 11 files changed, 484 insertions(+), 207 deletions(-) create mode 100644 KAEZlib/src/v2/kaezip_init.c create mode 100644 KAEZlib/src/v2/kaezip_init.h diff --git a/KAEZlib/gtest/main.cpp b/KAEZlib/gtest/main.cpp index 72ed6b0..3898dc5 100644 --- a/KAEZlib/gtest/main.cpp +++ b/KAEZlib/gtest/main.cpp @@ -154,7 +154,7 @@ static void common_test(int windowBits, int level, bool is_pref = false, ofstrea // data_size less than 64K case(deflate, zlib, gzip) TEST(ZlibTest, CompressAndDecompress_SmallCase) { - uLong input_size = 1024UL * 1024 * 48; // 48k + uLong input_size = 1024UL * 48; // 48k Bytef *input = new Bytef[input_size]; ASSERT_NE(input, nullptr); generate_random_data(input, input_size); diff --git a/KAEZlib/patch/kaezip_for_zlib-1.2.11.patch b/KAEZlib/patch/kaezip_for_zlib-1.2.11.patch index d513c6a..0c857ff 100644 --- a/KAEZlib/patch/kaezip_for_zlib-1.2.11.patch +++ b/KAEZlib/patch/kaezip_for_zlib-1.2.11.patch @@ -1,6 +1,15 @@ diff -Naru zlib-1.2.11/compress.c zlib-1.2.11_new/compress.c --- zlib-1.2.11/compress.c 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/compress.c 2023-05-09 10:27:01.000000000 +0800 ++++ zlib-1.2.11_new/compress.c 2023-06-19 17:23:32.555445560 +0800 +@@ -28,7 +28,7 @@ + { + z_stream stream; + int err; +- const uInt max = (uInt)-1; ++ const uInt max = ((uInt)-1) - 3; // make sure its multiples of 4B + uLong left; + + left = *destLen; @@ -81,6 +81,10 @@ uLong ZEXPORT compressBound (sourceLen) uLong sourceLen; @@ -15,7 +24,7 @@ diff -Naru zlib-1.2.11/compress.c zlib-1.2.11_new/compress.c } diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c --- zlib-1.2.11/deflate.c 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/deflate.c 2023-05-09 09:59:58.000000000 +0800 ++++ zlib-1.2.11_new/deflate.c 2023-06-19 17:21:03.747445560 +0800 @@ -50,6 +50,7 @@ /* @(#) $Id$ */ @@ -100,7 +109,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c /* ========================================================================= */ int ZEXPORT deflateSetHeader (strm, head) z_streamp strm; -@@ -653,6 +694,17 @@ +@@ -653,6 +695,17 @@ z_streamp strm; uLong sourceLen; { @@ -118,7 +127,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c deflate_state *s; uLong complen, wraplen; -@@ -760,7 +812,8 @@ +@@ -760,7 +813,8 @@ } while (0) /* ========================================================================= */ @@ -128,7 +137,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c z_streamp strm; int flush; { -@@ -1072,10 +1125,27 @@ +@@ -1072,10 +1126,27 @@ return s->pending != 0 ? Z_OK : Z_STREAM_END; } @@ -157,7 +166,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c int status; if (deflateStateCheck(strm)) return Z_STREAM_ERROR; -@@ -1091,9 +1161,24 @@ +@@ -1091,9 +1162,24 @@ ZFREE(strm, strm->state); strm->state = Z_NULL; @@ -182,7 +191,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c /* ========================================================================= * Copy the source state to the destination state. * To simplify the source, this is not supported for 16-bit MSDOS (which -@@ -2161,3 +2246,36 @@ +@@ -2161,3 +2247,36 @@ FLUSH_BLOCK(s, 0); return block_done; } @@ -221,7 +230,7 @@ diff -Naru zlib-1.2.11/deflate.c zlib-1.2.11_new/deflate.c + diff -Naru zlib-1.2.11/deflate.h zlib-1.2.11_new/deflate.h --- zlib-1.2.11/deflate.h 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/deflate.h 2023-05-09 10:27:01.000000000 +0800 ++++ zlib-1.2.11_new/deflate.h 2023-06-19 17:21:03.747445560 +0800 @@ -272,7 +272,7 @@ * longest match routines access bytes past the input. This is then * updated to the new high water mark. @@ -233,7 +242,7 @@ diff -Naru zlib-1.2.11/deflate.h zlib-1.2.11_new/deflate.h /* Output a byte on the stream. diff -Naru zlib-1.2.11/inflate.c zlib-1.2.11_new/inflate.c --- zlib-1.2.11/inflate.c 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/inflate.c 2023-05-09 10:14:44.000000000 +0800 ++++ zlib-1.2.11_new/inflate.c 2023-06-19 17:21:03.747445560 +0800 @@ -84,6 +84,7 @@ #include "inftrees.h" #include "inflate.h" @@ -441,7 +450,7 @@ diff -Naru zlib-1.2.11/inflate.c zlib-1.2.11_new/inflate.c + diff -Naru zlib-1.2.11/inflate.h zlib-1.2.11_new/inflate.h --- zlib-1.2.11/inflate.h 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/inflate.h 2023-05-09 10:27:01.000000000 +0800 ++++ zlib-1.2.11_new/inflate.h 2023-06-19 17:21:03.747445560 +0800 @@ -122,4 +122,5 @@ int sane; /* if false, allow invalid distance too far */ int back; /* bits back of last unprocessed length/lit */ @@ -450,14 +459,14 @@ diff -Naru zlib-1.2.11/inflate.h zlib-1.2.11_new/inflate.h }; diff -Naru zlib-1.2.11/Makefile.in zlib-1.2.11_new/Makefile.in --- zlib-1.2.11/Makefile.in 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11_new/Makefile.in 2023-05-09 10:29:11.000000000 +0800 ++++ zlib-1.2.11_new/Makefile.in 2023-06-19 17:21:03.747445560 +0800 @@ -26,7 +26,15 @@ SFLAGS=-O LDFLAGS= + +KAEZIP_CFLAGS=-I/usr/local/kaezip/include -I$(KAEZLIBPATH)/include -I. -DCONF_KAEZIP -+KAEZIP_LDFLAGS=-L/usr/lib64 -L/usr/local/lib -L/usr/local/kaezip/lib -L$(KAEBUILDPATH)/lib -L$(KAEZLIBPATH) -Wl,-rpath,/usr/local/kaezip/lib:/usr/local/lib:$(KAEBUILDPATH)/lib:$(KAEZLIBPATH) -lkaezip -lwd -lwd_comp ++KAEZIP_LDFLAGS=-L/usr/lib64 -L/usr/local/lib -L/usr/local/kaezip/lib -L$(KAEBUILDPATH)/lib -L$(KAEZLIBPATH) -Wl,-rpath,/usr/local/kaezip/lib:/usr/local/lib:$(KAEBUILDPATH)/lib:$(KAEZLIBPATH) -l:libkaezip.so.2.0.0 -lwd -lwd_comp +CFLAGS+=$(KAEZIP_CFLAGS) +SFLAGS+=$(KAEZIP_CFLAGS) +LDFLAGS+=$(KAEZIP_LDFLAGS) @@ -480,3 +489,15 @@ diff -Naru zlib-1.2.11/Makefile.in zlib-1.2.11_new/Makefile.in example64$(EXE): example64.o $(STATICLIB) $(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS) +diff -Naru zlib-1.2.11/uncompr.c zlib-1.2.11_new/uncompr.c +--- zlib-1.2.11/uncompr.c 2017-01-16 01:29:40.000000000 +0800 ++++ zlib-1.2.11_new/uncompr.c 2023-06-19 17:24:09.055445560 +0800 +@@ -32,7 +32,7 @@ + { + z_stream stream; + int err; +- const uInt max = (uInt)-1; ++ const uInt max = ((uInt)-1) - 3; // make sure its multiples of 4B + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + diff --git a/KAEZlib/src/kaezip_adapter.c b/KAEZlib/src/kaezip_adapter.c index 30e7aa9..5e9615c 100644 --- a/KAEZlib/src/kaezip_adapter.c +++ b/KAEZlib/src/kaezip_adapter.c @@ -8,6 +8,7 @@ #include "kaezip.h" #include "wd_comp.h" #include "kaezip_adapter.h" +#include "kaezip_init.h" #include "kaezip_comp.h" #include "kaezip_deflate.h" #include "kaezip_inflate.h" @@ -44,26 +45,6 @@ static void uadk_get_accel_platform(void) g_platform = HW_NONE; } -static int kz_exdata_init(z_streamp strm) -{ - kaezip_exdata *kz_exdata = malloc(sizeof(kaezip_exdata)); - if (unlikely(!kz_exdata)) { - US_ERR("kz_exdata malloc failed!\n"); - return -1; - } - memset(kz_exdata, 0, sizeof(kaezip_exdata)); - strm->opaque = kz_exdata; - return 0; -} - -static void kz_exdata_uninit(z_streamp strm) -{ - if (strm->opaque) { - free(strm->opaque); - strm->opaque = NULL; - } -} - /* -----------------------------------------------DEFLATE----------------------------------------------- */ int kz_deflateInit2_(z_streamp strm, int level, int metho, int windowBit, int memLevel, int strategy, const char *version, int stream_size) @@ -80,17 +61,17 @@ int kz_deflateInit2_(z_streamp strm, int level, int metho, int windowBit, int me ret = kz_deflateInit2_v1(strm, level, metho, windowBit, memLevel, strategy, version, stream_size); break; case HW_V2: + strm->adler = 0; level = (level <= 0 || level > 15) ? 1 : level; - ret = wd_deflate_init(strm, level, windowBit); + ret = kz_deflate_init(strm, level, windowBit); if (ret == Z_OK) { - (void)wd_deflate_reset(strm); - (void)kz_exdata_init(strm); + (void)kz_deflate_reset(strm); } break; default: break; } - US_DEBUG("kz_deflateInit2 level %d, windowBit %d, return code is %d\n", level, windowBit, ret); + US_INFO("kz_deflateInit2 return code is %d\n", ret); return ret; } @@ -114,12 +95,12 @@ int kz_deflate(z_streamp strm, int flush) } break; case HW_V2: - ret = wd_deflate_v2(strm, flush); // implement in src/v2 + ret = kz_deflate_v2(strm, flush); // implement in src/v2 break; default: break; } - US_DEBUG("kz_deflate flush is %d, return code is %d\n", flush, ret); + US_INFO("kz_deflate return code is %d\n", ret); return ret; } @@ -137,13 +118,12 @@ int kz_deflateEnd(z_streamp strm) ret = kz_deflateEnd_v1(strm); break; case HW_V2: - ret = wd_deflate_end(strm); - kz_exdata_uninit(strm); + ret = kz_deflate_end(strm); break; default: break; } - US_DEBUG("kz_deflateEnd return code is %d\n\n", ret); + US_INFO("kz_deflateEnd return code is %d\n", ret); return ret; } @@ -161,12 +141,12 @@ int kz_deflateReset(z_streamp strm) ret = kz_deflateReset_v1(strm); break; case HW_V2: - ret = wd_deflate_reset(strm); + ret = kz_deflate_reset(strm); break; default: break; } - US_DEBUG("kz_deflateReset return code is %d\n", ret); + US_INFO("kz_deflateReset return code is %d\n", ret); return ret; } @@ -186,16 +166,15 @@ int kz_inflateInit2_(z_streamp strm, int windowBits, const char *version, int st break; case HW_V2: strm->adler = 0; - ret = wd_inflate_init(strm, windowBits); + ret = kz_inflate_init(strm, windowBits); if (ret == Z_OK) { - (void)wd_inflate_reset(strm); - (void)kz_exdata_init(strm); + (void)kz_inflate_reset(strm); } break; default: break; } - US_DEBUG("kz_inflateInit2 windowBit %d, return code is %d\n", windowBits, ret); + US_INFO("kz_inflateInit2 return code is %d\n", ret); return ret; } @@ -223,12 +202,12 @@ int kz_inflate(z_streamp strm, int flush) } break; case HW_V2: - ret = wd_inflate_v2(strm, flush); + ret = kz_inflate_v2(strm, flush); break; default: break; } - US_DEBUG("kz_inflate flush %d, return code is %d\n", flush, ret); + US_INFO("kz_inflate return code is %d\n", ret); return ret; } @@ -246,13 +225,12 @@ int kz_inflateEnd(z_streamp strm) ret = kz_inflateEnd_v1(strm); break; case HW_V2: - ret = wd_inflate_end(strm); - kz_exdata_uninit(strm); + ret = kz_inflate_end(strm); break; default: break; } - US_DEBUG("kz_inflateEnd return code is %d\n", ret); + US_INFO("kz_inflateEnd return code is %d\n", ret); return ret; } @@ -270,11 +248,11 @@ int kz_inflateReset(z_streamp strm) ret = kz_inflateReset_v1(strm); break; case HW_V2: - ret = wd_inflate_reset(strm); + ret = kz_inflate_reset(strm); break; default: break; } - US_DEBUG("kz_inflateReset return code is %d\n", ret); + US_INFO("kz_inflateReset return code is %d\n", ret); return ret; } diff --git a/KAEZlib/src/kaezip_adapter.h b/KAEZlib/src/kaezip_adapter.h index c571a00..6e1a678 100644 --- a/KAEZlib/src/kaezip_adapter.h +++ b/KAEZlib/src/kaezip_adapter.h @@ -13,17 +13,6 @@ */ extern int wd_get_available_dev_num(const char* alogrithm); -/** - * UADK Zlib v2 interface from wd_zlibwrapper.h -*/ -extern int wd_deflate_init(z_streamp strm, int level, int windowbits); -extern int wd_deflate_reset(z_streamp strm); -extern int wd_deflate_end(z_streamp strm); - -extern int wd_inflate_init(z_streamp strm, int windowbits); -extern int wd_inflate_reset(z_streamp strm); -extern int wd_inflate_end(z_streamp strm); - /** * adapter interface for zlib-open */ diff --git a/KAEZlib/src/v2/kaezip_comp.c b/KAEZlib/src/v2/kaezip_comp.c index 241de88..e3fa0ef 100644 --- a/KAEZlib/src/v2/kaezip_comp.c +++ b/KAEZlib/src/v2/kaezip_comp.c @@ -5,164 +5,150 @@ * @Date: 2023-05-09 */ +#include "zlib.h" #include "wd.h" #include "wd_comp.h" -#include "wd_zlibwrapper.h" #include "kaezip_comp.h" #include "kaezip_log.h" -static z_stream g_init_strm = {0}; - -static void __attribute((constructor)) wd_do_init_onlyone(void) -{ - wd_deflate_init(&g_init_strm, 1, 12); -} - -static void __attribute((destructor)) wd_do_uninit_onlyone(void) -{ - wd_deflate_end(&g_init_strm); -} - -static int wd_checkAndSet_remainData(z_streamp strm, int flush) +static int kz_check_params(struct wd_comp_req *req) { - kaezip_exdata *kz_exdata = (kaezip_exdata*)strm->opaque; - if (!kz_exdata) { - US_ERR("kaezip_exdata is NULL!\n"); + if (unlikely(!req)) { + US_ERR("invalid: req is NULL!\n"); + return -WD_EINVAL; } - - unsigned int remain_len = kz_exdata->remain; - US_DEBUG("remain data has %u Bytes\n", remain_len); - if (remain_len != 0) { - if (strm->avail_out < remain_len) { - US_ERR("buffer error! no more avail_out!\n"); - return 0; - } - memcpy(strm->avail_out + strm->total_out, kz_exdata->output_buffer, kz_exdata->last_comp_out_len); - strm->avail_out -= remain_len; - strm->total_out += kz_exdata->last_comp_out_len; - } else if (strm->avail_in == 0 && flush != Z_FINISH) { - US_ERR("buffer error! no more avail_in!\n"); - return 0; + if (unlikely(!req->src || !req->dst)) { + US_ERR("invalid: src or dst is NULL!\n"); + return -WD_EINVAL; + } + if (unlikely(!req->src_len)) { + return Z_STREAM_END; + } + if (unlikely(!req->dst_len)) { + US_ERR("invalid: dst_len is 0!\n"); + return -WD_EINVAL; } - return 1; + return WD_SUCCESS; } -static int wd_zlib_do_implement(handle_t h_sess, struct wd_comp_req *req, kaezip_exdata *kz_exdata) +static int kz_zlib_do_comp_implement(handle_t h_sess, struct wd_comp_req *req, __u32 *borrowd_dst_len, + __u64 *used_in, __u64 *used_out) { - if (unlikely(!req || !kz_exdata)) { - US_ERR("req or kz_exdata NULL!\n"); - return Z_ERRNO; + int ret = kz_check_params(req); + if (unlikely(ret)) { + return ret; } - unsigned int total_avail_in = req->src_len; - unsigned int total_avail_out = req->dst_len; + __u32 total_avail_in = req->src_len; + __u32 total_avail_out = req->dst_len; struct wd_comp_req strm_req; memcpy(&strm_req, req, sizeof(struct wd_comp_req)); - strm_req.dst = kz_exdata->output_buffer; + req->src_len = 0; + req->dst_len = 0; + *used_in = *used_out = 0; + void *tmp_dst_buffer = malloc(OUTPUT_CHUNK_V2); + if (!tmp_dst_buffer) { + return -WD_EINVAL; + } + strm_req.dst = tmp_dst_buffer; + + // 该接口至多压缩/解压缩(u32_max)个字节,若超过, 则重复调用 + // 因此需根据req->last判断是否为最后一个大块 + int is_real_last = req->last; strm_req.last = 0; do { - strm_req.src_len = (total_avail_in > INPUT_CHUNK_V2 ? INPUT_CHUNK_V2 : total_avail_in); + strm_req.src_len = total_avail_in > INPUT_CHUNK_V2 ? INPUT_CHUNK_V2 : total_avail_in; strm_req.dst_len = OUTPUT_CHUNK_V2; - unsigned int orig_src_len = strm_req.src_len; - unsigned int orig_dst_len = strm_req.dst_len; - if (strm_req.op_type == WD_DIR_COMPRESS && req->last && total_avail_in <= INPUT_CHUNK_V2) { - strm_req.last = 1; + __u32 orig_src_len = strm_req.src_len; + __u32 orig_dst_len = OUTPUT_CHUNK_V2; + + if (strm_req.op_type == WD_DIR_COMPRESS) { + if (is_real_last && total_avail_in <= INPUT_CHUNK_V2) { + strm_req.last = 1; + } } - int ret = wd_do_comp_strm(h_sess, &strm_req); - if (unlikely(ret < 0 || strm_req.status == WD_IN_EPARA )) { - US_ERR("wd_do_comp_strm, invaild or incomplete data! ret = %d, status = %d\n", ret, strm_req.status); - US_DEBUG("src_len : %u/%u, dst_len : %u/%u\n", - orig_src_len, strm_req.src_len, orig_dst_len, strm_req.dst_len); - return ret; + ret = wd_do_comp_strm(h_sess, &strm_req); + if (strm_req.status == WD_IN_EPARA || unlikely(ret < 0)) { + US_ERR("wd_do_comp_strm, invalid or incomplete data! ret = %d, status = %d\n", ret, strm_req.status); + US_DEBUG("src_len : %u/%u, dst_len : %u/%u\n\n", orig_src_len, strm_req.src_len, + orig_dst_len, strm_req.dst_len); + free(tmp_dst_buffer); + return Z_STREAM_ERROR; } if (strm_req.dst_len > total_avail_out) { - US_WARN("no more avail out space! need more %u Bytes!\n", kz_exdata->remain); - US_DEBUG("src_len : %u/%u, dst_len : %u/%u\n", - orig_src_len, strm_req.src_len, orig_dst_len, strm_req.dst_len); - kz_exdata->remain = strm_req.dst_len - total_avail_out; - kz_exdata->last_comp_in_len = strm_req.src_len; - kz_exdata->last_comp_out_len = strm_req.dst_len; - kz_exdata->chunk_total_in += strm_req.src_len; - break; + *borrowd_dst_len = strm_req.dst_len - total_avail_out; + total_avail_out = strm_req.dst_len; + US_ERR("no more avail out space! borrowed dst len is %u\n", *borrowd_dst_len); + US_DEBUG("src_len : %u/%u, dst_len : %u/%u\n\n", orig_src_len, strm_req.src_len, + orig_dst_len, strm_req.dst_len); } - kz_exdata->chunk_total_out += strm_req.dst_len; + *used_out += strm_req.dst_len; memcpy(req->dst, strm_req.dst, strm_req.dst_len); req->dst += strm_req.dst_len; total_avail_out -= strm_req.dst_len; - kz_exdata->chunk_total_in += strm_req.src_len; + *used_in += strm_req.src_len; strm_req.src += strm_req.src_len; total_avail_in -= strm_req.src_len; } while ((total_avail_in != 0) && (total_avail_out != 0)); + free(tmp_dst_buffer); req->status = strm_req.status; - return Z_OK; + return 0; } -static int wd_zlib_do_request_v2(z_streamp strm, int flush, enum wd_comp_op_type type) +static int kz_zlib_do_request_v2(z_streamp strm, int flush, enum wd_comp_op_type type) { - if (unlikely(!strm)) { - US_ERR("strm NULL!\n"); + if (unlikely(flush != Z_SYNC_FLUSH && flush != Z_NO_FLUSH && flush != Z_FINISH)) { + US_ERR("invalid: flush is %d!\n", flush); return Z_STREAM_ERROR; } - if (!wd_checkAndSet_remainData(strm, flush)) { - return Z_BUF_ERROR; - } - - flush = (flush == Z_NO_FLUSH ? Z_SYNC_FLUSH : flush); - if (unlikely(flush != Z_SYNC_FLUSH && flush != Z_FINISH)) { - US_ERR("invalid : flush is %d\n", flush); - return Z_STREAM_ERROR; - } - - unsigned int src_len = strm->avail_in; - unsigned int dst_len = strm->avail_out; + handle_t h_sess = strm->reserved; struct wd_comp_req req = {0}; - req.src = (void*)(strm->next_in + strm->total_in); - req.src_len = src_len; - req.dst = (void*)(strm->next_out + strm->total_out); - req.src_len = dst_len; - req.op_type = type; + + __u32 borrowed_dst_len = strm->adler; + __u32 src_len = strm->avail_in; + __u32 dst_len = strm->avail_out > borrowed_dst_len ? strm->avail_out - borrowed_dst_len : 0; + US_DEBUG("borrowed dst len is %u, avail_in is %u, avail_out is %u\n", + borrowed_dst_len, src_len, dst_len); + + req.src = (void*)(strm->next_in + strm->total_in); + req.src_len = src_len; + req.dst = (void*)(strm->next_out + strm->total_out); + req.dst_len = dst_len; + req.op_type = type; req.data_fmt = WD_FLAT_BUF; - req.last = (flush == Z_FINISH ? 1 : 0); - US_DEBUG("before %s, strm->avail_in = %u, strm->avail_out = %u, strm->total_in = %llu, strm->total_out = %llu, is_last_chunk = %u\n", - type ? "decompress" : "compress", strm->avail_in, strm->avail_out, strm->total_in, strm->total_out, req.last); - - /********************************/ - kaezip_exdata *kz_exdata = (kaezip_exdata*)strm->opaque; - memset(kz_exdata, 0, sizeof(kaezip_exdata)); - handle_t h_sess = strm->reserved; - int ret = wd_zlib_do_implement(h_sess, &req, kz_exdata); + req.last = (flush == Z_FINISH) ? 1 : 0; + + borrowed_dst_len = 0; + __u64 used_in; + __u64 used_out; + int ret = kz_zlib_do_comp_implement(h_sess, &req, &borrowed_dst_len, &used_in, &used_out); if (unlikely(ret)) { - US_ERR("failed to do %s, ret is %d\n", type ? "decompress" : "compress", ret); - return Z_STREAM_ERROR; + US_ERR("failed to do un/compress(%d)!\n", ret); + return ret; } - /********************************/ - - strm->avail_in = src_len - kz_exdata->chunk_total_in; - strm->avail_out = (kz_exdata->remain == 0) ? dst_len - kz_exdata->chunk_total_out : 0; - strm->total_in += kz_exdata->chunk_total_in; - strm->total_out += kz_exdata->chunk_total_out; - US_DEBUG("after %s, strm->avail_in = %u, strm->avail_out = %u, strm->total_in = %llu, strm->total_out = %llu, is_last_chunk = %u\n", - type ? "decompress" : "compress", strm->avail_in, strm->avail_out, strm->total_in, strm->total_out, req.last); - - if (type == WD_DIR_COMPRESS && flush == Z_FINISH && - kz_exdata->chunk_total_in == src_len && kz_exdata->remain == 0) { - ret = Z_STREAM_END; - } else if (type == WD_DIR_DECOMPRESS && req.status == WD_STREAM_END && kz_exdata->remain == 0) { + + strm->adler = borrowed_dst_len; + strm->avail_in = src_len - used_in; + strm->avail_out = (strm->adler == 0) ? dst_len - used_out : 0; + strm->total_in += used_in; + strm->total_out += used_out; + US_DEBUG("strm->total_in is %lu, strm->total_out is %lu\n\n", strm->total_in, strm->total_out); + if ((flush == Z_FINISH && used_in == src_len) || (req.status == 1)) { ret = Z_STREAM_END; } return ret; } -int wd_deflate_v2(z_streamp strm, int flush) +int kz_deflate_v2(z_streamp strm, int flush) { - return wd_zlib_do_request_v2(strm, flush, WD_DIR_COMPRESS); + return kz_zlib_do_request_v2(strm, flush, WD_DIR_COMPRESS); } -int wd_inflate_v2(z_streamp strm, int flush) +int kz_inflate_v2(z_streamp strm, int flush) { - return wd_zlib_do_request_v2(strm, flush, WD_DIR_DECOMPRESS); + return kz_zlib_do_request_v2(strm, flush, WD_DIR_DECOMPRESS); } diff --git a/KAEZlib/src/v2/kaezip_comp.h b/KAEZlib/src/v2/kaezip_comp.h index 0684f76..f99651b 100644 --- a/KAEZlib/src/v2/kaezip_comp.h +++ b/KAEZlib/src/v2/kaezip_comp.h @@ -7,19 +7,10 @@ #ifndef KAEZIP_COMP_H #define KAEZIP_COMP_H -#define INPUT_CHUNK_V2 (512 * 1024) +#define INPUT_CHUNK_V2 (1024 * 1024) #define OUTPUT_CHUNK_V2 (INPUT_CHUNK_V2 << 3) -typedef struct kaezip_exdata_s { - unsigned int remain; // 因avail_out不足需要借用的长度 - unsigned char output_buffer[OUTPUT_CHUNK_V2]; // 输出buffer - unsigned int last_comp_in_len; // 最后一次压缩的输入长度 - unsigned int last_comp_out_len; // 最后一次压缩的输出长度 - unsigned long chunk_total_in; // 上次一整块总共消耗的输入长度 - unsigned long chunk_total_out; // 上次一整块总共消耗的输出长度 -} kaezip_exdata; - -int wd_deflate_v2(z_streamp strm, int flush); -int wd_inflate_v2(z_streamp strm, int flush); +int kz_deflate_v2(z_streamp strm, int flush); +int kz_inflate_v2(z_streamp strm, int flush); #endif diff --git a/KAEZlib/src/v2/kaezip_init.c b/KAEZlib/src/v2/kaezip_init.c new file mode 100644 index 0000000..5f73dfb --- /dev/null +++ b/KAEZlib/src/v2/kaezip_init.c @@ -0,0 +1,265 @@ +/* === Dependencies === */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include + +#include "wd.h" +#include "wd_comp.h" +#include "wd_sched.h" +#include "wd_util.h" +#include "drv/wd_comp_drv.h" +#include "wd_zlibwrapper.h" +#include "kaezip_init.h" +#include "kaezip_log.h" + +#define max(a, b) ((a) > (b) ? (a) : (b)) + +enum kz_init_status { + WD_ZLIB_UNINIT, + WD_ZLIB_INIT, +}; + +struct kz_zlibwrapper_config { + int count; + int status; +}; + +static pthread_mutex_t kz_zlib_mutex = PTHREAD_MUTEX_INITIALIZER; +static struct kz_zlibwrapper_config zlib_config = {0}; + +static int kz_getzlib_device_num(void) +{ + int num = 0; + struct uacce_dev_list* zlib_list = wd_get_accel_list("zlib"); + if (zlib_list) { + struct uacce_dev_list* p = zlib_list; + do { + num++; + p = p->next; + } while (p); + } + US_INFO("zlib device num is %d\n", num); + return num; +} + +static void kz_zlib_unlock(void) +{ + pthread_mutex_unlock(&kz_zlib_mutex); + zlib_config.status = WD_ZLIB_UNINIT; +} + +static int kz_zlib_uadk_init(void) +{ + struct wd_ctx_nums *ctx_set_num; + struct wd_ctx_params cparams; + int ret, i; + + if (zlib_config.status == WD_ZLIB_INIT) + return 0; + + ctx_set_num = calloc(WD_DIR_MAX, sizeof(*ctx_set_num)); + if (!ctx_set_num) { + US_ERR("failed to alloc ctx_set_size!\n"); + return Z_MEM_ERROR; + } + + cparams.op_type_num = WD_DIR_MAX; + cparams.ctx_set_num = ctx_set_num; + cparams.bmp = numa_allocate_nodemask(); + if (!cparams.bmp) { + US_ERR("failed to create nodemask!\n"); + ret = Z_MEM_ERROR; + goto out_freectx; + } + + pid_t the_pid = getpid(); + int zlib_device_num = kz_getzlib_device_num(); + if (zlib_device_num == 0) { + US_ERR("no zlib device!\n"); + return Z_ERRNO; + } + numa_bitmask_setbit(cparams.bmp, the_pid % zlib_device_num); + + for (i = 0; i < WD_DIR_MAX; i++) + ctx_set_num[i].sync_ctx_num = WD_DIR_MAX; + + ret = wd_comp_init2_("zlib", 0, 0, &cparams); + if (ret) { + ret = Z_STREAM_ERROR; + goto out_freebmp; + } + + zlib_config.status = WD_ZLIB_INIT; + +out_freebmp: + numa_free_nodemask(cparams.bmp); + +out_freectx: + free(ctx_set_num); + + return ret; +} + +static void kz_zlib_uadk_uninit(void) +{ + wd_comp_uninit2(); + zlib_config.status = WD_ZLIB_UNINIT; +} + +static int kz_zlib_analy_alg(int windowbits, int *alg, int *windowsize) +{ + static const int ZLIB_MAX_WBITS = 15; + static const int ZLIB_MIN_WBITS = 8; + static const int GZIP_MAX_WBITS = 31; + static const int GZIP_MIN_WBITS = 24; + static const int DEFLATE_MAX_WBITS = -8; + static const int DEFLATE_MIN_WBITS = -15; + static const int WBINS_ZLIB_4K = 11; + static const int WBINS_GZIP_4K = 27; + static const int WBINS_DEFLATE_4K = -12; + + if ((windowbits >= ZLIB_MIN_WBITS) && (windowbits <= ZLIB_MAX_WBITS)) { + *alg = WD_ZLIB; + *windowsize = max(windowbits - WBINS_ZLIB_4K, WD_COMP_WS_4K); + } else if ((windowbits >= GZIP_MIN_WBITS) && (windowbits <= GZIP_MAX_WBITS)) { + *alg = WD_GZIP; + *windowsize = max(windowbits - WBINS_GZIP_4K, WD_COMP_WS_4K); + } else if ((windowbits >= DEFLATE_MIN_WBITS) && (windowbits <= DEFLATE_MAX_WBITS)) { + *alg = WD_DEFLATE; + *windowsize = max(windowbits - WBINS_DEFLATE_4K, WD_COMP_WS_4K); + } else { + return Z_STREAM_ERROR; + } + + return Z_OK; +} + +static int kz_zlib_alloc_sess(z_streamp strm, int level, int windowbits, enum wd_comp_op_type type) +{ + struct wd_comp_sess_setup setup = {0}; + struct sched_params sparams = {0}; + int windowsize, alg, ret; + handle_t h_sess; + + ret = kz_zlib_analy_alg(windowbits, &alg, &windowsize); + if (ret < 0) { + US_ERR("invalid: windowbits is %d!\n", windowbits); + return ret; + } + + setup.comp_lv = level; + setup.alg_type = alg; + setup.win_sz = windowsize; + setup.op_type = type; + sparams.type = type; + setup.sched_param = &sparams; + + h_sess = wd_comp_alloc_sess(&setup); + if (!h_sess) { + US_ERR("failed to alloc comp sess!\n"); + return Z_STREAM_ERROR; + } + strm->reserved = (__u64)h_sess; + + return Z_OK; +} + +static void kz_zlib_free_sess(z_streamp strm) +{ + wd_comp_free_sess((handle_t)strm->reserved); +} + +static int kz_zlib_init(z_streamp strm, int level, int windowbits, enum wd_comp_op_type type) +{ + int ret; + + if (unlikely(!strm)) + return Z_STREAM_ERROR; + + pthread_mutex_lock(&kz_zlib_mutex); + ret = kz_zlib_uadk_init(); + if (unlikely(ret < 0)) + goto out_unlock; + + strm->total_in = 0; + strm->total_out = 0; + + ret = kz_zlib_alloc_sess(strm, level, windowbits, type); + if (unlikely(ret < 0)) + goto out_uninit; + + __atomic_add_fetch(&zlib_config.count, 1, __ATOMIC_RELAXED); + pthread_mutex_unlock(&kz_zlib_mutex); + + return Z_OK; + +out_uninit: + kz_zlib_uadk_uninit(); + +out_unlock: + pthread_mutex_unlock(&kz_zlib_mutex); + + return ret; +} + +static int kz_zlib_uninit(z_streamp strm) +{ + int ret; + + if (unlikely(!strm)) + return Z_STREAM_ERROR; + + kz_zlib_free_sess(strm); + + pthread_mutex_lock(&kz_zlib_mutex); + + ret = __atomic_sub_fetch(&zlib_config.count, 1, __ATOMIC_RELAXED); + if (ret != 0) + goto out_unlock; + + kz_zlib_uadk_uninit(); + +out_unlock: + pthread_mutex_unlock(&kz_zlib_mutex); + + return Z_OK; +} + +/* === Compression === */ +int kz_deflate_init(z_streamp strm, int level, int windowbits) +{ + pthread_atfork(NULL, NULL, kz_zlib_unlock); + return kz_zlib_init(strm, level, windowbits, WD_DIR_COMPRESS); +} + +int kz_deflate_reset(z_streamp strm) +{ + return wd_deflate_reset(strm); +} + +int kz_deflate_end(z_streamp strm) +{ + return kz_zlib_uninit(strm); +} + +/* === Decompression === */ +int kz_inflate_init(z_streamp strm, int windowbits) +{ + pthread_atfork(NULL, NULL, kz_zlib_unlock); + return kz_zlib_init(strm, 0, windowbits, WD_DIR_DECOMPRESS); +} + +int kz_inflate_reset(z_streamp strm) +{ + return wd_inflate_reset(strm); +} + +int kz_inflate_end(z_streamp strm) +{ + return kz_zlib_uninit(strm); +} diff --git a/KAEZlib/src/v2/kaezip_init.h b/KAEZlib/src/v2/kaezip_init.h new file mode 100644 index 0000000..ae20482 --- /dev/null +++ b/KAEZlib/src/v2/kaezip_init.h @@ -0,0 +1,12 @@ +#ifndef KAEZIP_INIT_H +#define KAEZIP_INIT_H + +int kz_deflate_init(z_streamp strm, int level, int windowbits); +int kz_deflate_reset(z_streamp strm); +int kz_deflate_end(z_streamp strm); + +int kz_inflate_init(z_streamp strm, int windowbits); +int kz_inflate_reset(z_streamp strm); +int kz_inflate_end(z_streamp strm); + +#endif \ No newline at end of file diff --git a/KAEZlib/test/kaezip_perf.c b/KAEZlib/test/kaezip_perf.c index f042492..18711a2 100644 --- a/KAEZlib/test/kaezip_perf.c +++ b/KAEZlib/test/kaezip_perf.c @@ -57,10 +57,10 @@ uint32_t *get_decompress_input(size_t input_sz, uLong *pblen) return outbuf; } -int do_multi_perf(int multi, int stream_len, int loop_times, int compress, +int do_multi_perf(int multi, int stream_len, int loop_times, int windowBits, int compress, void* output, uLong output_sz, void* inbuf, uLong blen) { - int i,j; + int i, j, err; pid_t pid_child = 0; struct timeval start, stop; gettimeofday(&start, NULL); @@ -72,24 +72,55 @@ int do_multi_perf(int multi, int stream_len, int loop_times, int compress, } if (pid_child == 0) { - for(j = 0;j < loop_times;j++) - { + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + strm.next_in = (z_const Bytef*) inbuf; + strm.next_out = output; + if (compress) { + (void)deflateInit2_(&strm, 1, Z_DEFLATED, windowBits, 0, Z_DEFAULT_STRATEGY, NULL, 0); + } else { + (void)inflateInit2_(&strm, windowBits, NULL, 0); + } + + for (j = 0; j < loop_times; j++) { int ret = -1; if (compress) { blen = compressBound(stream_len); - ret = compress2((Bytef *)output, (uLongf *)&blen, (Bytef *)inbuf, (uLong)stream_len, 1); + // ret = compress2((Bytef *)output, (uLongf *)&blen, (Bytef *)inbuf, (uLong)stream_len, 1); + /***********************************************/ + strm.avail_in = stream_len; + strm.avail_out = blen; + err = deflate(&strm, Z_FINISH); + ret = (err == Z_STREAM_END ? Z_OK : err); + deflateReset(&strm); + /***********************************************/ if (ret != Z_OK && ret != Z_BUF_ERROR) { printf("compres error, ret = %d\n", ret); return -1; } } else { - ret = uncompress((Bytef *)output, &output_sz, (const Bytef *)inbuf, blen); + // ret = uncompress((Bytef *)output, &output_sz, (const Bytef *)inbuf, blen); + /***********************************************/ + strm.avail_in = blen; + strm.avail_out = output_sz; + err = inflate(&strm, Z_FINISH); + ret = (err == Z_STREAM_END ? Z_OK : err); + inflateReset(&strm); + /***********************************************/ if (ret < 0) { printf("uncompres error, ret = %d\n", ret); return -1; } } } + + if (compress) { + (void)deflateEnd(&strm); + } else { + (void)inflateEnd(&strm); + } } if (pid_child > 0) { @@ -109,7 +140,7 @@ int do_multi_perf(int multi, int stream_len, int loop_times, int compress, if (multi == 0) { multi = 1; } gettimeofday(&stop, NULL); uLong time1 = (stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec - start.tv_usec; - float speed1 = 1000000.0 / time1 * loop_times * multi * stream_len / 1000 / 1000 / 1000; + float speed1 = 1000000.0 / time1 * loop_times * multi * stream_len / (1 << 30); printf("kaezip %s perf result:\n", compress ? "compress" : "decompress"); printf(" time used: %lu us, speed = %.3f GB/s\n", time1, speed1); } @@ -117,9 +148,8 @@ int do_multi_perf(int multi, int stream_len, int loop_times, int compress, return 0; } -int do_compress_perf(int multi, int stream_len, int loop_times) +int do_compress_perf(int multi, int stream_len, int loop_times, int windowBits) { - int i = 0; uint8_t *inbuf = get_compress_input(stream_len); if (inbuf == NULL) { return -1; @@ -133,7 +163,7 @@ int do_compress_perf(int multi, int stream_len, int loop_times) } memset(outbuf, 0, output_sz); - int ret = do_multi_perf(multi, stream_len, loop_times, 1, outbuf, output_sz, inbuf, blen); + int ret = do_multi_perf(multi, stream_len, loop_times, windowBits, 1, outbuf, output_sz, inbuf, blen); free(inbuf); inbuf = NULL; @@ -142,9 +172,8 @@ int do_compress_perf(int multi, int stream_len, int loop_times) return ret; } -int do_decompress_perf(int multi, int stream_len, int loop_times) +int do_decompress_perf(int multi, int stream_len, int loop_times, int windowBits) { - int i, j; uLong blen = 0; uint32_t *inbuf = get_decompress_input(stream_len, &blen); if (inbuf == NULL) { @@ -157,7 +186,7 @@ int do_decompress_perf(int multi, int stream_len, int loop_times) return -1; } - int ret = do_multi_perf(multi, stream_len, loop_times, 0, output, output_sz, inbuf, blen); + int ret = do_multi_perf(multi, stream_len, loop_times, windowBits, 0, output, output_sz, inbuf, blen); free(inbuf); inbuf = NULL; @@ -173,6 +202,7 @@ void usage(void) printf(" -l: stream length(KB)\n"); printf(" -n: loop times\n"); printf(" -d: compress or decompress\n"); + printf(" -w: windowBits\n"); printf(" example: ./kaezip_perf -m 2 -l 1024 -n 1000\n"); printf(" ./kaezip_perf -d -m 2 -l 1024 -n 1000\n"); } @@ -199,11 +229,12 @@ void usage(void) int main(int argc, char **argv) { int o = 0; - const char *optstring = "dm:l:n:h"; + const char *optstring = "dm:l:n:w:h"; int multi = 2; int stream_len = 1024; int loop_times = 1000; int compress = 1; + int windowBits = 8; while ((o = getopt(argc, argv, optstring)) != -1) { if(optstring == NULL) continue; switch (o) { @@ -216,6 +247,9 @@ int main(int argc, char **argv) case 'n': loop_times = atoi(optarg); break; + case 'w': + windowBits = atoi(optarg); + break; case 'd': compress = 0; break; @@ -230,12 +264,13 @@ int main(int argc, char **argv) printf("\ndefault input parameter used\n"); } - printf("kaezip perf parameter: multi process %d, stream length: %d(KB), loop times: %d\n", multi, stream_len, loop_times); + printf("kaezip perf parameter: multi process %d, stream length: %d(KB), loop times: %d, windowBits : %d\n", + multi, stream_len, loop_times, windowBits); - stream_len = 1000 * stream_len; + stream_len = 1024 * stream_len; if (compress) { - return do_compress_perf(multi, stream_len, loop_times); + return do_compress_perf(multi, stream_len, loop_times, windowBits); } else { - return do_decompress_perf(multi, stream_len, loop_times); + return do_decompress_perf(multi, stream_len, loop_times, windowBits); } } diff --git a/KAEZlib/test/makefile b/KAEZlib/test/makefile index baaaf36..d250764 100644 --- a/KAEZlib/test/makefile +++ b/KAEZlib/test/makefile @@ -11,7 +11,7 @@ all: kaezip_perf objects = kaezip_perf.c kaezip_perf : $(objects) - gcc $(CFLAGS) $(LDFLAGS) $(INCDIR) -o kaezip_perf $(objects) + gcc $(CFLAGS) $(LDFLAGS) $(INCDIR) -Wl,-rpath=/usr/local/kaezip/lib -o kaezip_perf $(objects) .PHONY : clean clean : diff --git a/uadk/Makefile.am b/uadk/Makefile.am index f04164f..70d9f2b 100644 --- a/uadk/Makefile.am +++ b/uadk/Makefile.am @@ -36,7 +36,7 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ include/wd_ecc.h include/wd_sched.h include/wd_alg.h nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ - v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h + v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h v1/wd_comp.h lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la libhisi_zip.la \ libhisi_hpre.la libhisi_sec.la -- Gitee