1 Star 0 Fork 83

zhaosai/openjdk-1.8.0

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
implementation_of_Blas_hotspot_function_in_Intrinsics.patch 63.82 KB
一键复制 编辑 原始数据 按行查看 历史
kuen 提交于 2022-07-28 20:52 +08:00 . I5JGAF: fix coding style and describe error
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626
diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
index 7080ea10d..62a8ab7bd 100644
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
@@ -919,6 +919,126 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
}
}
+void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) {
+ assert(x->number_of_arguments() == 16, "wrong type");
+
+ LIRItem ta(x->argument_at(0), this);
+ LIRItem tb(x->argument_at(1), this);
+ LIRItem m(x->argument_at(2), this);
+ LIRItem n(x->argument_at(3), this);
+ LIRItem k(x->argument_at(4), this);
+ LIRItem alpha(x->argument_at(5), this);
+ LIRItem a(x->argument_at(6), this);
+ LIRItem a_offset(x->argument_at(7), this);
+ LIRItem lda(x->argument_at(8), this);
+ LIRItem b(x->argument_at(9), this);
+ LIRItem b_offset(x->argument_at(10), this);
+ LIRItem ldb(x->argument_at(11), this);
+ LIRItem beta(x->argument_at(12), this);
+ LIRItem c(x->argument_at(13), this);
+ LIRItem c_offset(x->argument_at(14), this);
+ LIRItem ldc(x->argument_at(15), this);
+
+ ta.load_item();
+ tb.load_item();
+ m.load_item();
+ n.load_item();
+ k.load_item();
+ alpha.load_item();
+ a.load_item();
+ a_offset.load_nonconstant();
+ lda.load_item();
+ b.load_item();
+ b_offset.load_nonconstant();
+ ldb.load_item();
+ beta.load_item();
+ c.load_item();
+ c_offset.load_nonconstant();
+ ldc.load_item();
+
+ LIR_Opr ta_base = ta.result();
+ LIR_Opr tb_base = tb.result();
+ LIR_Opr r_m = m.result();
+ LIR_Opr r_n = n.result();
+ LIR_Opr r_k = k.result();
+ LIR_Opr r_alpha = alpha.result();
+ LIR_Opr a_base = a.result();
+ LIR_Opr r_a_offset = a_offset.result();
+ LIR_Opr r_lda = lda.result();
+ LIR_Opr b_base = b.result();
+ LIR_Opr r_b_offset = b_offset.result();
+ LIR_Opr r_ldb = ldb.result();
+ LIR_Opr r_beta = beta.result();
+ LIR_Opr c_base = c.result();
+ LIR_Opr r_c_offset = c_offset.result();
+ LIR_Opr r_ldc = ldc.result();
+
+ LIR_Opr ta_value = load_String_value(ta_base);
+ LIR_Opr ta_offset = load_String_offset(ta_base);
+ LIR_Opr tb_value = load_String_value(tb_base);
+ LIR_Opr tb_offset = load_String_offset(tb_base);
+
+ LIR_Address* addr_ta = emit_array_address(ta_value, ta_offset, T_CHAR, false);
+ LIR_Address* addr_tb = emit_array_address(tb_value, tb_offset, T_CHAR, false);
+ LIR_Address* addr_a = emit_array_address(a_base, r_a_offset, T_DOUBLE, false);
+ LIR_Address* addr_b = emit_array_address(b_base, r_b_offset, T_DOUBLE, false);
+ LIR_Address* addr_c = emit_array_address(c_base, r_c_offset, T_DOUBLE, false);
+
+ LIR_Opr tmp = new_pointer_register();
+ LIR_Opr ta_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_ta), tmp);
+ __ move(tmp, ta_addr);
+ tmp = new_pointer_register();
+ LIR_Opr tb_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_tb), tmp);
+ __ move(tmp, tb_addr);
+ tmp = new_pointer_register();
+ LIR_Opr a_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_a), tmp);
+ __ move(tmp, a_addr);
+ tmp = new_pointer_register();
+ LIR_Opr b_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_b), tmp);
+ __ move(tmp, b_addr);
+ tmp = new_pointer_register();
+ LIR_Opr c_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_c), tmp);
+ __ move(tmp, c_addr);
+
+ BasicTypeList signature(13);
+ signature.append(T_ADDRESS);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_INT);
+ signature.append(T_INT);
+ signature.append(T_DOUBLE);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_DOUBLE);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+
+ LIR_OprList* args = new LIR_OprList();
+ args->append(ta_addr);
+ args->append(tb_addr);
+ args->append(r_m);
+ args->append(r_n);
+ args->append(r_k);
+ args->append(r_alpha);
+ args->append(a_addr);
+ args->append(r_lda);
+ args->append(b_addr);
+ args->append(r_ldb);
+ args->append(r_beta);
+ args->append(c_addr);
+ args->append(r_ldc);
+
+ assert(StubRoutines::dgemmDgemm() != NULL, "invalid stub entry");
+ call_runtime(&signature, args, StubRoutines::dgemmDgemm(), voidType, NULL);
+ set_no_result(x);
+}
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
assert(x->number_of_arguments() == 5, "wrong type");
@@ -1038,6 +1158,114 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
+void LIRGenerator::do_dgemv_dgemv(Intrinsic* x) {
+ assert(x->number_of_arguments() == 14, "wrong type");
+
+ LIRItem trans(x->argument_at(0), this);
+ LIRItem m(x->argument_at(1), this);
+ LIRItem n(x->argument_at(2), this);
+ LIRItem alpha(x->argument_at(3), this);
+ LIRItem array_a(x->argument_at(4), this);
+ LIRItem array_a_offset(x->argument_at(5), this);
+ LIRItem lda(x->argument_at(6), this);
+ LIRItem array_x(x->argument_at(7), this);
+ LIRItem array_x_offset(x->argument_at(8), this);
+ LIRItem incx(x->argument_at(9), this);
+ LIRItem beta(x->argument_at(10), this);
+ LIRItem array_y(x->argument_at(11), this);
+ LIRItem array_y_offset(x->argument_at(12), this);
+ LIRItem incy(x->argument_at(13), this);
+
+ trans.load_item();
+ m.load_item();
+ n.load_item();
+ alpha.load_item();
+ array_a.load_item();
+ array_a_offset.load_nonconstant();
+ lda.load_item();
+ array_x.load_item();
+ array_x_offset.load_nonconstant();
+ incx.load_item();
+ beta.load_item();
+ array_y.load_item();
+ array_y_offset.load_nonconstant();
+ incy.load_item();
+
+ LIR_Opr res_trans_base = trans.result();
+ LIR_Opr res_m = m.result();
+ LIR_Opr res_n = n.result();
+ LIR_Opr res_alpha = alpha.result();
+ LIR_Opr res_a_base = array_a.result();
+ LIR_Opr res_a_offset = array_a_offset.result();
+ LIR_Opr res_lda = lda.result();
+ LIR_Opr res_x_base = array_x.result();
+ LIR_Opr res_x_offset = array_x_offset.result();
+ LIR_Opr res_incx = incx.result();
+ LIR_Opr res_beta = beta.result();
+ LIR_Opr res_y_base = array_y.result();
+ LIR_Opr res_y_offset = array_y_offset.result();
+ LIR_Opr res_incy = incy.result();
+
+ LIR_Opr addr_trans_base = LIRGenerator::load_String_value(res_trans_base);
+ LIR_Opr addr_trans_offset = LIRGenerator::load_String_offset(res_trans_base);
+ LIR_Address* addr_trans = emit_array_address(addr_trans_base, addr_trans_offset, T_CHAR, false);
+
+ LIR_Address* addr_a = emit_array_address(res_a_base, res_a_offset, T_DOUBLE, false);
+ LIR_Address* addr_x = emit_array_address(res_x_base, res_x_offset, T_DOUBLE, false);
+ LIR_Address* addr_y = emit_array_address(res_y_base, res_y_offset, T_DOUBLE, false);
+
+ // load addr to register
+ LIR_Opr tmp = new_pointer_register();
+ LIR_Opr trans_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_trans), tmp);
+ __ move(tmp, trans_addr);
+
+ LIR_Opr tmp1 = new_pointer_register();
+ LIR_Opr a_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_a), tmp1);
+ __ move(tmp1, a_addr);
+
+ LIR_Opr tmp2 = new_pointer_register();
+ LIR_Opr x_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_x), tmp2);
+ __ move(tmp2, x_addr);
+
+ LIR_Opr tmp3 = new_pointer_register();
+ LIR_Opr y_addr = new_register(T_ADDRESS);
+ __ leal(LIR_OprFact::address(addr_y), tmp3);
+ __ move(tmp3, y_addr);
+
+ BasicTypeList signature(11);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_INT);
+ signature.append(T_DOUBLE);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+ signature.append(T_DOUBLE);
+ signature.append(T_ADDRESS);
+ signature.append(T_INT);
+
+ LIR_OprList* args = new LIR_OprList();
+ args->append(trans_addr);
+ args->append(res_m);
+ args->append(res_n);
+ args->append(res_alpha);
+ args->append(a_addr);
+ args->append(res_lda);
+ args->append(x_addr);
+ args->append(res_incx);
+ args->append(res_beta);
+ args->append(y_addr);
+ args->append(res_incy);
+
+ assert(StubRoutines::dgemvDgemv() != NULL, "invalid stub entry");
+ call_runtime(&signature, args, StubRoutines::dgemvDgemv(), voidType, NULL);
+ set_no_result(x);
+}
+
// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
// _i2b, _i2c, _i2s
void LIRGenerator::do_Convert(Convert* x) {
diff --git a/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp
index c0aaa1de4..a275a6a99 100644
--- a/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp
@@ -50,6 +50,11 @@ void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpa
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
void lock_method(void);
void generate_stack_overflow_check(void);
+ void load_String_value(Register src, Register dst);
+ void load_String_offset(Register src, Register dst);
+ void emit_array_address(Register src, Register idx, Register dst, BasicType type);
+ address generate_Dgemm_dgemm_entry();
+ address generate_Dgemv_dgemv_entry();
void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
void generate_counter_overflow(Label* do_continue);
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
index c5ec637a1..125983179 100644
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
@@ -3221,6 +3221,44 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address load_BLAS_library() {
+ // Build the path of the BLAS shared library from the JVM library path and try to load it; the result stays NULL when no path is built.
+ const char library_name[] = "openblas";
+ char err_buf[1024] = {0};
+ char path[JVM_MAXPATHLEN] = {0};
+ os::jvm_path(path, sizeof(path));
+ int jvm_offset = -1;
+
+ // Find "jvm" in the last component of jvm_path; the text from there on is replaced by the BLAS library name below.
+ const char* last_name = strrchr(path, '/');
+ last_name = last_name ? last_name : path;
+ const char* last_lib_name = strstr(last_name, "jvm");
+ if (last_lib_name != NULL) {
+ jvm_offset = (int)(last_lib_name - path);
+ }
+
+ address library = NULL;
+ // Search path: <home>/jre/lib/<arch>/<vm>/libopenblas.so
+ // Only built when name + extension + terminating NUL fit in path; err_buf from os::dll_load is not reported.
+ if (jvm_offset >= 0) {
+ if (jvm_offset + strlen(library_name) + strlen(os::dll_file_extension()) < JVM_MAXPATHLEN) {
+ strncpy(&path[jvm_offset], library_name, JVM_MAXPATHLEN - jvm_offset);
+ strncat(path, os::dll_file_extension(), sizeof(path) - strlen(path) - 1); // bound by the space left in path, not by the source length
+ library = (address)os::dll_load(path, err_buf, sizeof(err_buf));
+ }
+ }
+ return library;
+ }
+
+ address get_BLAS_func_entry(address library, const char* func_name) {
+ // Look up func_name in the given BLAS library handle; a NULL handle
+ // yields NULL without calling os::dll_lookup.
+ if (library != NULL) {
+ return (address)os::dll_lookup((void*)library, func_name);
+ }
+ return NULL;
+ }
+
/**
* Arguments:
*
@@ -3254,6 +3292,218 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Parameter conversion from JVM to native BLAS
+ //
+ // Register:
+ // r0: transa r0: transa
+ // r1: transb r1: transb
+ // r2: m r2: &m
+ // r3: n r3: &n
+ // r4: k =========> r4: &k
+ // r5: A r5: &alpha
+ // r6: lda r6: A
+ // r7: B r7: &lda
+ // v0: alpha
+ // v1: beta
+ //
+ // Stack:
+ // |-------| |-------|
+ // | ldc | | ldc |
+ // |-------| |-------|
+ // | C | | C |
+ // |-------| |-------|
+ // | ldb | | ldb |
+ // |-------| <-- sp |-------|
+ // | | | m |
+ // |-------| |-------|
+ // | | | n |
+ // |-------| |-------|
+ // | | | k |
+ // |-------| |-------|
+ // | | | lda |
+ // |-------| |-------|
+ // | | | alpha |
+ // |-------| |-------|
+ // | | | beta |
+ // |-------| =========> |-------|
+ // | | | lr |
+ // |-------| |-------|
+ // | | | rfp |
+ // |-------| |-------| <-- fp
+ // | ... | | ... |
+ // |-------| |-------|
+ // | | | &ldc |
+ // |-------| |-------|
+ // | | | C |
+ // |-------| |-------|
+ // | | | &beta |
+ // |-------| |-------|
+ // | | | &ldb |
+ // |-------| |-------|
+ // | | | B |
+ // |-------| |-------| <-- sp
+ address generate_dgemmDgemm(address library) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "dgemm_dgemm");
+
+ address fn = get_BLAS_func_entry(library, "dgemm_");
+ if (fn == NULL) return NULL; // no stub when the "dgemm_" symbol cannot be resolved
+
+ address start = __ pc();
+
+ const Register transa = c_rarg0;
+ const Register transb = c_rarg1;
+ const Register m = c_rarg2;
+ const Register n = c_rarg3;
+ const Register k = c_rarg4;
+ const FloatRegister alpha = c_farg0;
+ const Register A = c_rarg5;
+ const Register lda = c_rarg6;
+ const Register B = c_rarg7;
+ const FloatRegister beta = c_farg1;
+
+ BLOCK_COMMENT("Entry:");
+
+ // allocate a 0x60-byte frame; rfp/lr are saved at sp + 48 and rfp is set to point at them
+ __ sub(sp, sp, 0x60);
+ __ stp(rfp, lr, Address(sp, 48));
+ __ add(rfp, sp, 0x30);
+ // BLAS function entry -> rscratch1
+ __ mov(rscratch1, fn);
+ // load incoming stack argument C (rfp + 56, i.e. 8 bytes above the caller's sp)
+ __ ldr(rscratch2, Address(rfp, 56));
+ // spill n at rfp + 40 and m at rfp + 44
+ __ stpw(n, m, Address(rfp, 40));
+ // r2 = &beta (rfp + 0x10)
+ __ add(r2, rfp, 0x10);
+ // spill lda at rfp + 32 and k at rfp + 36
+ __ stpw(lda, k, Address(rfp, 32));
+ // r3 = &ldc (incoming stack slot at rfp + 0x40)
+ __ add(r3, rfp, 0x40);
+ // outgoing stack args: &beta at sp + 16, C at sp + 24
+ __ stp(r2, rscratch2, Address(sp, 16));
+ // r2 = &ldb (incoming stack slot at rfp + 0x30)
+ __ add(r2, rfp, 0x30);
+ // outgoing stack arg: B at sp
+ __ str(B, Address(sp));
+ // move A from r5 to r6
+ __ mov(r6, A);
+ // outgoing stack arg: &ldc at sp + 32
+ __ str(r3, Address(sp, 32));
+ // r5 = &alpha (rfp + 0x18)
+ __ add(r5, rfp, 0x18);
+ // outgoing stack arg: &ldb at sp + 8
+ __ str(r2, Address(sp, 8));
+ // r4 = &k (rfp + 0x24)
+ __ add(r4, rfp, 0x24);
+ // spill beta at rfp + 16 and alpha at rfp + 24
+ __ stpd(beta, alpha, Address(rfp, 16));
+ // r7 = &lda (rfp + 0x20)
+ __ add(r7, rfp, 0x20);
+ // r3 = &n (rfp + 0x28)
+ __ add(r3, rfp, 0x28);
+ // r2 = &m (rfp + 0x2c)
+ __ add(r2, rfp, 0x2c);
+ // call the resolved dgemm_ entry
+ __ blr(rscratch1);
+
+ // restore rfp and lr
+ __ ldp(rfp, lr, Address(sp, 48));
+ // release the 0x60-byte frame
+ __ add(sp, sp, 0x60);
+ __ ret(lr);
+
+ return start;
+ }
+
+ /**
+ * public void dgemv(String trans, int m, int n,
+ * double alpha, double[] a, int lda,
+ * double[] x, int incx,
+ * double beta, double[] y, int incy)
+ *
+ * Arguments:
+ *
+ * Inputs:
+ * c_rarg0 - char* trans
+ * c_rarg1 - int m
+ * c_rarg2 - int n
+ * d0/c_farg0 - double alpha
+ * c_rarg3 - double[] a
+ * c_rarg4 - int lda
+ * c_rarg5 - double[] x
+ * c_rarg6 - int incx
+ * d1/c_farg1 - double beta
+ * c_rarg7 - double[] y
+ * [sp] - int incy
+ *
+ * Output:
+ * null
+ *
+ */
+
+ address generate_dgemvDgemv(address library) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "dgemv_dgemv");
+
+ address fn = get_BLAS_func_entry(library, "dgemv_");
+ if (fn == NULL) return NULL; // no stub when the "dgemv_" symbol cannot be resolved
+
+ address start = __ pc();
+ BLOCK_COMMENT("Entry: ");
+
+ Register trans = c_rarg0;
+ Register m = c_rarg1;
+ Register n = c_rarg2;
+ Register a = c_rarg3;
+ Register lda = c_rarg4;
+ Register x = c_rarg5;
+ Register incx = c_rarg6;
+ Register y = c_rarg7;
+
+ FloatRegister alpha = c_farg0;
+ FloatRegister beta = c_farg1;
+
+ __ sub(sp, sp, 0x50);
+ __ stp(rfp, lr, Address(sp, 32));
+ __ add(rfp, sp, 0x20);
+
+ // trans stays in r0; the by-value scalars are spilled into the frame so their addresses can be passed below
+ __ strw(m, Address(rfp, 44));
+ __ strw(n, Address(rfp, 40));
+ __ strd(alpha, Address(rfp, 32));
+ __ strw(lda, Address(rfp, 28));
+ __ strw(incx, Address(rfp, 24));
+ __ strd(beta, Address(rfp, 16));
+
+ // outgoing stack arguments
+ // &incy (incoming slot at rfp + 0x30) at sp + 16, y at sp + 8, &beta (rfp + 0x10) at sp
+ __ add(r1, rfp, 0x30);
+ __ str(r1, Address(sp, 16));
+ __ str(y, Address(sp, 8));
+ __ add(r1, rfp, 0x10);
+ __ str(r1, Address(sp));
+
+ __ add(r7, rfp, 0x18); // r7 = &incx
+ __ mov(r6, x); // x moves from r5 to r6
+ __ add(r5, rfp, 0x1c); // r5 = &lda
+ __ mov(r4, a); // a moves from r3 to r4
+ __ add(r3, rfp, 0x20); // r3 = &alpha
+ __ add(r2, rfp, 0x28); // r2 = &n
+ __ add(r1, rfp, 0x2c); // r1 = &m
+
+ __ mov(rscratch1, fn);
+ __ blr(rscratch1);
+
+ __ ldp(rfp, lr, Address(sp, 32));
+ __ add(sp, sp, 0x50);
+ __ ret(lr);
+
+ return start;
+ }
+
+
+
/**
* Arguments:
*
@@ -4252,6 +4502,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
}
+
+ if (UseF2jBLASIntrinsics) {
+ StubRoutines::_BLAS_library = load_BLAS_library();
+ // F2jBLAS intrinsics will use the implementations in the BLAS dynamic library
+ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS();
+ StubRoutines::_dgemmDgemm = generate_dgemmDgemm(StubRoutines::_BLAS_library);
+ StubRoutines::_dgemvDgemv = generate_dgemvDgemv(StubRoutines::_BLAS_library);
+ }
}
void generate_all() {
@@ -4296,10 +4554,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare = g.generate_multiply();
}
- if (UseF2jBLASIntrinsics) {
- StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS();
- }
-
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
diff --git a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
index ae5cb3f32..924b6670f 100644
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
@@ -856,6 +856,250 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
return generate_native_entry(false);
}
+// Access the char-array of String
+void InterpreterGenerator::load_String_value(Register src, Register dst) {
+ // Loads the String's value array oop into dst. src is left unmodified so a following
+ // load_String_offset(src, ...) still addresses the String itself. (Original note: cooperate with JDK-8243996.)
+ int value_offset = java_lang_String::value_offset_in_bytes();
+
+ __ load_heap_oop(dst, Address(src, value_offset));
+}
+
+void InterpreterGenerator::load_String_offset(Register src, Register dst) {
+ __ mov(dst, 0);
+
+ // dst stays 0 unless String has an 'offset' field; in that case the field is read with a
+ // base + displacement address, so src (the String oop) is not modified. (Related: JDK-8243996)
+ if (java_lang_String::has_offset_field()) {
+ int offset_offset = java_lang_String::offset_offset_in_bytes();
+ // 32-bit load of the offset field
+ __ ldrw(dst, Address(src, offset_offset));
+ }
+}
+
+void InterpreterGenerator::emit_array_address(Register src, Register idx,
+ Register dst, BasicType type) {
+ // dst = src + (idx << log2(element size)) + base offset of the first element; idx is overwritten.
+ const int log2_elem_bytes = exact_log2(type2aelembytes(type));
+ const int first_elem_offset = arrayOopDesc::base_offset_in_bytes(type);
+
+ __ lsl(idx, idx, log2_elem_bytes);
+ __ add(idx, idx, first_elem_offset);
+ __ add(dst, src, idx);
+}
+
+/**
+ * Stub Arguments:
+ *
+ * c_rarg0 - char* transa
+ * c_rarg1 - char* transb
+ * c_rarg2 - int m
+ * c_rarg3 - int n
+ * c_rarg4 - int k
+ * d0 - double alpha
+ * c_rarg5 - double[] A
+ * c_rarg6 - int lda
+ * c_rarg7 - double[] B
+ * d1 - double beta
+ * [sp + 16] - int ldc
+ * [sp + 8] - double[] C
+ * [sp] - int ldb
+ *
+ */
+address InterpreterGenerator::generate_Dgemm_dgemm_entry() {
+ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemmDgemm() == NULL)) return NULL;
+
+ address entry = __ pc();
+
+ // r13: senderSP must be preserved for slow path
+
+ // Arguments are reversed on java expression stack
+ const Register ta = c_rarg0;
+ const Register tb = c_rarg1;
+ const Register m = c_rarg2;
+ const Register n = c_rarg3;
+ const Register k = c_rarg4;
+ const FloatRegister alpha = c_farg0;
+ const Register A = c_rarg5;
+ const Register lda = c_rarg6;
+ const Register B = c_rarg7;
+ const FloatRegister beta = c_farg1;
+ const Register tmp1 = rscratch1;
+ const Register tmp2 = rscratch2;
+
+ // transa: replace the String reference with the address of its char data
+ __ ldr(ta, Address(esp, 17 * wordSize));
+ load_String_value(ta, tmp1);
+ load_String_offset(ta, tmp2);
+ emit_array_address(tmp1, tmp2, ta, T_CHAR);
+ // transb: replace the String reference with the address of its char data
+ __ ldr(tb, Address(esp, 16 * wordSize));
+ load_String_value(tb, tmp1);
+ load_String_offset(tb, tmp2);
+ emit_array_address(tmp1, tmp2, tb, T_CHAR);
+ // m, n, k
+ __ ldrw(m, Address(esp, 15 * wordSize));
+ __ ldrw(n, Address(esp, 14 * wordSize));
+ __ ldrw(k, Address(esp, 13 * wordSize));
+ // alpha
+ __ ldrd(alpha, Address(esp, 11 * wordSize));
+ // A: array reference (slot 10) plus element index (slot 9) -> element address
+ __ ldr(tmp1, Address(esp, 10 * wordSize));
+ __ mov(tmp2, 0); // NOTE(review): looks redundant, tmp2 is overwritten by the next load
+ __ ldrw(tmp2, Address(esp, 9 * wordSize));
+ emit_array_address(tmp1, tmp2, A, T_DOUBLE);
+ // lda
+ __ ldrw(lda, Address(esp, 8 * wordSize));
+ // B: array reference (slot 7) plus element index (slot 6) -> element address
+ __ ldr(tmp1, Address(esp, 7 * wordSize));
+ __ ldrw(tmp2, Address(esp, 6 * wordSize));
+ emit_array_address(tmp1, tmp2, B, T_DOUBLE);
+ // beta
+ __ ldrd(beta, Address(esp, 3 * wordSize));
+ // Start pushing arguments to machine stack.
+ //
+ // Remove the incoming args, peeling the machine SP back to where it
+ // was in the caller. This is not strictly necessary, but unless we
+ // do so the stack frame may have a garbage FP; this ensures a
+ // correct call stack that we can always unwind. The ANDR should be
+ // unnecessary because the sender SP in r13 is always aligned, but
+ // it doesn't hurt.
+ __ andr(sp, r13, -16);
+ __ str(lr, Address(sp, -wordSize)); // lr is saved below sp; sp is only lowered just before the call (NOTE(review): confirm nothing can clobber below-sp memory in between)
+ // ldc
+ __ ldrw(tmp1, Address(esp, 0x0));
+ __ strw(tmp1, Address(sp, 2 * -wordSize));
+ // C: array reference (slot 2) plus element index (slot 1) -> element address
+ __ ldr(tmp1, Address(esp, 2 * wordSize));
+ __ ldrw(tmp2, Address(esp, wordSize));
+ emit_array_address(tmp1, tmp2, tmp1, T_DOUBLE);
+ __ str(tmp1, Address(sp, 3 * -wordSize));
+ // ldb
+ __ ldrw(tmp2, Address(esp, 5 * wordSize));
+ __ strw(tmp2, Address(sp, 4 * -wordSize));
+
+ // Lower sp over the four slots written above (ldb at sp, C, ldc, lr) and call the stub
+ __ add(sp, sp, 4 * -wordSize);
+ address fn = CAST_FROM_FN_PTR(address, StubRoutines::dgemmDgemm());
+ __ mov(tmp1, fn);
+ __ blr(tmp1);
+
+ __ ldr(lr, Address(sp, 3 * wordSize)); // reload the lr saved before the call
+ // For assert(Rd != sp || imm % 16 == 0)
+ __ add(sp, sp, 4 * wordSize);
+ __ br(lr);
+
+ return entry;
+}
+
+address InterpreterGenerator::generate_Dgemv_dgemv_entry() {
+ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemvDgemv() == NULL)) return NULL;
+ address entry = __ pc();
+
+ const Register trans = c_rarg0; // trans
+ const Register m = c_rarg1; // m
+ const Register n = c_rarg2; // n
+ const Register a = c_rarg3; // array a addr
+ const Register lda = c_rarg4; // lda
+ const Register x = c_rarg5; // array x addr
+ const Register incx = c_rarg6; // incx
+ const Register y = c_rarg7; // array y addr
+
+ const FloatRegister alpha = v0; // alpha
+ const FloatRegister beta = v1; // beta
+
+ const Register tmp1 = rscratch1;
+ const Register tmp2 = rscratch2;
+
+ // esp: expression stack of caller
+ // dgemv parameter ---> the position in stack ---> move to register
+ // | char* trans | | esp + 15 | | r0 |
+ // | int m | | esp + 14 | | r1 |
+ // | int n | | esp + 13 | | r2 |
+ // | double alpha | | esp + 11 | | v0 |
+ // ---------------- ------------ --------
+ // | double* a | | esp + 10 | | |
+ // | | | | | r3 |
+ // | int a_offset | | esp + 9 | | |
+ // ---------------- ------------ --------
+ // | int lda | | esp + 8 | | r4 |
+ // ---------------- ------------ --------
+ // | double* x | | esp + 7 | | |
+ // | | | | | r5 |
+ // | int x_offset | | esp + 6 | | |
+ // ---------------- ------------ --------
+ // | int incx | | esp + 5 | | r6 |
+ // | double beta | | esp + 3 | | v1 |
+ // ---------------- ------------ --------
+ // | double* y | | esp + 2 | | |
+ // | | | | | r7 |
+ // | int y_offset | | esp + 1 | | |
+ // ---------------- ------------ --------
+ // | int incy | | esp | | [sp] |
+
+
+ // trans: replace the String reference with the address of its char data
+ __ ldr(trans, Address(esp, 15 * wordSize));
+ load_String_value(trans, tmp1);
+ load_String_offset(trans, tmp2);
+ emit_array_address(tmp1, tmp2, trans, T_CHAR);
+ // m, n
+ __ ldrw(m, Address(esp, 14 * wordSize));
+ __ ldrw(n, Address(esp, 13 * wordSize));
+
+ // alpha
+ __ ldrd(alpha, Address(esp, 11 * wordSize));
+
+ // a: array reference plus a_offset -> element address
+ __ ldr(tmp1, Address(esp, 10 * wordSize));
+ __ mov(tmp2, zr);
+ __ ldrw(tmp2, Address(esp, 9 * wordSize));
+ emit_array_address(tmp1, tmp2, a, T_DOUBLE);
+
+ // lda
+ __ ldrw(lda, Address(esp, 8 * wordSize));
+
+ // x: array reference plus x_offset -> element address
+ __ ldr(tmp1, Address(esp, 7 * wordSize));
+ __ mov(tmp2, zr);
+ __ ldrw(tmp2, Address(esp, 6 * wordSize));
+ emit_array_address(tmp1, tmp2, x, T_DOUBLE);
+
+ // incx
+ __ ldrw(incx, Address(esp, 5 * wordSize));
+
+ // beta
+ __ ldrd(beta, Address(esp, 3 * wordSize));
+
+ // y: array reference plus y_offset -> element address
+ __ ldr(tmp1, Address(esp, 2 * wordSize));
+ __ mov(tmp2, zr);
+ __ ldrw(tmp2, Address(esp, wordSize));
+ emit_array_address(tmp1, tmp2, y, T_DOUBLE);
+
+ // reset sp from the sender SP in r13 and save lr just below it
+ __ andr(sp, r13, -16);
+ __ str(lr, Address(sp, -wordSize));
+
+ // incy, stored so that it ends up at [sp] once sp is lowered
+ __ ldrw(tmp1, Address(esp, 0));
+ __ strw(tmp1, Address(sp, 2 * -wordSize));
+
+ __ add(sp, sp, -2 * wordSize);
+
+ // call function
+ address fn = CAST_FROM_FN_PTR(address, StubRoutines::dgemvDgemv());
+ __ mov(tmp1, fn);
+ __ blr(tmp1);
+
+ // reload the saved lr and release the two stack slots
+ __ ldr(lr, Address(sp, wordSize));
+ __ add(sp, sp, 2 * wordSize);
+ __ br(lr);
+
+ return entry;
+}
+
void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// Bang each page in the shadow zone. We can't assume it's been done for
// an interpreter frame with greater than a page of locals, so each page
@@ -1575,6 +1819,10 @@ address AbstractInterpreterGenerator::generate_method_entry(
: // fall thru
case Interpreter::java_util_zip_CRC32_updateByteBuffer
: entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break;
+ case Interpreter::org_netlib_blas_Dgemm_dgemm
+ : entry_point = ((InterpreterGenerator*)this)->generate_Dgemm_dgemm_entry(); break;
+ case Interpreter::org_netlib_blas_Dgemv_dgemv
+ : entry_point = ((InterpreterGenerator*)this)->generate_Dgemv_dgemv_entry(); break;
default : ShouldNotReachHere(); break;
}
diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
index f1160792a..477c6e550 100644
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp
@@ -754,6 +754,13 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
}
}
+void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) {
+ fatal("BLAS intrinsics are not implemented on this platform!");
+}
+
+void LIRGenerator::do_dgemv_dgemv(Intrinsic* x) {
+ fatal("BLAS intrinsics are not implemented on this platform!");
+}
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
assert(x->number_of_arguments() == 5, "wrong type");
diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
index dd23f005b..d1ecbaeb4 100644
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
@@ -896,6 +896,13 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
}
}
+void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) {
+ fatal("BLAS intrinsics are not implemented on this platform!");
+}
+
+void LIRGenerator::do_dgemv_dgemv(Intrinsic *x) {
+ fatal("Blas intrinsics are not implemented on this platform!");
+}
void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
assert(x->number_of_arguments() == 5, "wrong type");
diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
index 459315cb7..79b2b2bb1 100644
--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
+++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp
@@ -3672,6 +3672,20 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
case vmIntrinsics::_fullFence :
break;
+ case vmIntrinsics::_dgemm_dgemm:
+ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemmDgemm() == NULL)) {
+ return false;
+ }
+ cantrap = false;
+ preserves_state = true;
+ break;
+
+ case vmIntrinsics::_dgemv_dgemv:
+ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemvDgemv() == NULL)) return false;
+ cantrap = false;
+ preserves_state = true;
+ break;
+
default : return false; // do not inline
}
// create intrinsic node
diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
index 65c04e3e5..070fd8052 100644
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
@@ -1208,7 +1208,7 @@ void LIRGenerator::do_Return(Return* x) {
set_no_result(x);
}
-// Examble: ref.get()
+// Example: ref.get()
// Combination of LoadField and g1 pre-write barrier
void LIRGenerator::do_Reference_get(Intrinsic* x) {
@@ -1220,7 +1220,7 @@ void LIRGenerator::do_Reference_get(Intrinsic* x) {
LIRItem reference(x->argument_at(0), this);
reference.load_item();
- // need to perform the null check on the reference objecy
+ // need to perform the null check on the reference object
CodeEmitInfo* info = NULL;
if (x->needs_null_check()) {
info = state_for(x);
@@ -1422,6 +1422,35 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) {
return result;
}
+// Load the array stored in the value field of the given String
+LIR_Opr LIRGenerator::load_String_value(LIR_Opr str) {
+  LIR_Opr chars = new_register(T_ARRAY);
+  LIR_Opr field_ptr = new_pointer_register();
+  const int field_offset = java_lang_String::value_offset_in_bytes();
+
+  // field_ptr = str + byte offset of the value field
+  __ add(str, LIR_OprFact::intConst(field_offset), field_ptr);
+  __ load(new LIR_Address(field_ptr, T_ARRAY), chars);
+
+  return chars;
+}
+
+LIR_Opr LIRGenerator::load_String_offset(LIR_Opr str) {
+  // The result register is reserved before the String layout is inspected.
+  LIR_Opr result = new_register(T_INT);
+
+  // A String without an offset field yields the constant 0.
+  if (!java_lang_String::has_offset_field()) {
+    return LIR_OprFact::intConst(0);
+  }
+
+  // field_ptr = str + byte offset of the offset field; load the int stored there
+  LIR_Opr field_ptr = new_pointer_register();
+  __ add(str, LIR_OprFact::intConst(java_lang_String::offset_offset_in_bytes()), field_ptr);
+  __ load(new LIR_Address(field_ptr, T_INT), result);
+  return result;
+}
+
// Various barriers
void LIRGenerator::pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
@@ -3290,6 +3328,14 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
do_update_CRC32(x);
break;
+ case vmIntrinsics::_dgemm_dgemm:
+ do_dgemm_dgemm(x);
+ break;
+
+ case vmIntrinsics::_dgemv_dgemv:
+ do_dgemv_dgemv(x);
+ break;
+
default: ShouldNotReachHere(); break;
}
}
diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
index 24d072b36..57d675c5b 100644
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp
@@ -210,6 +210,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
// Given an immediate value, return an operand usable in logical ops.
LIR_Opr load_immediate(int x, BasicType type);
+ // Get String value and offset
+ LIR_Opr load_String_value(LIR_Opr str);
+ LIR_Opr load_String_offset(LIR_Opr str);
+
void set_result(Value x, LIR_Opr opr) {
assert(opr->is_valid(), "must set to valid value");
assert(x->operand()->is_illegal(), "operand should never change");
@@ -251,6 +255,8 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
void do_FPIntrinsics(Intrinsic* x);
void do_Reference_get(Intrinsic* x);
void do_update_CRC32(Intrinsic* x);
+ void do_dgemm_dgemm(Intrinsic* x);
+ void do_dgemv_dgemv(Intrinsic* x);
void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp
index f379a0395..3ece7f6ea 100644
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp
@@ -305,6 +305,8 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, JFR_TIME_FUNCTION);
#endif
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
+ FUNCTION_CASE(entry, StubRoutines::dgemmDgemm());
+ FUNCTION_CASE(entry, StubRoutines::dgemvDgemv());
#undef FUNCTION_CASE
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.cpp b/hotspot/src/share/vm/classfile/vmSymbols.cpp
index a5f89dbf8..34514022a 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp
@@ -333,6 +333,8 @@ bool vmIntrinsics::should_be_pinned(vmIntrinsics::ID id) {
#endif
case vmIntrinsics::_currentTimeMillis:
case vmIntrinsics::_nanoTime:
+ case vmIntrinsics::_dgemm_dgemm:
+ case vmIntrinsics::_dgemv_dgemv:
return true;
default:
return false;
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
index 6bd8dbedd..942d172a1 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@@ -857,6 +857,14 @@
do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \
do_name( ddot_name, "ddot") \
do_signature(ddot_signature, "(I[DI[DI)D") \
+ do_class(org_netlib_blas_dgemm, "org/netlib/blas/Dgemm") \
+ do_intrinsic(_dgemm_dgemm, org_netlib_blas_dgemm, dgemm_name, dgemm_signature, F_S) \
+ do_name( dgemm_name, "dgemm") \
+ do_signature(dgemm_signature, "(Ljava/lang/String;Ljava/lang/String;IIID[DII[DIID[DII)V") \
+ do_class(org_netlib_blas_dgemv, "org/netlib/blas/Dgemv") \
+ do_intrinsic(_dgemv_dgemv, org_netlib_blas_dgemv, dgemv_name, dgemv_signature, F_S) \
+ do_name( dgemv_name, "dgemv") \
+ do_signature(dgemv_signature, "(Ljava/lang/String;IID[DII[DIID[DII)V") \
\
/* support for sun.security.provider.SHA2 */ \
do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \
diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp
index e14c50bf0..293382b3c 100644
--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp
+++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp
@@ -100,6 +100,8 @@ class AbstractInterpreter: AllStatic {
java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update()
java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes()
java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
+ org_netlib_blas_Dgemm_dgemm, // implementation of org.netlib.blas.Dgemm.dgemm()
+ org_netlib_blas_Dgemv_dgemv, // implementation of org.netlib.blas.Dgemv.dgemv()
number_of_method_entries,
invalid = -1
};
diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.cpp b/hotspot/src/share/vm/interpreter/cppInterpreter.cpp
index 0007aa8be..9e48a1d94 100644
--- a/hotspot/src/share/vm/interpreter/cppInterpreter.cpp
+++ b/hotspot/src/share/vm/interpreter/cppInterpreter.cpp
@@ -31,17 +31,20 @@
#ifdef CC_INTERP
# define __ _masm->
-void CppInterpreter::initialize() {
+void CppInterpreter::initialize_stub() {
if (_code != NULL) return;
+ int code_size = InterpreterCodeSize;
+ NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space
+ _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,
+ "Interpreter");
+}
+
+void CppInterpreter::initialize_code() {
AbstractInterpreter::initialize();
// generate interpreter
{ ResourceMark rm;
TraceTime timer("Interpreter generation", TraceStartupTime);
- int code_size = InterpreterCodeSize;
- NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space
- _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,
- "Interpreter");
InterpreterGenerator g(_code);
if (PrintInterpreter) print();
}
diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp
index 6a6447503..58efcfaf2 100644
--- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp
+++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp
@@ -54,7 +54,8 @@ class CppInterpreter: public AbstractInterpreter {
public:
// Initialization/debugging
- static void initialize();
+ static void initialize_stub();
+ static void initialize_code();
// this only returns whether a pc is within generated code for the interpreter.
// This is a moderately dubious interface for the c++ interpreter. Only
diff --git a/hotspot/src/share/vm/interpreter/interpreter.cpp b/hotspot/src/share/vm/interpreter/interpreter.cpp
index 7ce4bdbb3..a313f2e63 100644
--- a/hotspot/src/share/vm/interpreter/interpreter.cpp
+++ b/hotspot/src/share/vm/interpreter/interpreter.cpp
@@ -85,8 +85,6 @@ void InterpreterCodelet::print_on(outputStream* st) const {
// Implementation of platform independent aspects of Interpreter
void AbstractInterpreter::initialize() {
- if (_code != NULL) return;
-
// make sure 'imported' classes are initialized
if (CountBytecodes || TraceBytecodes || StopInterpreterAt) BytecodeCounter::reset();
if (PrintBytecodeHistogram) BytecodeHistogram::reset();
@@ -114,8 +112,22 @@ void AbstractInterpreter::print() {
}
-void interpreter_init() {
- Interpreter::initialize();
+// The reason that interpreter initialization is split into two parts is that the first part
+// needs to run before methods are loaded (which with CDS implies linked also), and the other
+// part needs to run after. The reason is that when methods are loaded (with CDS) or linked
+// (without CDS), the i2c adapters are generated that assert we are currently in the interpreter.
+// Asserting that requires knowledge about where the interpreter is in memory. Therefore,
+// establishing the interpreter address must be done before methods are loaded. However,
+// we would like to actually generate the interpreter after methods are loaded. That allows
+// us to remove otherwise hardcoded offsets regarding fields that are needed in the interpreter
+// code. This leads to a split into 1. reserving the memory for the interpreter, 2. loading methods
+// and 3. generating the interpreter.
+void interpreter_init_stub() {
+ Interpreter::initialize_stub();
+}
+
+void interpreter_init_code() {
+ Interpreter::initialize_code();
#ifndef PRODUCT
if (TraceBytecodes) BytecodeTracer::set_closure(BytecodeTracer::std_closure());
#endif // PRODUCT
@@ -251,6 +263,13 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
return java_lang_ref_reference_get;
}
+ if (UseF2jBLASIntrinsics) {
+ switch (m->intrinsic_id()) {
+ case vmIntrinsics::_dgemm_dgemm: return org_netlib_blas_Dgemm_dgemm;
+ case vmIntrinsics::_dgemv_dgemv: return org_netlib_blas_Dgemv_dgemv;
+ }
+ }
+
// Accessor method?
if (m->is_accessor()) {
assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
@@ -311,6 +330,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) {
case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break;
case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break;
case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break;
+ case org_netlib_blas_Dgemm_dgemm : tty->print("org_netlib_blas_Dgemm_dgemm"); break;
+ case org_netlib_blas_Dgemv_dgemv : tty->print("org_netlib_blas_Dgemv_dgemv"); break;
default:
if (kind >= method_handle_invoke_FIRST &&
kind <= method_handle_invoke_LAST) {
diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp
index 1520c7b1c..f38f05117 100644
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp
@@ -32,12 +32,20 @@
# define __ _masm->
-void TemplateInterpreter::initialize() {
+void TemplateInterpreter::initialize_stub() {
if (_code != NULL) return;
// assertions
assert((int)Bytecodes::number_of_codes <= (int)DispatchTable::length,
"dispatch table too small");
+ // allocate interpreter
+ int code_size = InterpreterCodeSize;
+ NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space
+ _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,
+ "Interpreter");
+}
+
+void TemplateInterpreter::initialize_code() {
AbstractInterpreter::initialize();
TemplateTable::initialize();
@@ -45,10 +53,6 @@ void TemplateInterpreter::initialize() {
// generate interpreter
{ ResourceMark rm;
TraceTime timer("Interpreter generation", TraceStartupTime);
- int code_size = InterpreterCodeSize;
- NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space
- _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL,
- "Interpreter");
InterpreterGenerator g(_code);
if (PrintInterpreter) print();
}
@@ -401,6 +405,11 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(java_util_zip_CRC32_updateByteBuffer)
}
+ if (UseF2jBLASIntrinsics) {
+ method_entry(org_netlib_blas_Dgemm_dgemm)
+ method_entry(org_netlib_blas_Dgemv_dgemv)
+ }
+
initialize_method_handle_entries();
// all native method kinds (must be one contiguous block)
diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp
index 5f76dca8a..96da6353c 100644
--- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp
+++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp
@@ -132,7 +132,8 @@ class TemplateInterpreter: public AbstractInterpreter {
public:
// Initialization/debugging
- static void initialize();
+ static void initialize_stub();
+ static void initialize_code();
// this only returns whether a pc is within generated code for the interpreter.
static bool contains(address pc) { return _code != NULL && _code->contains(pc); }
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
index 68631dbf2..0e0cc1028 100644
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@@ -979,7 +979,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
- strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0)
+ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "dgemm_dgemm") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "dgemv_dgemv") == 0) // ')' ends the name list: every strcmp stays behind the _name != NULL guard
))) {
call->dump();
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
diff --git a/hotspot/src/share/vm/opto/graphKit.cpp b/hotspot/src/share/vm/opto/graphKit.cpp
index 41a067ce2..1c3bc2e8c 100644
--- a/hotspot/src/share/vm/opto/graphKit.cpp
+++ b/hotspot/src/share/vm/opto/graphKit.cpp
@@ -2372,7 +2372,11 @@ Node* GraphKit::make_runtime_call(int flags,
Node* parm0, Node* parm1,
Node* parm2, Node* parm3,
Node* parm4, Node* parm5,
- Node* parm6, Node* parm7) {
+ Node* parm6, Node* parm7,
+ Node* parm8, Node* parm9,
+ Node* parm10, Node* parm11,
+ Node* parm12, Node* parm13,
+ Node* parm14, Node* parm15) {
// Slow-path call
bool is_leaf = !(flags & RC_NO_LEAF);
bool has_io = (!is_leaf && !(flags & RC_NO_IO));
@@ -2415,7 +2419,15 @@ Node* GraphKit::make_runtime_call(int flags,
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
- /* close each nested if ===> */ } } } } } } } }
+ if (parm8 != NULL) { call->init_req(TypeFunc::Parms+8, parm8);
+ if (parm9 != NULL) { call->init_req(TypeFunc::Parms+9, parm9);
+ if (parm10 != NULL) { call->init_req(TypeFunc::Parms+10, parm10);
+ if (parm11 != NULL) { call->init_req(TypeFunc::Parms+11, parm11);
+ if (parm12 != NULL) { call->init_req(TypeFunc::Parms+12, parm12);
+ if (parm13 != NULL) { call->init_req(TypeFunc::Parms+13, parm13);
+ if (parm14 != NULL) { call->init_req(TypeFunc::Parms+14, parm14);
+ if (parm15 != NULL) { call->init_req(TypeFunc::Parms+15, parm15);
+ /* close each nested if ===> */ } } } } } } } } } } } } } } } }
assert(call->in(call->req()-1) != NULL, "must initialize all parms");
if (!is_leaf) {
diff --git a/hotspot/src/share/vm/opto/graphKit.hpp b/hotspot/src/share/vm/opto/graphKit.hpp
index 7a363fd33..e9a061acf 100644
--- a/hotspot/src/share/vm/opto/graphKit.hpp
+++ b/hotspot/src/share/vm/opto/graphKit.hpp
@@ -818,7 +818,11 @@ class GraphKit : public Phase {
Node* parm0 = NULL, Node* parm1 = NULL,
Node* parm2 = NULL, Node* parm3 = NULL,
Node* parm4 = NULL, Node* parm5 = NULL,
- Node* parm6 = NULL, Node* parm7 = NULL);
+ Node* parm6 = NULL, Node* parm7 = NULL,
+ Node* parm8 = NULL, Node* parm9 = NULL,
+ Node* parm10 = NULL, Node* parm11 = NULL,
+ Node* parm12 = NULL, Node* parm13 = NULL,
+ Node* parm14 = NULL, Node* parm15 = NULL);
enum { // flag values for make_runtime_call
RC_NO_FP = 1, // CallLeafNoFPNode
RC_NO_IO = 2, // do not hook IO edges
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
index 5cbc0f012..10eeea217 100644
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@@ -336,6 +336,8 @@ class LibraryCallKit : public GraphKit {
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_ddotF2jBLAS();
+ bool inline_dgemmDgemm();
+ bool inline_dgemvDgemv();
bool inline_profileBoolean();
};
@@ -589,6 +591,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
break;
case vmIntrinsics::_f2jblas_ddot:
+ case vmIntrinsics::_dgemm_dgemm:
+ case vmIntrinsics::_dgemv_dgemv:
if (!UseF2jBLASIntrinsics) return NULL;
break;
@@ -988,9 +992,13 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_profileBoolean:
return inline_profileBoolean();
+
case vmIntrinsics::_f2jblas_ddot:
return inline_ddotF2jBLAS();
-
+ case vmIntrinsics::_dgemm_dgemm:
+ return inline_dgemmDgemm();
+ case vmIntrinsics::_dgemv_dgemv:
+ return inline_dgemvDgemv();
default:
// If you get here, it may be that someone has added a new intrinsic
// to the list in vmSymbols.hpp without implementing it here.
@@ -6354,6 +6362,144 @@ bool LibraryCallKit::inline_ddotF2jBLAS() {
}
/**
+ * Intrinsic for void org.netlib.blas.Dgemm.dgemm(String transa, String transb,
+ *     int m, int n, int k, double alpha, double[] a, int offset_a, int lda,
+ *     double[] b, int offset_b, int ldb, double beta, double[] c, int offset_c, int ldc).
+ * Emits a leaf call to the StubRoutines::dgemmDgemm() stub; returns false (no
+ * intrinsic) when the stub is absent or a/b/c are not known to be double arrays.
+ */
+bool LibraryCallKit::inline_dgemmDgemm() {
+  assert(callee()->signature()->count() == 16, "Dgemm.dgemm has 16 parameters");
+
+  address stubAddr = StubRoutines::dgemmDgemm();
+  if (stubAddr == NULL) return false;
+
+  Node* transa = argument(0);
+  Node* transb = argument(1);
+  Node* m = argument(2);
+  Node* n = argument(3);
+  Node* k = argument(4);
+  Node* alpha = round_double_node(argument(5));  // double: uses argument slots 5 and 6
+  Node* a = argument(7);
+  Node* a_offset = argument(8);
+  Node* lda = argument(9);
+  Node* b = argument(10);
+  Node* b_offset = argument(11);
+  Node* ldb = argument(12);
+  Node* beta = round_double_node(argument(13));  // double: uses argument slots 13 and 14
+  Node* c = argument(15);
+  Node* c_offset = argument(16);
+  Node* ldc = argument(17);
+
+  const Type* a_type = a->Value(&_gvn);
+  const Type* b_type = b->Value(&_gvn);
+  const Type* c_type = c->Value(&_gvn);
+  const TypeAryPtr* a_base_type = a_type->isa_aryptr();
+  const TypeAryPtr* b_base_type = b_type->isa_aryptr();
+  const TypeAryPtr* c_base_type = c_type->isa_aryptr();
+  if (a_base_type == NULL || b_base_type == NULL || c_base_type == NULL) return false;
+
+  ciKlass* a_klass = a_base_type->klass();
+  ciKlass* b_klass = b_base_type->klass();
+  ciKlass* c_klass = c_base_type->klass();
+  if (a_klass == NULL || b_klass == NULL || c_klass == NULL) return false;
+
+  BasicType a_elem_type = a_klass->as_array_klass()->element_type()->basic_type();
+  BasicType b_elem_type = b_klass->as_array_klass()->element_type()->basic_type();
+  BasicType c_elem_type = c_klass->as_array_klass()->element_type()->basic_type();  // c's own klass, so c is really checked
+  if (a_elem_type != T_DOUBLE || b_elem_type != T_DOUBLE || c_elem_type != T_DOUBLE) return false;
+
+  // Fold each offset into its array: *_start is the address of element [offset]
+  Node* a_start = array_element_address(a, a_offset, a_elem_type);
+  Node* b_start = array_element_address(b, b_offset, b_elem_type);
+  Node* c_start = array_element_address(c, c_offset, c_elem_type);
+
+  // Address of each String's first char. NOTE(review): transa/transb get no null check here - confirm.
+  Node* transa_value = load_String_value(NULL, transa);
+  Node* transa_offset = load_String_offset(NULL, transa);
+  Node* transa_start = array_element_address(transa_value, transa_offset, T_CHAR);
+  Node* transb_value = load_String_value(NULL, transb);
+  Node* transb_offset = load_String_offset(NULL, transb);
+  Node* transb_start = array_element_address(transb_value, transb_offset, T_CHAR);
+
+  const char *stubName = "dgemm_dgemm";
+  make_runtime_call(RC_LEAF, OptoRuntime::dgemmDgemm_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    transa_start, transb_start, m, n, k, alpha, top(),  // top() fills the second slot of each double
+                    a_start, lda, b_start, ldb, beta, top(), c_start, ldc);
+
+  return true;
+}
+
+/**
+ * Intrinsic for void org.netlib.blas.Dgemv.dgemv(String trans, int m, int n,
+ *     double alpha, double[] a, int _a_offset, int lda, double[] x, int _x_offset,
+ *     int incx, double beta, double[] y, int _y_offset, int incy).
+ * Returns false (no intrinsic) if the stub is absent or a/x/y are not known to be double arrays.
+ */
+bool LibraryCallKit::inline_dgemvDgemv() {
+  assert(callee()->signature()->count() == 14, "Dgemv.dgemv has 14 parameters");
+  Node* trans = argument(0);
+  Node* m = argument(1);
+  Node* n = argument(2);
+  Node* alpha = round_double_node(argument(3));  // double: uses argument slots 3 and 4
+  Node* a = argument(5);
+  Node* a_offset = argument(6);
+  Node* lda = argument(7);
+  Node* x = argument(8);
+  Node* x_offset = argument(9);
+  Node* incx = argument(10);
+  Node* beta = round_double_node(argument(11));  // double: uses argument slots 11 and 12
+  Node* y = argument(13);
+  Node* y_offset = argument(14);
+  Node* incy = argument(15);
+
+  const Type* a_type = a->Value(&_gvn);
+  const Type* x_type = x->Value(&_gvn);
+  const Type* y_type = y->Value(&_gvn);
+  const TypeAryPtr* a_base_type = a_type->isa_aryptr();
+  const TypeAryPtr* x_base_type = x_type->isa_aryptr();
+  const TypeAryPtr* y_base_type = y_type->isa_aryptr();
+  if (a_base_type == NULL || x_base_type == NULL || y_base_type == NULL) return false;
+
+  ciKlass* a_klass = a_base_type->klass();
+  ciKlass* x_klass = x_base_type->klass();
+  ciKlass* y_klass = y_base_type->klass();
+
+  if (a_klass == NULL || x_klass == NULL || y_klass == NULL) return false;
+
+  BasicType a_elem_type = a_klass->as_array_klass()->element_type()->basic_type();
+  BasicType x_elem_type = x_klass->as_array_klass()->element_type()->basic_type();
+  BasicType y_elem_type = y_klass->as_array_klass()->element_type()->basic_type();
+
+  if (a_elem_type != T_DOUBLE || x_elem_type != T_DOUBLE || y_elem_type != T_DOUBLE) return false;
+
+
+  address stubAddr = StubRoutines::dgemvDgemv();
+  if (stubAddr == NULL) return false;
+
+  // 'a_start' points to array a + scaled offset
+  Node* a_start = array_element_address(a, a_offset, a_elem_type);
+  // 'x_start' points to array x + scaled offset
+  Node* x_start = array_element_address(x, x_offset, x_elem_type);
+  // 'y_start' points to array y + scaled offset
+  Node* y_start = array_element_address(y, y_offset, y_elem_type);
+
+  Node* no_ctrl = NULL;
+
+  // Address of the String's first char. NOTE(review): trans gets no null check here - confirm.
+  Node* trans_value = load_String_value(no_ctrl, trans);
+  Node* trans_offset = load_String_offset(no_ctrl, trans);
+  Node* trans_start = array_element_address(trans_value, trans_offset, T_CHAR);
+
+  const char *stubName = "dgemv_dgemv";
+  make_runtime_call(RC_LEAF, OptoRuntime::dgemvDgemv_Type(), stubAddr, stubName,
+                    TypePtr::BOTTOM, trans_start, m, n, alpha, top(), a_start,  // top() fills the second slot of each double
+                    lda, x_start, incx, beta, top(), y_start, incy);
+  return true;
+}
+
+/**
* Calculate CRC32 for ByteBuffer.
* int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
*/
diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
index f1fe4d666..dc8f0c774 100644
--- a/hotspot/src/share/vm/opto/runtime.cpp
+++ b/hotspot/src/share/vm/opto/runtime.cpp
@@ -944,6 +944,81 @@ const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() {
return TypeFunc::make(domain, range);
}
+/**
+ * Call signature of the dgemm stub used to intrinsify
+ * void org.netlib.blas.Dgemm.dgemm(String transa, String transb, int m, int n, int k,
+ *     double alpha, double[] a, int offset_a, int lda, double[] b, int offset_b, int ldb,
+ *     double beta, double[] c, int offset_c, int ldc).
+ * The offset_* parameters have no slot here; each double takes a DOUBLE + HALF pair.
+ */
+const TypeFunc* OptoRuntime::dgemmDgemm_Type() {
+ // create input type (domain): 13 values, the two doubles use two slots each
+ int num_args = 15;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+
+ fields[argp++] = TypeAryPtr::CHARS; // char[] (first String argument, transa)
+ fields[argp++] = TypeAryPtr::CHARS; // char[] (second String argument, transb)
+ fields[argp++] = TypeInt::INT; // int m
+ fields[argp++] = TypeInt::INT; // int n
+ fields[argp++] = TypeInt::INT; // int k
+ fields[argp++] = Type::DOUBLE; // double alpha
+ fields[argp++] = Type::HALF; // second slot of alpha
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] a
+ fields[argp++] = TypeInt::INT; // int lda
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] b
+ fields[argp++] = TypeInt::INT; // int ldb
+ fields[argp++] = Type::DOUBLE; // double beta
+ fields[argp++] = Type::HALF; // second slot of beta
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] c
+ fields[argp++] = TypeInt::INT; // int ldc
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+ // no result type needed: the range tuple carries no value slot
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms + 0] = NULL; // void
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
+/**
+ * Call signature of the dgemv stub used to intrinsify
+ * void dgemv(String trans, int m, int n, double alpha, double[] a, int _a_offset, int lda,
+ *     double[] x, int _x_offset, int incx, double beta, double[] y, int _y_offset, int incy).
+ * The *_offset parameters have no slot here; each double takes a DOUBLE + HALF pair.
+ */
+const TypeFunc* OptoRuntime::dgemvDgemv_Type() {
+ // create input type (domain): 11 values, the two doubles use two slots each
+ int num_args = 13;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+
+ fields[argp++] = TypeAryPtr::CHARS; // char[] (the String argument, trans)
+ fields[argp++] = TypeInt::INT; // int m
+ fields[argp++] = TypeInt::INT; // int n
+ fields[argp++] = Type::DOUBLE; // double alpha
+ fields[argp++] = Type::HALF; // second slot of alpha
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] a
+ fields[argp++] = TypeInt::INT; // int lda
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] x
+ fields[argp++] = TypeInt::INT; // int incx
+ fields[argp++] = Type::DOUBLE; // double beta
+ fields[argp++] = Type::HALF; // second slot of beta
+ fields[argp++] = TypeAryPtr::DOUBLES; // double[] y
+ fields[argp++] = TypeInt::INT; // int incy
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+
+ // no result type needed: the range tuple carries no value slot
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms + 0] = NULL; // void
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
index 66d393c5c..e07c34c15 100644
--- a/hotspot/src/share/vm/opto/runtime.hpp
+++ b/hotspot/src/share/vm/opto/runtime.hpp
@@ -318,6 +318,8 @@ private:
static const TypeFunc* updateBytesCRC32_Type();
static const TypeFunc* ddotF2jBLAS_Type();
+ static const TypeFunc* dgemmDgemm_Type();
+ static const TypeFunc* dgemvDgemv_Type();
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
diff --git a/hotspot/src/share/vm/runtime/init.cpp b/hotspot/src/share/vm/runtime/init.cpp
index 1512ccc96..4c133bd4e 100644
--- a/hotspot/src/share/vm/runtime/init.cpp
+++ b/hotspot/src/share/vm/runtime/init.cpp
@@ -54,7 +54,8 @@ void VM_Version_init();
void os_init_globals(); // depends on VM_Version_init, before universe_init
void stubRoutines_init1();
jint universe_init(); // depends on codeCache_init and stubRoutines_init
-void interpreter_init(); // before any methods loaded
+void interpreter_init_stub(); // before any methods loaded
+void interpreter_init_code(); // after methods loaded, but before they are linked
void invocationCounter_init(); // before any methods loaded
void marksweep_init();
void accessFlags_init();
@@ -106,7 +107,7 @@ jint init_globals() {
if (status != JNI_OK)
return status;
- interpreter_init(); // before any methods loaded
+ interpreter_init_stub(); // before methods get loaded
invocationCounter_init(); // before any methods loaded
marksweep_init();
accessFlags_init();
@@ -114,6 +115,7 @@ jint init_globals() {
InterfaceSupport_init();
SharedRuntime::generate_stubs();
universe2_init(); // dependent on codeCache_init and stubRoutines_init1
+ interpreter_init_code(); // after universe2_init and before any method gets linked
referenceProcessor_init();
jni_handles_init();
#if INCLUDE_VM_STRUCTS
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
index 10f438bc5..f2106d13a 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
@@ -136,7 +136,10 @@ address StubRoutines::_sha512_implCompressMB = NULL;
address StubRoutines::_updateBytesCRC32 = NULL;
address StubRoutines::_crc_table_adr = NULL;
+address StubRoutines::_BLAS_library = NULL;
address StubRoutines::_ddotF2jBLAS = NULL;
+address StubRoutines::_dgemmDgemm = NULL;
+address StubRoutines::_dgemvDgemv = NULL;
address StubRoutines::_multiplyToLen = NULL;
address StubRoutines::_squareToLen = NULL;
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
index a4eeb910d..16075d9f4 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
@@ -214,7 +214,10 @@ class StubRoutines: AllStatic {
static address _updateBytesCRC32;
static address _crc_table_adr;
+ static address _BLAS_library;
static address _ddotF2jBLAS;
+ static address _dgemmDgemm;
+ static address _dgemvDgemv;
static address _multiplyToLen;
static address _squareToLen;
@@ -380,6 +383,8 @@ class StubRoutines: AllStatic {
static address crc_table_addr() { return _crc_table_adr; }
static address ddotF2jBLAS() { return _ddotF2jBLAS; }
+ static address dgemmDgemm() { return _dgemmDgemm; }
+ static address dgemvDgemv() { return _dgemvDgemv; }
static address multiplyToLen() {return _multiplyToLen; }
static address squareToLen() {return _squareToLen; }
--
2.12.3
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zhaosai-Simon/openjdk-1.8.0.git
git@gitee.com:zhaosai-Simon/openjdk-1.8.0.git
zhaosai-Simon
openjdk-1.8.0
openjdk-1.8.0
master

搜索帮助