diff options
Diffstat (limited to 'deps/lightning/lib/jit_x86-sse.c')
-rw-r--r-- | deps/lightning/lib/jit_x86-sse.c | 1569 |
1 files changed, 1569 insertions, 0 deletions
diff --git a/deps/lightning/lib/jit_x86-sse.c b/deps/lightning/lib/jit_x86-sse.c new file mode 100644 index 0000000..d09bda9 --- /dev/null +++ b/deps/lightning/lib/jit_x86-sse.c @@ -0,0 +1,1569 @@ +/* + * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * + * This file is part of GNU lightning. + * + * GNU lightning is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU lightning is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + * + * Authors: + * Paulo Cesar Pereira de Andrade + */ + +#if PROTO +# if __X32 +# define sse_address_p(i0) 1 +# else +# if __X64_32 +# define sse_address_p(i0) ((jit_word_t)(i0) >= 0) +# else +# define sse_address_p(i0) can_sign_extend_int_p(i0) +# endif +# endif +# define _XMM6_REGNO 6 +# define _XMM7_REGNO 7 +# define _XMM8_REGNO 8 +# define _XMM9_REGNO 9 +# define _XMM10_REGNO 10 +# define _XMM11_REGNO 11 +# define _XMM12_REGNO 12 +# define _XMM13_REGNO 13 +# define _XMM14_REGNO 14 +# define _XMM15_REGNO 15 +#define X86_SSE_MOV 0x10 +#define X86_SSE_MOV1 0x11 +#define X86_SSE_MOVLP 0x12 +#define X86_SSE_MOVHP 0x16 +#define X86_SSE_MOVA 0x28 +#define X86_SSE_CVTIS 0x2a +#define X86_SSE_CVTTSI 0x2c +#define X86_SSE_CVTSI 0x2d +#define X86_SSE_UCOMI 0x2e +#define X86_SSE_COMI 0x2f +#define X86_SSE_ROUND 0x3a +#define X86_SSE_SQRT 0x51 +#define X86_SSE_RSQRT 0x52 +#define X86_SSE_RCP 0x53 +#define X86_SSE_AND 0x54 +#define X86_SSE_ANDN 0x55 +#define X86_SSE_OR 0x56 +#define X86_SSE_XOR 0x57 +#define X86_SSE_ADD 0x58 +#define X86_SSE_MUL 0x59 +#define X86_SSE_CVTSD 0x5a +#define X86_SSE_CVTDT 0x5b +#define X86_SSE_SUB 0x5c +#define X86_SSE_MIN 0x5d +#define 
X86_SSE_DIV 0x5e +#define X86_SSE_MAX 0x5f +#define X86_SSE_X2G 0x6e +#define X86_SSE_EQB 0x74 +#define X86_SSE_EQW 0x75 +#define X86_SSE_EQD 0x76 +#define X86_SSE_G2X 0x7e +#define X86_SSE_MOV2 0xd6 +# define sser(c,r0,r1) _sser(_jit,c,r0,r1) +static void _sser(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define ssexr(p,c,r0,r1) _ssexr(_jit,p,c,r0,r1) +static void _ssexr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define ssexi(c,r0,m,i) _ssexi(_jit,c,r0,m,i) +static void _ssexi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define addssr(r0, r1) ssexr(0xf3, X86_SSE_ADD, r0, r1) +# define addsdr(r0, r1) ssexr(0xf2, X86_SSE_ADD, r0, r1) +# define subssr(r0, r1) ssexr(0xf3, X86_SSE_SUB, r0, r1) +# define subsdr(r0, r1) ssexr(0xf2, X86_SSE_SUB, r0, r1) +# define mulssr(r0, r1) ssexr(0xf3, X86_SSE_MUL, r0, r1) +# define mulsdr(r0, r1) ssexr(0xf2, X86_SSE_MUL, r0, r1) +# define divssr(r0, r1) ssexr(0xf3, X86_SSE_DIV, r0, r1) +# define divsdr(r0, r1) ssexr(0xf2, X86_SSE_DIV, r0, r1) +# define andpsr(r0, r1) sser( X86_SSE_AND, r0, r1) +# define andpdr(r0, r1) ssexr(0x66, X86_SSE_AND, r0, r1) +# define sse_truncr_f_i(r0, r1) ssexr(0xf3, X86_SSE_CVTTSI, r0, r1) +# define sse_truncr_d_i(r0, r1) ssexr(0xf2, X86_SSE_CVTTSI, r0, r1) +# if __X64 +# define sse_truncr_f_l(r0, r1) sselxr(0xf3, X86_SSE_CVTTSI, r0, r1) +# define sse_truncr_d_l(r0, r1) sselxr(0xf2, X86_SSE_CVTTSI, r0, r1) +# define sse_extr_f(r0, r1) sselxr(0xf3, X86_SSE_CVTIS, r0, r1) +# define sse_extr_d(r0, r1) sselxr(0xf2, X86_SSE_CVTIS, r0, r1) +# else +# define sse_extr_f(r0, r1) ssexr(0xf3, X86_SSE_CVTIS, r0, r1) +# define sse_extr_d(r0, r1) ssexr(0xf2, X86_SSE_CVTIS, r0, r1) +# endif +# define sse_extr_f_d(r0, r1) ssexr(0xf3, X86_SSE_CVTSD, r0, r1) +# define sse_extr_d_f(r0, r1) ssexr(0xf2, X86_SSE_CVTSD, r0, r1) +# define ucomissr(r0,r1) sser(X86_SSE_UCOMI,r0,r1) +# define ucomisdr(r0,r1) ssexr(0x66,X86_SSE_UCOMI,r0,r1) +# define xorpsr(r0,r1) 
sser(X86_SSE_XOR,r0,r1) +# define xorpdr(r0,r1) ssexr(0x66,X86_SSE_XOR,r0,r1) +# define movdlxr(r0,r1) ssexr(0x66, X86_SSE_X2G,r0,r1) +# define pcmpeqlr(r0, r1) ssexr(0x66, X86_SSE_EQD, r0, r1) +# define psrl(r0, i0) ssexi(0x72, r0, 0x02, i0) +# define psrq(r0, i0) ssexi(0x73, r0, 0x02, i0) +# define psll(r0, i0) ssexi(0x72, r0, 0x06, i0) +# define pslq(r0, i0) ssexi(0x73, r0, 0x06, i0) +# define movdqxr(r0,r1) sselxr(0x66,X86_SSE_X2G,r0,r1) +# if __X64 && !__X64_32 +# define sselxr(p,c,r0,r1) _sselxr(_jit,p,c,r0,r1) +static void +_sselxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# else +# define sselxr(p,c,r0,r1) ssexr(p,c,r0,r1) +# endif +# define ssexrx(p,c,md,rb,ri,ms,rd) _ssexrx(_jit,p,c,md,rb,ri,ms,rd) +# define movssmr(md,rb,ri,ms,rd) ssexrx(0xf3,X86_SSE_MOV,md,rb,ri,ms,rd) +# define movsdmr(md,rb,ri,ms,rd) ssexrx(0xf2,X86_SSE_MOV,md,rb,ri,ms,rd) +# define movssrm(rs,md,mb,mi,ms) ssexrx(0xf3,X86_SSE_MOV1,md,mb,mi,ms,rs) +# define movsdrm(rs,md,mb,mi,ms) ssexrx(0xf2,X86_SSE_MOV1,md,mb,mi,ms,rs) +static void +_ssexrx(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_addr_f(r0, r1, r2) _sse_addr_f(_jit, r0, r1, r2) +static void _sse_addr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_addi_f(r0, r1, i0) _sse_addi_f(_jit, r0, r1, i0) +static void _sse_addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_addr_d(r0, r1, r2) _sse_addr_d(_jit, r0, r1, r2) +static void _sse_addr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_addi_d(r0, r1, i0) _sse_addi_d(_jit, r0, r1, i0) +static void _sse_addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_subr_f(r0, r1, r2) _sse_subr_f(_jit, r0, r1, r2) +static void _sse_subr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_subi_f(r0, r1, i0) _sse_subi_f(_jit, r0, r1, i0) +static void 
_sse_subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_subr_d(r0, r1, r2) _sse_subr_d(_jit, r0, r1, r2) +static void _sse_subr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_subi_d(r0, r1, i0) _sse_subi_d(_jit, r0, r1, i0) +static void _sse_subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_rsbr_f(r0, r1, r2) sse_subr_f(r0, r2, r1) +# define sse_rsbi_f(r0, r1, i0) _sse_rsbi_f(_jit, r0, r1, i0) +static void _sse_rsbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_rsbr_d(r0, r1, r2) sse_subr_d(r0, r2, r1) +# define sse_rsbi_d(r0, r1, i0) _sse_rsbi_d(_jit, r0, r1, i0) +static void _sse_rsbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_mulr_f(r0, r1, r2) _sse_mulr_f(_jit, r0, r1, r2) +static void _sse_mulr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_muli_f(r0, r1, i0) _sse_muli_f(_jit, r0, r1, i0) +static void _sse_muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_mulr_d(r0, r1, r2) _sse_mulr_d(_jit, r0, r1, r2) +static void _sse_mulr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_muli_d(r0, r1, i0) _sse_muli_d(_jit, r0, r1, i0) +static void _sse_muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_divr_f(r0, r1, r2) _sse_divr_f(_jit, r0, r1, r2) +static void _sse_divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_divi_f(r0, r1, i0) _sse_divi_f(_jit, r0, r1, i0) +static void _sse_divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_divr_d(r0, r1, r2) _sse_divr_d(_jit, r0, r1, r2) +static void _sse_divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_divi_d(r0, r1, i0) _sse_divi_d(_jit, r0, r1, i0) +static void _sse_divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_absr_f(r0, r1) _sse_absr_f(_jit, r0, r1) +static void _sse_absr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# define 
sse_absr_d(r0, r1) _sse_absr_d(_jit, r0, r1) +static void _sse_absr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define sse_negr_f(r0, r1) _sse_negr_f(_jit, r0, r1) +static void _sse_negr_f(jit_state_t*,jit_int32_t,jit_int32_t); +# define sse_negr_d(r0, r1) _sse_negr_d(_jit, r0, r1) +static void _sse_negr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# define sse_sqrtr_f(r0, r1) ssexr(0xf3, X86_SSE_SQRT, r0, r1) +# define sse_sqrtr_d(r0, r1) ssexr(0xf2, X86_SSE_SQRT, r0, r1) +# define ssecmpf(code, r0, r1, r2) _ssecmp(_jit, 0, code, r0, r1, r2) +# define ssecmpd(code, r0, r1, r2) _ssecmp(_jit, 1, code, r0, r1, r2) +static void +_ssecmp(jit_state_t*, jit_bool_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_int32_t); +#define sse_movr_f(r0,r1) _sse_movr_f(_jit,r0,r1) +static void _sse_movr_f(jit_state_t*, jit_int32_t, jit_int32_t); +#define sse_movi_f(r0,i0) _sse_movi_f(_jit,r0,i0) +static void _sse_movi_f(jit_state_t*, jit_int32_t, jit_float32_t*); +# define sse_lti_f(r0, r1, i0) _sse_lti_f(_jit, r0, r1, i0) +static void _sse_lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ltr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r1, r2) +# define sse_lei_f(r0, r1, i0) _sse_lei_f(_jit, r0, r1, i0) +static void _sse_lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ler_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r1, r2) +# define sse_eqi_f(r0, r1, i0) _sse_eqi_f(_jit, r0, r1, i0) +static void _sse_eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_eqr_f(r0, r1, r2) _sse_eqr_f(_jit, r0, r1, r2) +static void _sse_eqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_gei_f(r0, r1, i0) _sse_gei_f(_jit, r0, r1, i0) +static void _sse_gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ger_f(r0, r1, r2) ssecmpf(X86_CC_AE, r0, r2, r1) +# define sse_gti_f(r0, r1, i0) _sse_gti_f(_jit, r0, r1, i0) +static void _sse_gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define 
sse_gtr_f(r0, r1, r2) ssecmpf(X86_CC_A, r0, r2, r1) +# define sse_nei_f(r0, r1, i0) _sse_nei_f(_jit, r0, r1, i0) +static void _sse_nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ner_f(r0, r1, r2) _sse_ner_f(_jit, r0, r1, r2) +static void _sse_ner_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_unlti_f(r0, r1, i0) _sse_unlti_f(_jit, r0, r1, i0) +static void _sse_unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_unltr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r2, r1) +# define sse_unlei_f(r0, r1, i0) _sse_unlei_f(_jit, r0, r1, i0) +static void _sse_unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_unler_f(r0, r1, r2) _sse_unler_f(_jit, r0, r1, r2) +# define sse_uneqi_f(r0, r1, i0) _sse_uneqi_f(_jit, r0, r1, i0) +static void _sse_uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +static void _sse_unler_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_uneqr_f(r0, r1, r2) _sse_uneqr_f(_jit, r0, r1, r2) +static void _sse_uneqr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ungei_f(r0, r1, i0) _sse_ungei_f(_jit, r0, r1, i0) +static void _sse_ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_unger_f(r0, r1, r2) _sse_unger_f(_jit, r0, r1, r2) +static void _sse_unger_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ungti_f(r0, r1, i0) _sse_ungti_f(_jit, r0, r1, i0) +static void _sse_ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ungtr_f(r0, r1, r2) ssecmpf(X86_CC_NAE, r0, r1, r2) +# define sse_ltgti_f(r0, r1, i0) _sse_ltgti_f(_jit, r0, r1, i0) +static void _sse_ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ltgtr_f(r0, r1, r2) _sse_ltgtr_f(_jit, r0, r1, r2) +static void _sse_ltgtr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ordi_f(r0, r1, i0) _sse_ordi_f(_jit, r0, r1, i0) +static void 
_sse_ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_ordr_f(r0, r1, r2) ssecmpf(X86_CC_NP, r0, r2, r1) +# define sse_unordi_f(r0, r1, i0) _sse_unordi_f(_jit, r0, r1, i0) +static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); +# define sse_unordr_f(r0, r1, r2) ssecmpf(X86_CC_P, r0, r2, r1) +# define sse_ldr_f(r0, r1) movssmr(0, r1, _NOREG, _SCL1, r0) +# define sse_ldi_f(r0, i0) _sse_ldi_f(_jit, r0, i0) +static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t); +# define sse_ldxr_f(r0, r1, r2) _sse_ldxr_f(_jit, r0, r1, r2) +static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ldxi_f(r0, r1, i0) _sse_ldxi_f(_jit, r0, r1, i0) +static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define sse_str_f(r0, r1) movssrm(r1, 0, r0, _NOREG, _SCL1) +# define sse_sti_f(i0, r0) _sse_sti_f(_jit, i0, r0) +static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t); +# define sse_stxr_f(r0, r1, r2) _sse_stxr_f(_jit, r0, r1, r2) +static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_stxi_f(i0, r0, r1) _sse_stxi_f(_jit, i0, r0, r1) +static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bltr_f(i0, r0, r1) _sse_bltr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_blti_f(i0, r0, i1) _sse_blti_f(_jit, i0, r0, i1) +static jit_word_t +_sse_blti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bler_f(i0, r0, r1) _sse_bler_f(_jit, i0, r0, r1) +static jit_word_t _sse_bler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_blei_f(i0, r0, i1) _sse_blei_f(_jit, i0, r0, i1) +static jit_word_t +_sse_blei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_beqr_f(i0, r0, r1) _sse_beqr_f(_jit, i0, r0, r1) +static jit_word_t _sse_beqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# 
define sse_beqi_f(i0, r0, i1) _sse_beqi_f(_jit, i0, r0, i1) +static jit_word_t +_sse_beqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bger_f(i0, r0, r1) _sse_bger_f(_jit, i0, r0, r1) +static jit_word_t _sse_bger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bgei_f(i0, r0, i1) _sse_bgei_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bgei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bgtr_f(i0, r0, r1) _sse_bgtr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bgti_f(i0, r0, i1) _sse_bgti_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bner_f(i0, r0, r1) _sse_bner_f(_jit, i0, r0, r1) +static jit_word_t _sse_bner_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bnei_f(i0, r0, i1) _sse_bnei_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bnei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bunltr_f(i0, r0, r1) _sse_bunltr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bunltr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunlti_f(i0, r0, i1) _sse_bunlti_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bunlti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bunler_f(i0, r0, r1) _sse_bunler_f(_jit, i0, r0, r1) +static jit_word_t _sse_bunler_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunlei_f(i0, r0, i1) _sse_bunlei_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bunlei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_buneqr_f(i0, r0, r1) _sse_buneqr_f(_jit, i0, r0, r1) +static jit_word_t _sse_buneqr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_buneqi_f(i0, r0, i1) _sse_buneqi_f(_jit, i0, r0, i1) +static jit_word_t +_sse_buneqi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bunger_f(i0, r0, r1) 
_sse_bunger_f(_jit, i0, r0, r1) +static jit_word_t _sse_bunger_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bungei_f(i0, r0, i1) _sse_bungei_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bungei_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bungtr_f(i0, r0, r1) _sse_bungtr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bungtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bungti_f(i0, r0, i1) _sse_bungti_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bungti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bltgtr_f(i0, r0, r1) _sse_bltgtr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bltgtr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bltgti_f(i0, r0, i1) _sse_bltgti_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bltgti_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bordr_f(i0, r0, r1) _sse_bordr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bordi_f(i0, r0, i1) _sse_bordi_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +# define sse_bunordr_f(i0, r0, r1) _sse_bunordr_f(_jit, i0, r0, r1) +static jit_word_t _sse_bunordr_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunordi_f(i0, r0, i1) _sse_bunordi_f(_jit, i0, r0, i1) +static jit_word_t +_sse_bunordi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_float32_t*); +#define sse_movr_d(r0,r1) _sse_movr_d(_jit,r0,r1) +static void _sse_movr_d(jit_state_t*, jit_int32_t, jit_int32_t); +#define sse_movi_d(r0,i0) _sse_movi_d(_jit,r0,i0) +static void _sse_movi_d(jit_state_t*, jit_int32_t, jit_float64_t*); +# define sse_ltr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r1, r2) +# define sse_lti_d(r0, r1, i0) _sse_lti_d(_jit, r0, r1, i0) +static void _sse_lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ler_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r1, 
r2) +# define sse_lei_d(r0, r1, i0) _sse_lei_d(_jit, r0, r1, i0) +static void _sse_lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_eqr_d(r0, r1, r2) _sse_eqr_d(_jit, r0, r1, r2) +static void _sse_eqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_eqi_d(r0, r1, i0) _sse_eqi_d(_jit, r0, r1, i0) +static void _sse_eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ger_d(r0, r1, r2) ssecmpd(X86_CC_AE, r0, r2, r1) +# define sse_gei_d(r0, r1, i0) _sse_gei_d(_jit, r0, r1, i0) +static void _sse_gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_gtr_d(r0, r1, r2) ssecmpd(X86_CC_A, r0, r2, r1) +# define sse_gti_d(r0, r1, i0) _sse_gti_d(_jit, r0, r1, i0) +static void _sse_gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ner_d(r0, r1, r2) _sse_ner_d(_jit, r0, r1, r2) +static void _sse_ner_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_nei_d(r0, r1, i0) _sse_nei_d(_jit, r0, r1, i0) +static void _sse_nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_unltr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r2, r1) +# define sse_unlti_d(r0, r1, i0) _sse_unlti_d(_jit, r0, r1, i0) +static void _sse_unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_unler_d(r0, r1, r2) _sse_unler_d(_jit, r0, r1, r2) +static void _sse_unler_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_unlei_d(r0, r1, i0) _sse_unlei_d(_jit, r0, r1, i0) +static void _sse_unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_uneqr_d(r0, r1, r2) _sse_uneqr_d(_jit, r0, r1, r2) +static void _sse_uneqr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_uneqi_d(r0, r1, i0) _sse_uneqi_d(_jit, r0, r1, i0) +static void _sse_uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_unger_d(r0, r1, r2) _sse_unger_d(_jit, r0, r1, r2) +static void _sse_unger_d(jit_state_t*, 
jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ungei_d(r0, r1, i0) _sse_ungei_d(_jit, r0, r1, i0) +static void _sse_ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ungtr_d(r0, r1, r2) ssecmpd(X86_CC_NAE, r0, r1, r2) +# define sse_ungti_d(r0, r1, i0) _sse_ungti_d(_jit, r0, r1, i0) +static void _sse_ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ltgtr_d(r0, r1, r2) _sse_ltgtr_d(_jit, r0, r1, r2) +static void _sse_ltgtr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ltgti_d(r0, r1, i0) _sse_ltgti_d(_jit, r0, r1, i0) +static void _sse_ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ordr_d(r0, r1, r2) ssecmpd(X86_CC_NP, r0, r2, r1) +# define sse_ordi_d(r0, r1, i0) _sse_ordi_d(_jit, r0, r1, i0) +static void _sse_ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_unordr_d(r0, r1, r2) ssecmpd(X86_CC_P, r0, r2, r1) +# define sse_unordi_d(r0, r1, i0) _sse_unordi_d(_jit, r0, r1, i0) +static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); +# define sse_ldr_d(r0, r1) movsdmr(0, r1, _NOREG, _SCL1, r0) +# define sse_ldi_d(r0, i0) _sse_ldi_d(_jit, r0, i0) +static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t); +# define sse_ldxr_d(r0, r1, r2) _sse_ldxr_d(_jit, r0, r1, r2) +static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define sse_ldxi_d(r0, r1, i0) _sse_ldxi_d(_jit, r0, r1, i0) +static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); +# define sse_bltr_d(i0, r0, r1) _sse_bltr_d(_jit, i0, r0, r1) +# define sse_str_d(r0, r1) movsdrm(r1, 0, r0, _NOREG, _SCL1) +# define sse_sti_d(i0, r0) _sse_sti_d(_jit, i0, r0) +static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t); +# define sse_stxr_d(r0, r1, r2) _sse_stxr_d(_jit, r0, r1, r2) +static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define sse_stxi_d(i0, r0, r1) _sse_stxi_d(_jit, 
i0, r0, r1) +static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_blti_d(i0, r0, i1) _sse_blti_d(_jit, i0, r0, i1) +static jit_word_t +_sse_blti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bler_d(i0, r0, r1) _sse_bler_d(_jit, i0, r0, r1) +static jit_word_t _sse_bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_blei_d(i0, r0, i1) _sse_blei_d(_jit, i0, r0, i1) +static jit_word_t +_sse_blei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_beqr_d(i0, r0, r1) _sse_beqr_d(_jit, i0, r0, r1) +static jit_word_t _sse_beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_beqi_d(i0, r0, i1) _sse_beqi_d(_jit, i0, r0, i1) +static jit_word_t +_sse_beqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bger_d(i0, r0, r1) _sse_bger_d(_jit, i0, r0, r1) +static jit_word_t _sse_bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bgei_d(i0, r0, i1) _sse_bgei_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bgei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bgtr_d(i0, r0, r1) _sse_bgtr_d(_jit, i0, r0, r1) +static jit_word_t _sse_bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bgti_d(i0, r0, i1) _sse_bgti_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bner_d(i0, r0, r1) _sse_bner_d(_jit, i0, r0, r1) +static jit_word_t _sse_bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bnei_d(i0, r0, i1) _sse_bnei_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bnei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bunltr_d(i0, r0, r1) _sse_bunltr_d(_jit, i0, r0, r1) +static jit_word_t _sse_bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunlti_d(i0, r0, i1) _sse_bunlti_d(_jit, 
i0, r0, i1) +static jit_word_t +_sse_bunlti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bunler_d(i0, r0, r1) _sse_bunler_d(_jit, i0, r0, r1) +static jit_word_t _sse_bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunlei_d(i0, r0, i1) _sse_bunlei_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bunlei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_buneqr_d(i0, r0, r1) _sse_buneqr_d(_jit, i0, r0, r1) +static jit_word_t _sse_buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_buneqi_d(i0, r0, i1) _sse_buneqi_d(_jit, i0, r0, i1) +static jit_word_t +_sse_buneqi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bunger_d(i0, r0, r1) _sse_bunger_d(_jit, i0, r0, r1) +static jit_word_t _sse_bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bungei_d(i0, r0, i1) _sse_bungei_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bungei_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bungtr_d(i0, r0, r1) _sse_bungtr_d(_jit, i0, r0, r1) +static jit_word_t _sse_bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bungti_d(i0, r0, i1) _sse_bungti_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bungti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bltgtr_d(i0, r0, r1) _sse_bltgtr_d(_jit, i0, r0, r1) +static jit_word_t _sse_bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bltgti_d(i0, r0, i1) _sse_bltgti_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bltgti_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bordr_d(i0, r0, r1) _sse_bordr_d(_jit, i0, r0, r1) +static jit_word_t _sse_bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bordi_d(i0, r0, i1) _sse_bordi_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +# define sse_bunordr_d(i0, r0, r1) _sse_bunordr_d(_jit, i0, 
r0, r1) +static jit_word_t _sse_bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); +# define sse_bunordi_d(i0, r0, i1) _sse_bunordi_d(_jit, i0, r0, i1) +static jit_word_t +_sse_bunordi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_float64_t*); +#endif + +#if CODE +/* fpr_opi(name, type, size): defines _sse_<name>i_<type>(), the immediate-operand form of a float operation. The constant pointed to by i0 is loaded into a temporary SSE register with sse_movi_<type>() and the register form sse_<name>r_<type>() is then applied; the temporary is released afterwards. */ +# define fpr_opi(name, type, size) \ +static void \ +_sse_##name##i_##type(jit_state_t *_jit, \ + jit_int32_t r0, jit_int32_t r1, \ + jit_float##size##_t *i0) \ +{ \ + jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr); \ + assert(jit_sse_reg_p(reg)); \ + sse_movi_##type(rn(reg), i0); \ + sse_##name##r_##type(r0, r1, rn(reg)); \ + jit_unget_reg(reg); \ +} +/* fpr_bopi(name, type, size): same scheme for branches. Defines _sse_b<name>i_<type>(), which loads the constant at i1 into a temporary register (requested with jit_class_nospill) and returns the value returned by sse_b<name>r_<type>(). */ +# define fpr_bopi(name, type, size) \ +static jit_word_t \ +_sse_b##name##i_##type(jit_state_t *_jit, \ + jit_word_t i0, jit_int32_t r0, \ + jit_float##size##_t *i1) \ +{ \ + jit_word_t word; \ + jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \ + jit_class_nospill); \ + assert(jit_sse_reg_p(reg)); \ + sse_movi_##type(rn(reg), i1); \ + word = sse_b##name##r_##type(i0, r0, rn(reg)); \ + jit_unget_reg(reg); \ + return (word); \ +} +/* Shorthands: the f variants use jit_float32_t constants, the d variants jit_float64_t. */ +# define fopi(name) fpr_opi(name, f, 32) +# define fbopi(name) fpr_bopi(name, f, 32) +# define dopi(name) fpr_opi(name, d, 64) +# define dbopi(name) fpr_bopi(name, d, 64) +/* Emit a register-register instruction without a prefix byte: rex(), the 0x0f byte, opcode c, then mrm(0x03, r0, r1). */ +static void +_sser(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, jit_int32_t r1) +{ + rex(0, 0, r0, 0, r1); + ic(0x0f); + ic(c); + mrm(0x03, r7(r0), r7(r1)); +} + +/* Same as _sser, but byte p is emitted first (the macros in this file pass 0x66, 0xf2 or 0xf3). */ +static void +_ssexr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c, + jit_int32_t r0, jit_int32_t r1) +{ + ic(p); + rex(0, 0, r0, 0, r1); + ic(0x0f); + ic(c); + mrm(0x03, r7(r0), r7(r1)); +} + +/* Emit 0x66, rex(), 0x0f, opcode c, mrm(0x03, m, r0) and finally the immediate byte i. m is a constant placed in the first mrm register slot; the psrl/psrq/psll/pslq macros pass 0x02 or 0x06. */ +static void +_ssexi(jit_state_t *_jit, jit_int32_t c, jit_int32_t r0, + jit_int32_t m, jit_int32_t i) +{ + ic(0x66); + rex(0, 0, 0, 0, r0); + ic(0x0f); + ic(c); + mrm(0x03, r7(m), r7(r0)); + ic(i); +} + +/* Variant of _ssexr that passes 1 as the second argument of rex() (presumably REX.W, i.e. a 64-bit integer operand; confirm against rex()). Guarded exactly like its prototype: when __X64_32 is set, sselxr() expands to ssexr() and this function would otherwise be defined but never used. */ +#if __X64 && !__X64_32 +static void +_sselxr(jit_state_t *_jit, jit_int32_t p, jit_int32_t c, + jit_int32_t r0, jit_int32_t r1) +{ + ic(p); + rex(0, 1, r0, 0, r1); + ic(0x0f); + ic(c); + 
mrm(0x03, r7(r0), r7(r1)); +} +#endif + +/* Emit an instruction with a memory operand: prefix px, rex(), 0x0f, opcode code, then rx(rd, md, rb, ri, ms). NOTE(review): md/rb/ri/ms look like displacement, base, index and scale, as the movssmr/movsdmr macro parameter names suggest; confirm against rx(). */ +static void +_ssexrx(jit_state_t *_jit, jit_int32_t px, jit_int32_t code, jit_int32_t md, + jit_int32_t rb, jit_int32_t ri, jit_int32_t ms, jit_int32_t rd) +{ + ic(px); + rex(0, 0, rd, ri, rb); + ic(0x0f); + ic(code); + rx(rd, md, rb, ri, ms); +} + +/* r0 = r1 + r2, single precision. addssr() only takes two registers, so when r0 aliases one source the other one is added to it (the operand order is irrelevant for addition); otherwise r1 is copied into r0 first. */ +static void +_sse_addr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r0 == r1) + addssr(r0, r2); + else if (r0 == r2) + addssr(r0, r1); + else { + sse_movr_f(r0, r1); + addssr(r0, r2); + } +} + +/* Generates _sse_addi_f() through fpr_opi. */ +fopi(add) + +/* Double-precision counterpart of _sse_addr_f, using addsdr(). */ +static void +_sse_addr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r0 == r1) + addsdr(r0, r2); + else if (r0 == r2) + addsdr(r0, r1); + else { + sse_movr_d(r0, r1); + addsdr(r0, r2); + } +} + +/* Generates _sse_addi_d() through fpr_opi. */ +dopi(add) + +/* r0 = r1 - r2, single precision. Subtraction is order sensitive: when r0 aliases r2, the old r0 is first saved in a temporary SSE register so that r0 can be overwritten with r1 before the subtract. */ +static void +_sse_subr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 == r1) + subssr(r0, r2); + else if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_movr_f(rn(reg), r0); + sse_movr_f(r0, r1); + subssr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + sse_movr_f(r0, r1); + subssr(r0, r2); + } +} + +/* Generates _sse_subi_f() through fpr_opi. */ +fopi(sub) + +/* Double-precision counterpart of _sse_subr_f, using subsdr(). */ +static void +_sse_subr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 == r1) + subsdr(r0, r2); + else if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_movr_d(rn(reg), r0); + sse_movr_d(r0, r1); + subsdr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + sse_movr_d(r0, r1); + subsdr(r0, r2); + } +} + +/* Generates _sse_subi_d() through fpr_opi. */ +dopi(sub) + +/* Reverse subtraction, immediate forms only: the register forms sse_rsbr_f/sse_rsbr_d are macros that call sse_subr_f/sse_subr_d with r1 and r2 swapped. */ +fopi(rsb) + +dopi(rsb) + +/* r0 = r1 * r2, single precision; aliasing is handled as in _sse_addr_f. */ +static void +_sse_mulr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r0 == r1) + mulssr(r0, r2); + else if (r0 == r2) + mulssr(r0, r1); + else { + sse_movr_f(r0, r1); + mulssr(r0, r2); + } +} + +/* Generates _sse_muli_f() through fpr_opi. */ +fopi(mul) + +/* Double-precision counterpart of _sse_mulr_f, using mulsdr(). */ +static void +_sse_mulr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r0 == r1) + mulsdr(r0, r2); + else if (r0 == r2) + mulsdr(r0, r1); + else { + 
sse_movr_d(r0, r1); + mulsdr(r0, r2); + } +} + +/* Generates _sse_muli_d() through fpr_opi. */ +dopi(mul) + +/* r0 = r1 / r2, single precision. As with subtraction the operand order matters: when r0 aliases r2 the divisor is saved in a temporary SSE register before r0 is overwritten with r1. */ +static void +_sse_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 == r1) + divssr(r0, r2); + else if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_movr_f(rn(reg), r0); + sse_movr_f(r0, r1); + divssr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + sse_movr_f(r0, r1); + divssr(r0, r2); + } +} + +/* Generates _sse_divi_f() through fpr_opi. */ +fopi(div) + +/* Double-precision counterpart of _sse_divr_f, using divsdr(). */ +static void +_sse_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (r0 == r1) + divsdr(r0, r2); + else if (r0 == r2) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + sse_movr_d(rn(reg), r0); + sse_movr_d(r0, r1); + divsdr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + sse_movr_d(r0, r1); + divsdr(r0, r2); + } +} + +/* Generates _sse_divi_d() through fpr_opi. */ +dopi(div) + +/* r0 = |r1|, single precision. A mask is built with pcmpeqlr(x, x) followed by psrl(x, 1) (presumably all ones shifted right by one, i.e. everything except the top bit of each 32-bit element) and ANDed with the value. If r0 aliases r1 the mask is built in a temporary register, otherwise directly in r0. */ +static void +_sse_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + pcmpeqlr(rn(reg), rn(reg)); + psrl(rn(reg), 1); + andpsr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + pcmpeqlr(r0, r0); + psrl(r0, 1); + andpsr(r0, r1); + } +} + +/* Double-precision counterpart of _sse_absr_f: the mask is shifted with psrq() instead of psrl() and applied with andpdr(). */ +static void +_sse_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (r0 == r1) { + reg = jit_get_reg(jit_class_fpr|jit_class_xpr); + pcmpeqlr(rn(reg), rn(reg)); + psrq(rn(reg), 1); + andpdr(r0, rn(reg)); + jit_unget_reg(reg); + } + else { + pcmpeqlr(r0, r0); + psrq(r0, 1); + andpdr(r0, r1); + } +} + +/* r0 = -r1, single precision. The constant 0x80000000 is loaded into a scratch general purpose register, moved to an SSE register with movdlxr() and XORed with the value. If r0 aliases r1 the constant goes through a temporary SSE register, otherwise it is built directly in r0. */ +static void +_sse_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t freg, ireg; + ireg = jit_get_reg(jit_class_gpr); + imovi(rn(ireg), 0x80000000); + if (r0 == r1) { + freg = jit_get_reg(jit_class_fpr|jit_class_xpr); + movdlxr(rn(freg), rn(ireg)); + xorpsr(r0, rn(freg)); + jit_unget_reg(freg); + } + else { + movdlxr(r0, rn(ireg)); + xorpsr(r0, r1); + } + jit_unget_reg(ireg); +} + +/* r0 = -r1, double precision. Same scheme as _sse_negr_f, with an extra pslq(x, 32) that shifts the 0x80000000 constant left by 32 bits before the xorpdr(). */ +static void +_sse_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t 
r1) +{ + jit_int32_t freg, ireg; + ireg = jit_get_reg(jit_class_gpr); + imovi(rn(ireg), 0x80000000); + if (r0 == r1) { + freg = jit_get_reg(jit_class_fpr|jit_class_xpr); + movdlxr(rn(freg), rn(ireg)); + /* move the 0x80000000 pattern into the upper 32 bits */ pslq(rn(freg), 32); + xorpdr(r0, rn(freg)); + jit_unget_reg(freg); + } + else { + movdlxr(r0, rn(ireg)); + pslq(r0, 32); + xorpdr(r0, r1); + } + jit_unget_reg(ireg); +} + +/* Shared helper that materialises a floating-point comparison as an integer in r0. `d` selects ucomisdr (double) over ucomissr (float); `code` is the condition handed to cc(). cc() is applied to a register accepted by reg8_p(); if r0 is not one, RAX stands in: its current value is first copied to r0 (movr) and the final xchgr swaps the two, so r0 ends up with the result and RAX with its old value. The target register is zeroed (ixorr) before the compare is emitted. Note that the compare is emitted as ucomis*(r2, r1). */ static void +_ssecmp(jit_state_t *_jit, jit_bool_t d, jit_int32_t code, + jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_bool_t rc; + jit_int32_t reg; + if ((rc = reg8_p(r0))) + reg = r0; + else { + reg = _RAX_REGNO; + movr(r0, reg); + } + ixorr(reg, reg); + if (d) + ucomisdr(r2, r1); + else + ucomissr(r2, r1); + cc(code, reg); + if (!rc) + xchgr(r0, reg); +} + +/* Float register-to-register move; nothing is emitted when source and destination are the same register. */ static void +_sse_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + ssexr(0xf3, X86_SSE_MOV, r0, r1); +} + +/* Load the float constant *i0 into r0. Positive zero (value 0.0 with the 0x80000000 bit clear) is produced with xorpsr(r0, r0). Otherwise the constant is loaded from its own address (sse_ldi_f) unless _jitc->no_data is set or, on __X64, the address fails sse_address_p(); in those cases the bit pattern is moved through a temporary GPR. */ static void +_sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) +{ + union { + jit_int32_t i; + jit_float32_t f; + } data; + jit_int32_t reg; + jit_bool_t ldi; + + data.f = *i0; + if (data.f == 0.0 && !(data.i & 0x80000000)) + xorpsr(r0, r0); + else { + ldi = !_jitc->no_data; +#if __X64 + /* if a register would have to be allocated for the address, just use an immediate */ + if (ldi && !sse_address_p(i0)) + ldi = 0; +#endif + if (ldi) + sse_ldi_f(r0, (jit_word_t)i0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), data.i); + movdlxr(r0, rn(reg)); + jit_unget_reg(reg); + } + } +} + +fopi(lt) +fopi(le) + +/* r0 = (r1 == r2) for floats. Same RAX stand-in scheme as _ssecmp when r0 fails reg8_p(). The result register is zeroed, then after ucomissr a short jpes(0) is emitted and later patched to land just past the cc(X86_CC_E), so that path keeps the preset 0. NOTE(review): presumably the parity condition marks an unordered (NaN) compare -- confirm against the ucomiss flag semantics. */ static void +_sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_bool_t rc; + jit_int32_t reg; + jit_word_t jp_code; + if ((rc = reg8_p(r0))) + reg = r0; + else { + reg = _RAX_REGNO; + movr(r0, _RAX_REGNO); + } + ixorr(reg, reg); + ucomissr(r2, r1); + jpes(0); + jp_code = _jit->pc.w; + cc(X86_CC_E, reg); + patch_rel_char(jp_code, _jit->pc.w); + if (!rc) + xchgr(r0, reg); +} + +fopi(eq) +fopi(ge) +fopi(gt) + +static void +_sse_ner_f(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_bool_t rc; + jit_int32_t reg; + jit_word_t jp_code; + if ((rc = reg8_p(r0))) + reg = r0; + else { + reg = _RAX_REGNO; + movr(r0, _RAX_REGNO); + } + /* preset the result to 1: the patched jpes below skips the cc(), so that path reports "not equal" */ imovi(reg, 1); + ucomissr(r2, r1); + jpes(0); + jp_code = _jit->pc.w; + cc(X86_CC_NE, reg); + patch_rel_char(jp_code, _jit->pc.w); + if (!rc) + xchgr(r0, reg); +} + +fopi(ne) +fopi(unlt) + +/* Float "unordered or less-or-equal" test into r0. Identical operand registers give the constant 1 without emitting a compare; otherwise ssecmpf is called with r1 and r2 swapped and condition X86_CC_NA. */ static void +_sse_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpf(X86_CC_NA, r0, r2, r1); +} + +fopi(unle) + +/* Float "unordered or equal" test into r0; identical registers give 1, otherwise ssecmpf with X86_CC_E. */ static void +_sse_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpf(X86_CC_E, r0, r1, r2); +} + +fopi(uneq) + +/* Float "unordered or greater-or-equal" test into r0; identical registers give 1, otherwise ssecmpf with X86_CC_NA and the operands in their natural order. */ static void +_sse_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpf(X86_CC_NA, r0, r1, r2); +} + +fopi(unge) +fopi(ungt) + +/* Float "less or greater" test into r0; identical registers give 0 (r0 is cleared with ixorr), otherwise ssecmpf with X86_CC_NE. */ static void +_sse_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + ixorr(r0, r0); + else + ssecmpf(X86_CC_NE, r0, r1, r2); +} + +fopi(ltgt) +fopi(ord) +fopi(unord) + +/* Load a float into r0 from the absolute address i0. The address is used directly when sse_address_p(i0) holds; otherwise it is first placed in a temporary GPR and loaded through sse_ldr_f. */ static void +_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (sse_address_p(i0)) + movssmr(i0, _NOREG, _NOREG, _SCL1, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + sse_ldr_f(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +/* Load a float into r0 from the address formed by r1 and r2. On __X64_32 the two registers are summed into a temporary GPR first; elsewhere a single movssmr with r1 and r2 as its two address registers is emitted. */ static void +_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ +#if __X64_32 + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r1, r2); + sse_ldr_f(r0, rn(reg)); + jit_unget_reg(reg); +#else + movssmr(0, r1, r2, _SCL1, r0); +#endif +} + +/* Load a float into r0 from r1 offset by the constant i0. The offset is encoded in the instruction when can_sign_extend_int_p(i0) holds; otherwise it goes through a temporary GPR. */ static void +_sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_int_p(i0)) + movssmr(i0, r1, _NOREG, _SCL1, r0); + else { + reg = jit_get_reg(jit_class_gpr); +#if 
__X64_32 + addi(rn(reg), r1, i0); + sse_ldr_f(r0, rn(reg)); +#else + movi(rn(reg), i0); + sse_ldxr_f(r0, r1, rn(reg)); +#endif + jit_unget_reg(reg); + } +} + +/* Store the float in r0 to the absolute address i0; the address is used directly when sse_address_p(i0) holds, otherwise it is placed in a temporary GPR and stored through sse_str_f. */ static void +_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + if (sse_address_p(i0)) + movssrm(r0, i0, _NOREG, _NOREG, _SCL1); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + sse_str_f(rn(reg), r0); + jit_unget_reg(reg); + } +} + +/* Store the float in r2 to the address formed by r0 and r1. On __X64_32 the two registers are summed into a temporary GPR first; elsewhere a single movssrm with r0 and r1 as its two address registers is emitted. */ static void +_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ +#if __X64_32 + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r0, r1); + sse_str_f(rn(reg), r2); + jit_unget_reg(reg); +#else + movssrm(r2, 0, r0, r1, _SCL1); +#endif +} + +/* Store the float in r1 to r0 offset by the constant i0. The offset is encoded in the instruction when can_sign_extend_int_p(i0) holds; otherwise it is loaded into a temporary GPR (or, on __X64_32, summed with r0 there) and the register form of the store is used. */ static void +_sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_int_p(i0)) + movssrm(r1, i0, r0, _NOREG, _SCL1); + else { + reg = jit_get_reg(jit_class_gpr); +#if __X64_32 + addi(rn(reg), r0, i0); + sse_str_f(rn(reg), r1); +#else + movi(rn(reg), i0); + sse_stxr_f(rn(reg), r0, r1); +#endif + jit_unget_reg(reg); + } +} + +/* Float branch helpers: each emits a ucomissr compare followed by a conditional jump to i0 and returns _jit->pc.w as it stands after the jump (presumably so the caller can patch the jump later -- confirm at the call sites). "Less than" is expressed as "above" with the operands swapped. */ static jit_word_t +_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r1, r0); + ja(i0); + return (_jit->pc.w); +} +/* fbopi() is a macro defined outside this chunk; presumably it generates the immediate-operand variant of the branch -- confirm. */ fbopi(lt) + +/* Branch to i0 if r0 is less than or equal to r1: swapped compare, then "above or equal". */ static jit_word_t +_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r1, r0); + jae(i0); + return (_jit->pc.w); +} +fbopi(le) + +/* Branch to i0 if r0 equals r1. A short jps(0) is emitted before the je and patched to land just after it, so the parity case (presumably an unordered compare -- confirm) never takes the branch. */ static jit_word_t +_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t jp_code; + ucomissr(r0, r1); + jps(0); + jp_code = _jit->pc.w; + je(i0); + patch_rel_char(jp_code, _jit->pc.w); + return (_jit->pc.w); +} +fbopi(eq) + +/* Branch to i0 if r0 is greater than or equal to r1. */ static jit_word_t +_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r0, r1); + jae(i0); + return (_jit->pc.w); +} +fbopi(ge) + +static jit_word_t +_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t 
r1) +{ + ucomissr(r0, r1); + ja(i0); + return (_jit->pc.w); +} +fbopi(gt) + +/* Branch to i0 if r0 differs from r1. Two short jumps are emitted and patched in place: jps(0) is patched to land on the jmpi(i0), so the parity case (presumably an unordered compare -- confirm) always branches; jzs(0) is patched to land after the jmpi, so the zero/equal case falls through without branching. */ static jit_word_t +_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t jp_code; + jit_word_t jz_code; + ucomissr(r0, r1); + jps(0); + jp_code = _jit->pc.w; + jzs(0); + jz_code = _jit->pc.w; + patch_rel_char(jp_code, _jit->pc.w); + jmpi(i0); + patch_rel_char(jz_code, _jit->pc.w); + return (_jit->pc.w); +} +fbopi(ne) + +/* Branch to i0 on "unordered or less than": compare r0 with r1, jump on "not above or equal". */ static jit_word_t +_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r0, r1); + jnae(i0); + return (_jit->pc.w); +} +fbopi(unlt) + +/* Branch to i0 on "unordered or less-or-equal". With identical operand registers no compare is emitted and the jump is unconditional. */ static jit_word_t +_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomissr(r0, r1); + jna(i0); + } + return (_jit->pc.w); +} +fbopi(unle) + +/* Branch to i0 on "unordered or equal"; unconditional when both operands are the same register. */ static jit_word_t +_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomissr(r0, r1); + je(i0); + } + return (_jit->pc.w); +} +fbopi(uneq) + +/* Branch to i0 on "unordered or greater-or-equal": swapped compare, then "not above"; unconditional when both operands are the same register. */ static jit_word_t +_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomissr(r1, r0); + jna(i0); + } + return (_jit->pc.w); +} +fbopi(unge) + +/* Branch to i0 on "unordered or greater than": swapped compare, then "not above or equal". */ static jit_word_t +_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r1, r0); + jnae(i0); + return (_jit->pc.w); +} +fbopi(ungt) + +/* Branch to i0 on "less or greater": compare, then jump on "not equal". */ static jit_word_t +_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r0, r1); + jne(i0); + return (_jit->pc.w); +} +fbopi(ltgt) + +/* Branch to i0 when the compare is ordered: jump on "no parity". */ static jit_word_t +_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r0, r1); + jnp(i0); + return (_jit->pc.w); +} +fbopi(ord) + +/* Branch to i0 when the compare is unordered: jump on "parity". */ static jit_word_t +_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomissr(r0, r1); + jp(i0); + return (_jit->pc.w); +} +fbopi(unord) + +dopi(lt) +dopi(le) + +static void 
+_sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + /* r0 = (r1 == r2) for doubles. If r0 fails reg8_p(), RAX stands in: its value is parked in r0 and swapped back by the final xchgr. The result is preset to 0; the jpes(0) is patched to skip the cc(), so the parity case (presumably an unordered compare -- confirm) keeps that 0. */ jit_bool_t rc; + jit_int32_t reg; + jit_word_t jp_code; + if ((rc = reg8_p(r0))) + reg = r0; + else { + reg = _RAX_REGNO; + movr(r0, _RAX_REGNO); + } + ixorr(reg, reg); + ucomisdr(r2, r1); + jpes(0); + jp_code = _jit->pc.w; + cc(X86_CC_E, reg); + patch_rel_char(jp_code, _jit->pc.w); + if (!rc) + xchgr(r0, reg); +} + +dopi(eq) +dopi(ge) +dopi(gt) + +/* r0 = (r1 != r2) for doubles. Same structure as the equality test, but the result is preset to 1 (imovi) so the path that skips the cc() reports "not equal". */ static void +_sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_bool_t rc; + jit_int32_t reg; + jit_word_t jp_code; + if ((rc = reg8_p(r0))) + reg = r0; + else { + reg = _RAX_REGNO; + movr(r0, _RAX_REGNO); + } + imovi(reg, 1); + ucomisdr(r2, r1); + jpes(0); + jp_code = _jit->pc.w; + cc(X86_CC_NE, reg); + patch_rel_char(jp_code, _jit->pc.w); + if (!rc) + xchgr(r0, reg); +} + +dopi(ne) +dopi(unlt) + +/* Double "unordered or less-or-equal" test into r0. Identical operand registers give the constant 1 without a compare; otherwise ssecmpd is called with r1 and r2 swapped and condition X86_CC_NA. */ static void +_sse_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpd(X86_CC_NA, r0, r2, r1); +} + +dopi(unle) + +/* Double "unordered or equal" test into r0; identical registers give 1, otherwise ssecmpd with X86_CC_E. */ static void +_sse_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpd(X86_CC_E, r0, r1, r2); +} + +dopi(uneq) + +/* Double "unordered or greater-or-equal" test into r0; identical registers give 1, otherwise ssecmpd with X86_CC_NA and the operands in their natural order. */ static void +_sse_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + movi(r0, 1); + else + ssecmpd(X86_CC_NA, r0, r1, r2); +} + +dopi(unge) +dopi(ungt) + +/* Double "less or greater" test into r0; identical registers give 0 (r0 cleared with ixorr), otherwise ssecmpd with X86_CC_NE. */ static void +_sse_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + if (r1 == r2) + ixorr(r0, r0); + else + ssecmpd(X86_CC_NE, r0, r1, r2); +} + +dopi(ltgt) +dopi(ord) +dopi(unord) + +/* Double register-to-register move; nothing is emitted when source and destination are the same register. */ static void +_sse_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 != r1) + ssexr(0xf2, X86_SSE_MOV, r0, r1); +} + +/* Load the double constant *i0 into r0. The union gives access to the same value as a double, as a machine word, and as two 32-bit halves. */ static void +_sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) +{ + union { + jit_int32_t ii[2]; + jit_word_t w; + jit_float64_t d; + } data; + jit_int32_t reg; + jit_bool_t ldi; + + 
data.d = *i0; + /* positive zero (0x80000000 bit of ii[1] clear) is produced by XORing r0 with itself */ if (data.d == 0.0 && !(data.ii[1] & 0x80000000)) + xorpdr(r0, r0); + else { + ldi = !_jitc->no_data; +#if __X64 + /* if a register would have to be allocated for the address, just use an immediate */ + if (ldi && !sse_address_p(i0)) + ldi = 0; +#endif + if (ldi) + sse_ldi_d(r0, (jit_word_t)i0); + else { + reg = jit_get_reg(jit_class_gpr); +#if __X64 && !__X64_32 + movi(rn(reg), data.w); + movdqxr(r0, rn(reg)); + jit_unget_reg(reg); +#else + /* no single word holds the value here: write the two 32-bit halves to the CVT_OFFSET slot relative to RBP, then load the double from that slot */ movi(rn(reg), data.ii[0]); + stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); + movi(rn(reg), data.ii[1]); + stxi_i(CVT_OFFSET + 4, _RBP_REGNO, rn(reg)); + jit_unget_reg(reg); + sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); +#endif + } + } +} + +/* Load a double into r0 from the absolute address i0; the address is used directly when sse_address_p(i0) holds, otherwise it is placed in a temporary GPR and loaded through sse_ldr_d. */ static void +_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) +{ + jit_int32_t reg; + if (sse_address_p(i0)) + movsdmr(i0, _NOREG, _NOREG, _SCL1, r0); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + sse_ldr_d(r0, rn(reg)); + jit_unget_reg(reg); + } +} + +/* Load a double into r0 from the address formed by r1 and r2. On __X64_32 the two registers are summed into a temporary GPR first; elsewhere a single movsdmr with r1 and r2 as its two address registers is emitted. */ static void +_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ +#if __X64_32 + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r1, r2); + sse_ldr_d(r0, rn(reg)); + jit_unget_reg(reg); +#else + movsdmr(0, r1, r2, _SCL1, r0); +#endif +} + +/* Load a double into r0 from r1 offset by the constant i0. The offset is encoded in the instruction when can_sign_extend_int_p(i0) holds; otherwise it is loaded into a temporary GPR (or, on __X64_32, summed with r1 there) and the register form of the load is used. */ static void +_sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) +{ + jit_int32_t reg; + if (can_sign_extend_int_p(i0)) + movsdmr(i0, r1, _NOREG, _SCL1, r0); + else { + reg = jit_get_reg(jit_class_gpr); +#if __X64_32 + addi(rn(reg), r1, i0); + sse_ldr_d(r0, rn(reg)); +#else + movi(rn(reg), i0); + sse_ldxr_d(r0, r1, rn(reg)); +#endif + jit_unget_reg(reg); + } +} + +/* Store the double in r0 to the absolute address i0; the address is used directly when sse_address_p(i0) holds, otherwise it is placed in a temporary GPR and stored through sse_str_d. */ static void +_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) +{ + jit_int32_t reg; + if (sse_address_p(i0)) + movsdrm(r0, i0, _NOREG, _NOREG, _SCL1); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + sse_str_d(rn(reg), r0); + jit_unget_reg(reg); + } +} + +static void +_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, 
jit_int32_t r1, jit_int32_t r2) +{ +#if __X64_32 + jit_int32_t reg; + reg = jit_get_reg(jit_class_gpr); + addr(rn(reg), r0, r1); + sse_str_d(rn(reg), r2); + jit_unget_reg(reg); +#else + movsdrm(r2, 0, r0, r1, _SCL1); +#endif +} + +/* Store the double in r1 to r0 offset by the constant i0. The offset is encoded in the instruction when can_sign_extend_int_p(i0) holds; otherwise it is loaded into a temporary GPR (or, on __X64_32, summed with r0 there) and the register form of the store is used. The register form must be the double store sse_stxr_d: the float variant would emit a movss-based store and write only part of the value. */ static void +_sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (can_sign_extend_int_p(i0)) + movsdrm(r1, i0, r0, _NOREG, _SCL1); + else { + reg = jit_get_reg(jit_class_gpr); +#if __X64_32 + addi(rn(reg), r0, i0); + sse_str_d(rn(reg), r1); +#else + movi(rn(reg), i0); + sse_stxr_d(rn(reg), r0, r1); +#endif + jit_unget_reg(reg); + } +} + +/* Double branch helpers: each emits a ucomisdr compare followed by a conditional jump to i0 and returns _jit->pc.w as it stands after the jump (presumably so the caller can patch the jump later -- confirm at the call sites). "Less than" is expressed as "above" with the operands swapped. */ static jit_word_t +_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r1, r0); + ja(i0); + return (_jit->pc.w); +} +dbopi(lt) + +/* Branch to i0 if r0 is less than or equal to r1: swapped compare, then "above or equal". */ static jit_word_t +_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r1, r0); + jae(i0); + return (_jit->pc.w); +} +dbopi(le) + +/* Branch to i0 if r0 equals r1. A short jps(0) is emitted before the je and patched to land just after it, so the parity case (presumably an unordered compare -- confirm) never takes the branch. */ static jit_word_t +_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t jp_code; + ucomisdr(r0, r1); + jps(0); + jp_code = _jit->pc.w; + je(i0); + patch_rel_char(jp_code, _jit->pc.w); + return (_jit->pc.w); +} +dbopi(eq) + +/* Branch to i0 if r0 is greater than or equal to r1. */ static jit_word_t +_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + jae(i0); + return (_jit->pc.w); +} +dbopi(ge) + +/* Branch to i0 if r0 is greater than r1. */ static jit_word_t +_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + ja(i0); + return (_jit->pc.w); +} +dbopi(gt) + +/* Branch to i0 if r0 differs from r1. jps(0) is patched to land on the jmpi(i0), so the parity case always branches; jzs(0) is patched to land after the jmpi, so the zero/equal case falls through. */ static jit_word_t +_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t jp_code; + jit_word_t jz_code; + ucomisdr(r0, r1); + jps(0); + jp_code = _jit->pc.w; + jzs(0); + jz_code = _jit->pc.w; + patch_rel_char(jp_code, _jit->pc.w); + jmpi(i0); + patch_rel_char(jz_code, _jit->pc.w); + return (_jit->pc.w); +} +dbopi(ne) + +static jit_word_t +_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, 
jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + jnae(i0); + return (_jit->pc.w); +} +dbopi(unlt) + +/* Branch to i0 on "unordered or less-or-equal" for doubles. With identical operand registers no compare is emitted and the jump is unconditional. Returns _jit->pc.w as it stands after the jump. */ static jit_word_t +_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomisdr(r0, r1); + jna(i0); + } + return (_jit->pc.w); +} +dbopi(unle) + +/* Branch to i0 on "unordered or equal"; unconditional when both operands are the same register. */ static jit_word_t +_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomisdr(r0, r1); + je(i0); + } + return (_jit->pc.w); +} +dbopi(uneq) + +/* Branch to i0 on "unordered or greater-or-equal": swapped compare, then "not above"; unconditional when both operands are the same register. */ static jit_word_t +_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + if (r0 == r1) + jmpi(i0); + else { + ucomisdr(r1, r0); + jna(i0); + } + return (_jit->pc.w); +} +dbopi(unge) + +/* Branch to i0 on "unordered or greater than": swapped compare, then "not above or equal". */ static jit_word_t +_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r1, r0); + jnae(i0); + return (_jit->pc.w); +} +dbopi(ungt) + +/* Branch to i0 on "less or greater": compare, then jump on "not equal". */ static jit_word_t +_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + jne(i0); + return (_jit->pc.w); +} +dbopi(ltgt) + +/* Branch to i0 when the compare is ordered: jump on "no parity". */ static jit_word_t +_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + jnp(i0); + return (_jit->pc.w); +} +dbopi(ord) + +/* Branch to i0 when the compare is unordered: jump on "parity". */ static jit_word_t +_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +{ + ucomisdr(r0, r1); + jp(i0); + return (_jit->pc.w); +} +dbopi(unord) +# undef fopi +# undef fbopi +# undef bopi +# undef dbopi +# undef fpr_bopi +# undef fpr_opi +#endif |