From ce188d4d7f3b9ad04867c49b424e8497c2ade92b Mon Sep 17 00:00:00 2001 From: aliaspider Date: Tue, 14 May 2019 08:58:52 +0100 Subject: add CHD support. --- deps/lzma-16.04/C/AesOpt.c | 184 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 deps/lzma-16.04/C/AesOpt.c (limited to 'deps/lzma-16.04/C/AesOpt.c') diff --git a/deps/lzma-16.04/C/AesOpt.c b/deps/lzma-16.04/C/AesOpt.c new file mode 100644 index 0000000..e5d4d26 --- /dev/null +++ b/deps/lzma-16.04/C/AesOpt.c @@ -0,0 +1,184 @@ +/* AesOpt.c -- Intel's AES +2013-11-12 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 +#if _MSC_VER >= 1500 +#define USE_INTEL_AES +#endif +#endif + +#ifdef USE_INTEL_AES + +#include + +void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +{ + __m128i m = *p; + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p + 3; + m = _mm_xor_si128(m, *data); + m = _mm_xor_si128(m, p[2]); + do + { + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenc_si128(m, w[1]); + w += 2; + } + while (--numRounds2 != 0); + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenclast_si128(m, w[1]); + *data = m; + } + *p = m; +} + +#define NUM_WAYS 3 + +#define AES_OP_W(op, n) { \ + const __m128i t = w[n]; \ + m0 = op(m0, t); \ + m1 = op(m1, t); \ + m2 = op(m2, t); \ + } + +#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) +#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) +#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) +#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) + +void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) +{ + __m128i iv = *p; + for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const __m128i *w = p + numRounds2 * 2; + __m128i m0, m1, m2; + { + const __m128i t = w[2]; + m0 = _mm_xor_si128(t, data[0]); + m1 = _mm_xor_si128(t, data[1]); + m2 = _mm_xor_si128(t, data[2]); + } + numRounds2--; + do + { + AES_DEC(1) + AES_DEC(0) + w -= 2; + } + while (--numRounds2 != 0); + AES_DEC(1) + AES_DEC_LAST(0) + + { + __m128i t; + t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; + t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; + t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; + } + } + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const __m128i *w = p + numRounds2 * 2; + __m128i m = _mm_xor_si128(w[2], *data); + numRounds2--; + do + { + m = _mm_aesdec_si128(m, w[1]); + m = _mm_aesdec_si128(m, w[0]); + w -= 2; + } + while (--numRounds2 != 0); + m = _mm_aesdec_si128(m, w[1]); + m = _mm_aesdeclast_si128(m, w[0]); + + m = _mm_xor_si128(m, iv); + iv = *data; + *data = m; + } + *p = iv; +} + +void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) +{ + __m128i ctr = *p; + __m128i one; + one.m128i_u64[0] = 1; + one.m128i_u64[1] = 0; + for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p; + __m128i m0, m1, m2; + { + const __m128i t = w[2]; + ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); + ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); + ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); + } + w += 3; + do + { + AES_ENC(0) + AES_ENC(1) + w += 2; + } + while (--numRounds2 != 0); + AES_ENC(0) + AES_ENC_LAST(1) + data[0] = _mm_xor_si128(data[0], m0); + data[1] = _mm_xor_si128(data[1], m1); + data[2] = _mm_xor_si128(data[2], m2); + } + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p; + __m128i m; + ctr = _mm_add_epi64(ctr, one); + m = _mm_xor_si128(ctr, p[2]); + w += 3; + do + { + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenc_si128(m, w[1]); + w += 2; + } + while (--numRounds2 != 0); + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenclast_si128(m, w[1]); + *data = _mm_xor_si128(*data, m); + } + *p = ctr; +} + +#else + +void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); +void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); +void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); + +void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCbc_Encode(p, data, numBlocks); +} + +void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCbc_Decode(p, data, numBlocks); +} + +void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCtr_Code(p, data, numBlocks); +} + +#endif -- cgit v1.2.3