Diffstat (limited to 'audio/softsynth/mt32/i386.cpp')
-rw-r--r--	audio/softsynth/mt32/i386.cpp	849
1 file changed, 849 insertions(+), 0 deletions(-)
diff --git a/audio/softsynth/mt32/i386.cpp b/audio/softsynth/mt32/i386.cpp
new file mode 100644
index 0000000000..f092189d76
--- /dev/null
+++ b/audio/softsynth/mt32/i386.cpp
@@ -0,0 +1,849 @@
+/* Copyright (c) 2003-2005 Various contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "mt32emu.h"
+
+#ifdef MT32EMU_HAVE_X86
+
+namespace MT32Emu {
+
+#ifndef _MSC_VER
+
+#define eflag(value) __asm__ __volatile__("pushfl \n popfl \n" : : "a"(value))
+#define cpuid_flag (1 << 21)
+
+static inline bool atti386_DetectCPUID() {
+	unsigned int result;
+
+	// Is there a cpuid?
+	result = cpuid_flag; // set test
+	eflag(result);
+	if (!(result & cpuid_flag))
+		return false;
+
+	result = 0; // clear test
+	eflag(result);
+	if (result & cpuid_flag)
+		return false;
+
+	return true;
+}
+
+static inline bool atti386_DetectSIMD() {
+	unsigned int result;
+
+	if (atti386_DetectCPUID() == false)
+		return false;
+
+	/* check cpuid */
+	__asm__ __volatile__(
+		"pushl %%ebx \n" \
+		"movl $1, %%eax \n" \
+		"cpuid \n" \
+		"movl %%edx, %0 \n" \
+		"popl %%ebx \n" \
+		: "=r"(result) : : "eax", "ecx", "edx");
+
+	if (result & (1 << 25))
+		return true;
+
+	return false;
+}
+
+static inline bool atti386_Detect3DNow() {
+	unsigned int result;
+
+	if (atti386_DetectCPUID() == false)
+		return false;
+
+	// get cpuid
+	__asm__ __volatile__(
+		"pushl %%ebx \n" \
+		"movl $0x80000001, %%eax \n" \
+		"cpuid \n" \
+		"movl %%edx, %0 \n" \
+		"popl %%ebx \n" \
+		: "=r"(result) : : "eax", "ecx", "edx");
+
+	if (result & 0x80000000)
+		return true;
+
+	return false;
+}
+
+
+static inline float atti386_iir_filter_sse(float *output, float *hist1_ptr, float *coef_ptr) {
+	__asm__ __volatile__ (
+		"pushl %1 \n" \
+		"pushl %2 \n" \
+		"movss 0(%0), %%xmm1 \n" \
+		"movups 0(%1), %%xmm2 \n" \
+		"movlps 0(%2), %%xmm3 \n" \
+		" \n" \
+		"shufps $0x44, %%xmm3, %%xmm3 \n" \
+		" \n" \
+		"mulps %%xmm3, %%xmm2 \n" \
+		" \n" \
+		"subss %%xmm2, %%xmm1 \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"subss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"movss %%xmm1, 0(%2) \n" \
+		" \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"addss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"addss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"movss %%xmm3, 4(%2) \n" \
+		" \n" \
+		"addl $16, %1 \n" \
+		"addl $8, %2 \n" \
+		" \n" \
+		"movups 0(%1), %%xmm2 \n" \
+		" \n" \
+		"movlps 0(%2), %%xmm3 \n" \
+		"shufps $0x44, %%xmm3, %%xmm3 \n" \
+		" \n" \
+		"mulps %%xmm3, %%xmm2 \n" \
+		" \n" \
+		"subss %%xmm2, %%xmm1 \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"subss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"movss %%xmm1, 0(%2) \n" \
+		" \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"addss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"shufps $0x39, %%xmm2, %%xmm2 \n" \
+		"addss %%xmm2, %%xmm1 \n" \
+		" \n" \
+		"movss %%xmm3, 4(%2) \n" \
+		"movss %%xmm1, 0(%0) \n" \
+		"popl %2 \n" \
+		"popl %1 \n" \
+		: : "r"(output), "r"(coef_ptr), "r"(hist1_ptr)
+		: "memory"
+#ifdef __SSE__
+		, "xmm1", "xmm2", "xmm3"
+#endif
+		);
+
+	return *output;
+}
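Note: the SSE routine above, the 3DNow! routine below, and the MSVC versions further down all compute the same two-section direct form II filter. A minimal scalar sketch of what the assembly does, not part of this patch, assuming the layout the code uses (four coefficients per section after the gain term, two history floats per section; the helper name is hypothetical):

```cpp
// Hypothetical scalar reference for the SSE/3DNow! filter routines in this
// file. Assumes coef_ptr points just past the gain term (the callers
// iir_filter_sse/iir_filter_3dnow consume it first) at two biquad sections
// of four floats [a1, a2, b1, b2], and hist1_ptr at two history floats per
// section.
static float iir_filter_scalar(float input, float *hist1_ptr, const float *coef_ptr) {
	float out = input; // overall gain already applied by the caller
	for (int section = 0; section < 2; section++) {
		float h0 = hist1_ptr[0], h1 = hist1_ptr[1];
		float w = out - coef_ptr[0] * h0 - coef_ptr[1] * h1; // feedback half
		out = w + coef_ptr[2] * h0 + coef_ptr[3] * h1;       // feedforward half (b0 == 1)
		hist1_ptr[0] = w;  // new history
		hist1_ptr[1] = h0; // old history shifts down one slot
		coef_ptr += 4;
		hist1_ptr += 2;
	}
	return out;
}
```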
"shufps $0x39, %%xmm2, %%xmm2 \n" \ + "subss %%xmm2, %%xmm1 \n" \ + " \n" \ + "movss %%xmm1, 0(%2) \n" \ + " \n" \ + "shufps $0x39, %%xmm2, %%xmm2 \n" \ + "addss %%xmm2, %%xmm1 \n" \ + " \n" \ + "shufps $0x39, %%xmm2, %%xmm2 \n" \ + "addss %%xmm2, %%xmm1 \n" \ + " \n" \ + "movss %%xmm3, 4(%2) \n" \ + "movss %%xmm1, 0(%0) \n" \ + "popl %2 \n" \ + "popl %1 \n" \ + : : "r"(output), "r"(coef_ptr), "r"(hist1_ptr) + : "memory" +#ifdef __SSE__ + , "xmm1", "xmm2", "xmm3" +#endif + ); + + return *output; +} + +static inline float atti386_iir_filter_3DNow(float output, float *hist1_ptr, float *coef_ptr) { + float tmp; + + __asm__ __volatile__ ( + "movq %0, %%mm1 \n" \ + " \n" \ + "movl %1, %%edi \n" \ + "movq 0(%%edi), %%mm2 \n" \ + " \n" \ + "movl %2, %%eax; \n" \ + "movq 0(%%eax), %%mm3 \n" \ + " \n" \ + "pfmul %%mm3, %%mm2 \n" \ + "pfsub %%mm2, %%mm1 \n" \ + " \n" \ + "psrlq $32, %%mm2 \n" \ + "pfsub %%mm2, %%mm1 \n" \ + " \n" \ + "movd %%mm1, %3 \n" \ + " \n" \ + "addl $8, %%edi \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "movq 0(%%eax), %%mm3 \n" \ + " \n" \ + "pfmul %%mm3, %%mm2 \n" \ + "pfadd %%mm2, %%mm1 \n" \ + " \n" \ + "psrlq $32, %%mm2 \n" \ + "pfadd %%mm2, %%mm1 \n" \ + " \n" \ + "pushl %3 \n" \ + "popl 0(%%eax) \n" \ + " \n" \ + "movd %%mm3, 4(%%eax) \n" \ + " \n" \ + "addl $8, %%edi \n" \ + "addl $8, %%eax \n" \ + " \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "movq 0(%%eax), %%mm3 \n" \ + " \n" \ + "pfmul %%mm3, %%mm2 \n" \ + "pfsub %%mm2, %%mm1 \n" \ + " \n" \ + "psrlq $32, %%mm2 \n" \ + "pfsub %%mm2, %%mm1 \n" \ + " \n" \ + "movd %%mm1, %3 \n" \ + " \n" \ + "addl $8, %%edi \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "movq 0(%%eax), %%mm3 \n" \ + " \n" \ + "pfmul %%mm3, %%mm2 \n" \ + "pfadd %%mm2, %%mm1 \n" \ + " \n" \ + "psrlq $32, %%mm2 \n" \ + "pfadd %%mm2, %%mm1 \n" \ + " \n" \ + "pushl %3 \n" \ + "popl 0(%%eax) \n" \ + "movd %%mm3, 4(%%eax) \n" \ + " \n" \ + "movd %%mm1, %0 \n" \ + "femms \n" \ + : "=m"(output) : "g"(coef_ptr), "g"(hist1_ptr), "m"(tmp) + : "eax", "edi", "memory" +#ifdef __MMX__ + , "mm1", "mm2", "mm3" +#endif + ); + + return output; +} + +static inline void atti386_produceOutput1(int tmplen, Bit16s myvolume, Bit16s *useBuf, Bit16s *snd) { + __asm__ __volatile__( + "movl %0, %%ecx \n" \ + "movw %1, %%ax \n" \ + "shll $16, %%eax \n" \ + "movw %1, %%ax \n" \ + "movd %%eax, %%mm3 \n" \ + "movd %%eax, %%mm2 \n" \ + "psllq $32, %%mm3 \n" \ + "por %%mm2, %%mm3 \n" \ + "movl %2, %%esi \n" \ + "movl %3, %%edi \n" \ + "1: \n" \ + "movq 0(%%esi), %%mm1 \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "pmulhw %%mm3, %%mm1 \n" \ + "paddw %%mm2, %%mm1 \n" \ + "movq %%mm1, 0(%%edi) \n" \ + " \n" \ + "addl $8, %%esi \n" \ + "addl $8, %%edi \n" \ + " \n" \ + "decl %%ecx \n" \ + "cmpl $0, %%ecx \n" \ + "jg 1b \n" \ + "emms \n" \ + : : "g"(tmplen), "g"(myvolume), "g"(useBuf), "g"(snd) + : "eax", "ecx", "edi", "esi", "memory" +#ifdef __MMX__ + , "mm1", "mm2", "mm3" +#endif + ); +} + +static inline void atti386_produceOutput2(Bit32u len, Bit16s *snd, float *sndbufl, float *sndbufr, float *multFactor) { + __asm__ __volatile__( + "movl %4, %%ecx \n" \ + "shrl $1, %%ecx \n" \ + "addl $4, %%ecx \n" \ + "pushl %%ecx \n" \ + " \n" \ + "movl %0, %%esi \n" \ + "movups 0(%%esi), %%xmm1 \n" \ + " \n" \ + "movl %1, %%esi \n" \ + "movl %2, %%edi \n" \ + "1: \n" \ + "xorl %%eax, %%eax \n" \ + "movw 0(%1), %%ax \n" \ + "cwde \n" \ + "incl %1 \n" \ + "incl %1 \n" \ + "movd %%eax, %%mm1 \n" \ + "psrlq $32, %%mm1 \n" \ + "movw 0(%1), %%ax \n" \ + "incl %1 \n" \ + "incl %1 \n" \ + "movd %%eax, %%mm2 \n" \ + "por %%mm2, %%mm1 \n" 
\ + " \n" \ + "decl %%ecx \n" \ + "jnz 1b \n" \ + " \n" \ + "popl %%ecx \n" \ + "movl %1, %%esi \n" \ + "movl %3, %%edi \n" \ + "incl %%esi \n" \ + "2: \n" \ + "decl %%ecx \n" \ + "jnz 2b \n" \ + : : "g"(multFactor), "r"(snd), "g"(sndbufl), "g"(sndbufr), "g"(len) + : "eax", "ecx", "edi", "esi", "mm1", "mm2", "xmm1", "memory"); +} + +static inline void atti386_mixBuffers(Bit16s * buf1, Bit16s *buf2, int len) { + __asm__ __volatile__( + "movl %0, %%ecx \n" \ + "movl %1, %%esi \n" \ + "movl %2, %%edi \n" \ + "1: \n" \ + "movq 0(%%edi), %%mm1 \n" \ + "movq 0(%%esi), %%mm2 \n" \ + "paddw %%mm2, %%mm1 \n" \ + "movq %%mm1, 0(%%esi) \n" \ + "addl $8, %%edi \n" \ + "addl $8, %%esi \n" \ + "decl %%ecx \n" \ + "cmpl $0, %%ecx \n" \ + "jg 1b \n" \ + "emms \n" \ + : : "g"(len), "g"(buf1), "g"(buf2) + : "ecx", "edi", "esi", "memory" +#ifdef __MMX__ + , "mm1", "mm2" +#endif + ); +} + +static inline void atti386_mixBuffersRingMix(Bit16s * buf1, Bit16s *buf2, int len) { + __asm__ __volatile__( + "movl %0, %%ecx \n" \ + "movl %1, %%esi \n" \ + "movl %2, %%edi \n" \ + "1: \n" \ + "movq 0(%%esi), %%mm1 \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "movq %%mm1, %%mm3 \n" \ + "pmulhw %%mm2, %%mm1 \n" \ + "paddw %%mm3, %%mm1 \n" \ + "movq %%mm1, 0(%%esi) \n" \ + "addl $8, %%edi \n" \ + "addl $8, %%esi \n" \ + "decl %%ecx \n" \ + "cmpl $0, %%ecx \n" \ + "jg 1b \n" \ + "emms \n" \ + : : "g"(len), "g"(buf1), "g"(buf2) + : "ecx", "edi", "esi", "memory" +#ifdef __MMX__ + , "mm1", "mm2", "mm3" +#endif + ); +} + +static inline void atti386_mixBuffersRing(Bit16s * buf1, Bit16s *buf2, int len) { + __asm__ __volatile__( + "movl %0, %%ecx \n" \ + "movl %1, %%esi \n" \ + "movl %2, %%edi \n" \ + "1: \n" \ + "movq 0(%%esi), %%mm1 \n" \ + "movq 0(%%edi), %%mm2 \n" \ + "pmulhw %%mm2, %%mm1 \n" \ + "movq %%mm1, 0(%%esi) \n" \ + "addl $8, %%edi \n" \ + "addl $8, %%esi \n" \ + "decl %%ecx \n" \ + "cmpl $0, %%ecx \n" \ + "jg 1b \n" \ + "emms \n" \ + : : "g"(len), "g"(buf1), "g"(buf2) + : "ecx", "edi", "esi", "memory" +#ifdef __MMX__ + , "mm1", "mm2" +#endif + ); +} + +static inline void atti386_partialProductOutput(int quadlen, Bit16s leftvol, Bit16s rightvol, Bit16s *partialBuf, Bit16s *p1buf) { + __asm__ __volatile__( + "movl %0, %%ecx \n" \ + "movw %1, %%ax \n" \ + "shll $16, %%eax \n" \ + "movw %2, %%ax \n" \ + "movd %%eax, %%mm1 \n" \ + "movd %%eax, %%mm2 \n" \ + "psllq $32, %%mm1 \n" \ + "por %%mm2, %%mm1 \n" \ + "movl %3, %%edi \n" \ + "movl %4, %%esi \n" \ + "pushl %%ebx \n" \ + "1: \n" \ + "movw 0(%%esi), %%bx \n" \ + "addl $2, %%esi \n" \ + "movw 0(%%esi), %%dx \n" \ + "addl $2, %%esi \n" \ + "" \ + "movw %%dx, %%ax \n" \ + "shll $16, %%eax \n" \ + "movw %%dx, %%ax \n" \ + "movd %%eax, %%mm2 \n" \ + "psllq $32, %%mm2 \n" \ + "movw %%bx, %%ax \n" \ + "shll $16, %%eax \n" \ + "movw %%bx, %%ax \n" \ + "movd %%eax, %%mm3 \n" \ + "por %%mm3, %%mm2 \n" \ + "" \ + "pmulhw %%mm1, %%mm2 \n" \ + "movq %%mm2, 0(%%edi) \n" \ + "addl $8, %%edi \n" \ + "" \ + "decl %%ecx \n" \ + "cmpl $0, %%ecx \n" \ + "jg 1b \n" \ + "emms \n" \ + "popl %%ebx \n" \ + : : "g"(quadlen), "g"(leftvol), "g"(rightvol), "g"(partialBuf), "g"(p1buf) + : "eax", "ecx", "edx", "edi", "esi", "memory" +#ifdef __MMX__ + , "mm1", "mm2", "mm3" +#endif + ); +} + +#endif + +bool DetectSIMD() { +#ifdef _MSC_VER + bool found_simd; + __asm { + pushfd + pop eax // get EFLAGS into eax + mov ebx,eax // keep a copy + xor eax,0x200000 + // toggle CPUID bit + + push eax + popfd // set new EFLAGS + pushfd + pop eax // EFLAGS back into eax + + xor eax,ebx + // have we changed the ID bit? 
+
+bool DetectSIMD() {
+#ifdef _MSC_VER
+	bool found_simd;
+	__asm {
+		pushfd
+		pop eax		// get EFLAGS into eax
+		mov ebx,eax	// keep a copy
+		xor eax,0x200000
+		// toggle CPUID bit
+
+		push eax
+		popfd		// set new EFLAGS
+		pushfd
+		pop eax		// EFLAGS back into eax
+
+		xor eax,ebx
+		// have we changed the ID bit?
+
+		je NO_SIMD
+		// No, no CPUID instruction
+
+		// we could toggle the
+		// ID bit so CPUID is present
+		mov eax,1
+
+		cpuid		// get processor features
+		test edx,1<<25	// check the SIMD bit
+		jz NO_SIMD
+		mov found_simd,1
+		jmp DONE
+	NO_SIMD:
+		mov found_simd,0
+	DONE:
+	}
+	return found_simd;
+#else
+	return atti386_DetectSIMD();
+#endif
+}
+
+bool Detect3DNow() {
+#ifdef _MSC_VER
+	bool found3D = false;
+	__asm {
+		pushfd
+		pop eax
+		mov edx, eax
+		xor eax, 00200000h
+		push eax
+		popfd
+		pushfd
+		pop eax
+		xor eax, edx
+		jz NO_3DNOW
+
+		mov eax, 80000000h
+		cpuid
+
+		cmp eax, 80000000h
+		jbe NO_3DNOW
+
+		mov eax, 80000001h
+		cpuid
+		test edx, 80000000h
+		jz NO_3DNOW
+		mov found3D, 1
+NO_3DNOW:
+
+	}
+	return found3D;
+#else
+	return atti386_Detect3DNow();
+#endif
+}
+
+float iir_filter_sse(float input,float *hist1_ptr, float *coef_ptr) {
+	float output;
+
+	// 1st number of coefficients array is overall input scale factor, or filter gain
+	output = input * (*coef_ptr++);
+
+#ifdef _MSC_VER
+	__asm {
+
+		movss xmm1, output
+
+		mov eax, coef_ptr
+		movups xmm2, [eax]
+
+		mov eax, hist1_ptr
+		movlps xmm3, [eax]
+		shufps xmm3, xmm3, 44h
+		// hist1_ptr+1, hist1_ptr, hist1_ptr+1, hist1_ptr
+
+		mulps xmm2, xmm3
+
+		subss xmm1, xmm2
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		subss xmm1, xmm2
+
+		// Store new_hist
+		movss DWORD PTR [eax], xmm1
+
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		addss xmm1, xmm2
+
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		addss xmm1, xmm2
+
+		// Store previous hist
+		movss DWORD PTR [eax+4], xmm3
+
+		add coef_ptr, 16
+		add hist1_ptr, 8
+
+		mov eax, coef_ptr
+		movups xmm2, [eax]
+
+		mov eax, hist1_ptr
+		movlps xmm3, [eax]
+		shufps xmm3, xmm3, 44h
+		// hist1_ptr+1, hist1_ptr, hist1_ptr+1, hist1_ptr
+
+		mulps xmm2, xmm3
+
+		subss xmm1, xmm2
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		subss xmm1, xmm2
+
+		// Store new_hist
+		movss DWORD PTR [eax], xmm1
+
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		addss xmm1, xmm2
+
+		// Rotate elements right
+		shufps xmm2, xmm2, 39h
+		addss xmm1, xmm2
+
+		// Store previous hist
+		movss DWORD PTR [eax+4], xmm3
+
+		movss output, xmm1
+	}
+#else
+	output = atti386_iir_filter_sse(&output, hist1_ptr, coef_ptr);
+#endif
+	return output;
+}
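With detection and the SSE filter in place, a caller would typically pick one implementation once at startup. A hypothetical dispatch sketch (the real selection logic lives elsewhere in mt32emu, not in this patch):

```cpp
// Hypothetical dispatch sketch using the detection functions above.
float iir_filter_3dnow(float input, float *hist1_ptr, float *coef_ptr); // defined below

typedef float (*FilterFunc)(float input, float *hist1_ptr, float *coef_ptr);

static FilterFunc pickFilter(FilterFunc plainC) {
	if (DetectSIMD())
		return &iir_filter_sse;   // SSE path
	if (Detect3DNow())
		return &iir_filter_3dnow; // 3DNow! path
	return plainC;                // portable fallback
}
```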
+
+float iir_filter_3dnow(float input,float *hist1_ptr, float *coef_ptr) {
+	float output;
+
+	// 1st number of coefficients array is overall input scale factor, or filter gain
+	output = input * (*coef_ptr++);
+
+	// I find it very sad that 3DNow requires twice as many instructions as Intel's SSE
+	// Intel does have the upper hand here.
+#ifdef _MSC_VER
+	float tmp;
+	__asm {
+		movq mm1, output
+		mov ebx, coef_ptr
+		movq mm2, [ebx]
+
+		mov eax, hist1_ptr;
+		movq mm3, [eax]
+
+		pfmul mm2, mm3
+		pfsub mm1, mm2
+
+		psrlq mm2, 32
+		pfsub mm1, mm2
+
+		// Store new hist
+		movd tmp, mm1
+
+		add ebx, 8
+		movq mm2, [ebx]
+		movq mm3, [eax]
+
+		pfmul mm2, mm3
+		pfadd mm1, mm2
+
+		psrlq mm2, 32
+		pfadd mm1, mm2
+
+		push tmp
+		pop DWORD PTR [eax]
+
+		movd DWORD PTR [eax+4], mm3
+
+		add ebx, 8
+		add eax, 8
+
+		movq mm2, [ebx]
+		movq mm3, [eax]
+
+		pfmul mm2, mm3
+		pfsub mm1, mm2
+
+		psrlq mm2, 32
+		pfsub mm1, mm2
+
+		// Store new hist
+		movd tmp, mm1
+
+		add ebx, 8
+		movq mm2, [ebx]
+		movq mm3, [eax]
+
+		pfmul mm2, mm3
+		pfadd mm1, mm2
+
+		psrlq mm2, 32
+		pfadd mm1, mm2
+
+		push tmp
+		pop DWORD PTR [eax]
+		movd DWORD PTR [eax+4], mm3
+
+		movd output, mm1
+
+		femms
+	}
+#else
+	output = atti386_iir_filter_3DNow(output, hist1_ptr, coef_ptr);
+#endif
+	return output;
+}
+
+#if MT32EMU_USE_MMX > 0
+
+int i386_partialProductOutput(int len, Bit16s leftvol, Bit16s rightvol, Bit16s *partialBuf, Bit16s *mixedBuf) {
+	int tmplen = len >> 1;
+	if (tmplen == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	__asm {
+		mov ecx,tmplen
+		mov ax, leftvol
+		shl eax,16
+		mov ax, rightvol
+		movd mm1, eax
+		movd mm2, eax
+		psllq mm1, 32
+		por mm1, mm2
+		mov edi, partialBuf
+		mov esi, mixedBuf
+mmxloop1:
+		mov bx, [esi]
+		add esi,2
+		mov dx, [esi]
+		add esi,2
+
+		mov ax, dx
+		shl eax, 16
+		mov ax, dx
+		movd mm2,eax
+		psllq mm2, 32
+		mov ax, bx
+		shl eax, 16
+		mov ax, bx
+		movd mm3,eax
+		por mm2,mm3
+
+		pmulhw mm2, mm1
+		movq [edi], mm2
+		add edi, 8
+
+		dec ecx
+		cmp ecx,0
+		jg mmxloop1
+		emms
+	}
+#else
+	atti386_partialProductOutput(tmplen, leftvol, rightvol, partialBuf, mixedBuf);
+#endif
+	return tmplen << 1;
+}
+
+int i386_mixBuffers(Bit16s * buf1, Bit16s *buf2, int len) {
+	int tmplen = len >> 2;
+	if (tmplen == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	__asm {
+		mov ecx, tmplen
+		mov esi, buf1
+		mov edi, buf2
+
+mixloop1:
+		movq mm1, [edi]
+		movq mm2, [esi]
+		paddw mm1,mm2
+		movq [esi],mm1
+		add edi,8
+		add esi,8
+
+		dec ecx
+		cmp ecx,0
+		jg mixloop1
+		emms
+	}
+#else
+	atti386_mixBuffers(buf1, buf2, tmplen);
+#endif
+	return tmplen << 2;
+}
+
+
+int i386_mixBuffersRingMix(Bit16s * buf1, Bit16s *buf2, int len) {
+	int tmplen = len >> 2;
+	if (tmplen == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	__asm {
+		mov ecx, tmplen
+		mov esi, buf1
+		mov edi, buf2
+
+mixloop2:
+		movq mm1, [esi]
+		movq mm2, [edi]
+		movq mm3, mm1
+		pmulhw mm1, mm2
+		paddw mm1,mm3
+		movq [esi],mm1
+		add edi,8
+		add esi,8
+
+		dec ecx
+		cmp ecx,0
+		jg mixloop2
+		emms
+	}
+#else
+	atti386_mixBuffersRingMix(buf1, buf2, tmplen);
+#endif
+	return tmplen << 2;
+}
+
+int i386_mixBuffersRing(Bit16s * buf1, Bit16s *buf2, int len) {
+	int tmplen = len >> 2;
+	if (tmplen == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	__asm {
+		mov ecx, tmplen
+		mov esi, buf1
+		mov edi, buf2
+
+mixloop3:
+		movq mm1, [esi]
+		movq mm2, [edi]
+		pmulhw mm1, mm2
+		movq [esi],mm1
+		add edi,8
+		add esi,8
+
+		dec ecx
+		cmp ecx,0
+		jg mixloop3
+		emms
+	}
+#else
+	atti386_mixBuffersRing(buf1, buf2, tmplen);
+#endif
+	return tmplen << 2;
+}
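In i386_partialProductOutput each source sample yields one stereo pair scaled by the two channel volumes. A hypothetical scalar equivalent, not part of this patch; going by the word order the MMX code stores, the right-volume product lands in the lower word of each pair:

```cpp
// Hypothetical scalar equivalent of the partial product output loops above.
// tmplen counts quadword iterations: two input samples, four output words.
static void partialProductOutput_scalar(int tmplen, Bit16s leftvol, Bit16s rightvol,
                                        Bit16s *partialBuf, const Bit16s *mixedBuf) {
	for (int i = 0; i < tmplen * 2; i++) {
		Bit16s s = mixedBuf[i];
		*partialBuf++ = (Bit16s)(((int)s * rightvol) >> 16); // lower word of each pair
		*partialBuf++ = (Bit16s)(((int)s * leftvol) >> 16);
	}
}
```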
+
+int i386_produceOutput1(Bit16s *useBuf, Bit16s *stream, Bit32u len, Bit16s volume) {
+	int tmplen = (len >> 1);
+	if (tmplen == 0) {
+		return 0;
+	}
+#ifdef _MSC_VER
+	__asm {
+		mov ecx, tmplen
+		mov ax,volume
+		shl eax,16
+		mov ax,volume
+		movd mm3,eax
+		movd mm2,eax
+		psllq mm3, 32
+		por mm3,mm2
+		mov esi, useBuf
+		mov edi, stream
+mixloop4:
+		movq mm1, [esi]
+		movq mm2, [edi]
+		pmulhw mm1, mm3
+		paddw mm1,mm2
+		movq [edi], mm1
+
+		add esi,8
+		add edi,8
+
+		dec ecx
+		cmp ecx,0
+		jg mixloop4
+		emms
+	}
+#else
+	atti386_produceOutput1(tmplen, volume, useBuf, stream);
+#endif
+	return tmplen << 1;
+}
+
+#endif
+
+}
+
+#endif
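Each i386_* wrapper returns how many samples it actually processed (rounded down to a multiple of two or four for the MMX loops), so a caller can finish any remainder in portable C++. A hypothetical usage sketch, not from this patch:

```cpp
// Hypothetical caller: use the MMX fast path, then mix the tail in plain C++.
void mixWithFallback(Bit16s *buf1, Bit16s *buf2, int len) {
	int done = i386_mixBuffers(buf1, buf2, len); // handles len & ~3 samples
	for (int i = done; i < len; i++)
		buf1[i] = (Bit16s)(buf1[i] + buf2[i]);
}
```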