aboutsummaryrefslogtreecommitdiff
path: root/deps/flac-1.3.2/src/libFLAC/ia32
diff options
context:
space:
mode:
Diffstat (limited to 'deps/flac-1.3.2/src/libFLAC/ia32')
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/Makefile.am 44
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/Makefile.in 640
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/cpu_asm.nasm 99
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/fixed_asm.nasm 309
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/lpc_asm.nasm 2049
-rw-r--r-- deps/flac-1.3.2/src/libFLAC/ia32/nasm.h 90
6 files changed, 3231 insertions, 0 deletions
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.am b/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.am
new file mode 100644
index 0000000..5b4880b
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.am
@@ -0,0 +1,44 @@
+# libFLAC - Free Lossless Audio Codec library
+# Copyright (C) 2001-2009 Josh Coalson
+# Copyright (C) 2011-2016 Xiph.Org Foundation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# - Neither the name of the Xiph.org Foundation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Register the extra suffixes so the .nasm.lo suffix rule below is recognised.
+SUFFIXES = .nasm .lo
+
+# Helper script placed between libtool and $(NASM) in the recipe below.
+# NOTE(review): judging by its name it filters out libtool-added arguments
+# that NASM does not accept -- confirm against the script itself.
+STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
+# Not referenced by the .nasm.lo recipe in this file.
+AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
+# Assemble one .nasm source ($<) into a libtool object ($@) by running NASM
+# through libtool's compile mode.  $(OBJ_FORMAT) selects the output format
+# (-f) and is also exposed to the sources as the macro OBJ_FORMAT_<format>
+# (-d).  -i adds $(srcdir)/ to NASM's include search path.
+.nasm.lo:
+ $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@
+
+# libtool library built from the assembly sources; the noinst_ prefix means
+# it is built but not installed.
+noinst_LTLIBRARIES = libFLAC-asm.la
+libFLAC_asm_la_SOURCES = \
+ cpu_asm.nasm \
+ fixed_asm.nasm \
+ lpc_asm.nasm \
+ nasm.h
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.in b/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.in
new file mode 100644
index 0000000..38e1967
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/Makefile.in
@@ -0,0 +1,640 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# libFLAC - Free Lossless Audio Codec library
+# Copyright (C) 2001-2009 Josh Coalson
+# Copyright (C) 2011-2016 Xiph.Org Foundation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# - Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# - Neither the name of the Xiph.org Foundation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/libFLAC/ia32
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/add_cflags.m4 \
+ $(top_srcdir)/m4/add_cxxflags.m4 $(top_srcdir)/m4/bswap.m4 \
+ $(top_srcdir)/m4/clang.m4 $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/gcc_version.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
+ $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/ogg.m4 $(top_srcdir)/m4/really_gcc.m4 \
+ $(top_srcdir)/m4/stack_protect.m4 $(top_srcdir)/m4/xmms.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libFLAC_asm_la_LIBADD =
+am_libFLAC_asm_la_OBJECTS = cpu_asm.lo fixed_asm.lo lpc_asm.lo
+libFLAC_asm_la_OBJECTS = $(am_libFLAC_asm_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libFLAC_asm_la_SOURCES)
+DIST_SOURCES = $(libFLAC_asm_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOCBOOK_TO_MAN = @DOCBOOK_TO_MAN@
+DOXYGEN = @DOXYGEN@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ENABLE_64_BIT_WORDS = @ENABLE_64_BIT_WORDS@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FLAC__HAS_OGG = @FLAC__HAS_OGG@
+FLAC__TEST_LEVEL = @FLAC__TEST_LEVEL@
+FLAC__TEST_WITH_VALGRIND = @FLAC__TEST_WITH_VALGRIND@
+GCC_MAJOR_VERSION = @GCC_MAJOR_VERSION@
+GCC_MINOR_VERSION = @GCC_MINOR_VERSION@
+GCC_VERSION = @GCC_VERSION@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIB_CLOCK_GETTIME = @LIB_CLOCK_GETTIME@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBICONV = @LTLIBICONV@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NASM = @NASM@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OBJ_FORMAT = @OBJ_FORMAT@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OGG_PACKAGE = @OGG_PACKAGE@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+XMMS_CFLAGS = @XMMS_CFLAGS@
+XMMS_CONFIG = @XMMS_CONFIG@
+XMMS_DATA_DIR = @XMMS_DATA_DIR@
+XMMS_EFFECT_PLUGIN_DIR = @XMMS_EFFECT_PLUGIN_DIR@
+XMMS_GENERAL_PLUGIN_DIR = @XMMS_GENERAL_PLUGIN_DIR@
+XMMS_INPUT_PLUGIN_DIR = @XMMS_INPUT_PLUGIN_DIR@
+XMMS_LIBS = @XMMS_LIBS@
+XMMS_OUTPUT_PLUGIN_DIR = @XMMS_OUTPUT_PLUGIN_DIR@
+XMMS_PLUGIN_DIR = @XMMS_PLUGIN_DIR@
+XMMS_VERSION = @XMMS_VERSION@
+XMMS_VISUALIZATION_PLUGIN_DIR = @XMMS_VISUALIZATION_PLUGIN_DIR@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUFFIXES = .nasm .lo
+STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
+AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
+noinst_LTLIBRARIES = libFLAC-asm.la
+libFLAC_asm_la_SOURCES = \
+ cpu_asm.nasm \
+ fixed_asm.nasm \
+ lpc_asm.nasm \
+ nasm.h
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .nasm .lo
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/libFLAC/ia32/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/libFLAC/ia32/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Remove the libraries listed in $(noinst_LTLIBRARIES), then remove a
+# `so_locations' file from each directory that holds one of them.  The sed
+# script maps a library name without a slash to `.', otherwise strips the
+# last path component, and appends `/so_locations'; `sort -u' drops
+# duplicate directories.  Nothing is removed when the list is empty.
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libFLAC-asm.la: $(libFLAC_asm_la_OBJECTS) $(libFLAC_asm_la_DEPENDENCIES) $(EXTRA_libFLAC_asm_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(LINK) $(libFLAC_asm_la_OBJECTS) $(libFLAC_asm_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run the `install' target with the program-install commands replaced by
+# $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG set to -s.  When $(STRIP)
+# is non-empty, its value is additionally handed to the install step as
+# STRIPPROG through INSTALL_PROGRAM_ENV.
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+# Assemble one .nasm source ($<) into a libtool object ($@): NASM is invoked
+# through libtool's compile mode, with $(OBJ_FORMAT) used both as the output
+# format (-f) and to define the macro OBJ_FORMAT_<format> (-d); -i adds
+# $(srcdir)/ to NASM's include search path.
+.nasm.lo:
+ $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/cpu_asm.nasm b/deps/flac-1.3.2/src/libFLAC/ia32/cpu_asm.nasm
new file mode 100644
index 0000000..31baa0a
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/cpu_asm.nasm
@@ -0,0 +1,99 @@
+; vim:filetype=nasm ts=8
+
+; libFLAC - Free Lossless Audio Codec library
+; Copyright (C) 2001-2009 Josh Coalson
+; Copyright (C) 2011-2016 Xiph.Org Foundation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; - Neither the name of the Xiph.org Foundation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "nasm.h"
+
+ data_section
+
+cglobal FLAC__cpu_have_cpuid_asm_ia32
+cglobal FLAC__cpu_info_asm_ia32
+
+ code_section
+
+; **********************************************************************
+;
+; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
+; Returns 1 in eax if EFLAGS bit 21 (the ID flag, mask 0x00200000) can be toggled, else 0; restores EFLAGS, clobbers edx.
+
+cident FLAC__cpu_have_cpuid_asm_ia32
+ pushfd ; push the current EFLAGS
+ pop eax ; eax = current EFLAGS
+ mov edx, eax ; edx = saved copy of the original EFLAGS
+ xor eax, 0x00200000 ; flip bit 21
+ push eax
+ popfd ; try to load the modified value into EFLAGS
+ pushfd
+ pop eax ; eax = EFLAGS as actually stored by the CPU
+ xor eax, edx ; bits that differ from the original value
+ and eax, 0x00200000 ; keep only bit 21
+ shr eax, 0x15 ; shift bit 21 down to bit 0 -> result is 0 or 1
+ push edx
+ popfd ; restore the original EFLAGS
+ ret
+
+; **********************************************************************
+;
+; void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
+; Stores the edx/ecx results of CPUID leaf 1 through the two pointers, or 0 through both if leaf 1 cannot be queried.
+
+cident FLAC__cpu_info_asm_ia32
+ ;[esp + 8] == flags_edx (offsets as seen after the push ebx below)
+ ;[esp + 12] == flags_ecx
+
+ push ebx ; preserve ebx: cpuid overwrites it and it is used as a scratch pointer below
+ call FLAC__cpu_have_cpuid_asm_ia32
+ test eax, eax
+ jz .no_cpuid ; helper returned 0: report zeroed flags
+ mov eax, 0
+ cpuid ; leaf 0: eax = highest supported basic leaf
+ cmp eax, 1
+ jb .no_cpuid ; leaf 1 not supported: report zeroed flags
+ xor ecx, ecx ; NOTE(review): presumably a defensive clear of ecx before leaf 1 - confirm
+ mov eax, 1
+ cpuid ; leaf 1: results of interest are in edx and ecx
+ mov ebx, [esp + 8] ; ebx = flags_edx
+ mov [ebx], edx ; *flags_edx = edx from leaf 1
+ mov ebx, [esp + 12] ; ebx = flags_ecx
+ mov [ebx], ecx ; *flags_ecx = ecx from leaf 1
+ jmp .end
+.no_cpuid:
+ xor eax, eax ; eax = 0
+ mov ebx, [esp + 8] ; ebx = flags_edx
+ mov [ebx], eax ; *flags_edx = 0
+ mov ebx, [esp + 12] ; ebx = flags_ecx
+ mov [ebx], eax ; *flags_ecx = 0
+.end:
+ pop ebx ; restore the caller's ebx
+ ret
+
+; end
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/fixed_asm.nasm b/deps/flac-1.3.2/src/libFLAC/ia32/fixed_asm.nasm
new file mode 100644
index 0000000..8477724
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/fixed_asm.nasm
@@ -0,0 +1,309 @@
+; vim:filetype=nasm ts=8
+
+; libFLAC - Free Lossless Audio Codec library
+; Copyright (C) 2001-2009 Josh Coalson
+; Copyright (C) 2011-2016 Xiph.Org Foundation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; - Neither the name of the Xiph.org Foundation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "nasm.h"
+
+ data_section
+
+cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
+
+ code_section
+
+; **********************************************************************
+;
+; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+; {
+; FLAC__int32 last_error_0 = data[-1];
+; FLAC__int32 last_error_1 = data[-1] - data[-2];
+; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
+; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
+; FLAC__int32 error, save;
+; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
+; unsigned i, order;
+;
+; for(i = 0; i < data_len; i++) {
+; error = data[i] ; total_error_0 += local_abs(error); save = error;
+; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
+; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
+; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
+; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
+; }
+;
+; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+; order = 0;
+; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+; order = 1;
+; else if(total_error_2 < min(total_error_3, total_error_4))
+; order = 2;
+; else if(total_error_3 < total_error_4)
+; order = 3;
+; else
+; order = 4;
+;
+; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
+;
+; return order;
+; }
+ ALIGN 16
+cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
+ ; MMX/CMOV version of the C routine quoted above: sums |error| for orders 0..4, returns the best order in eax; reads data[-4..-1] as history.
+ ; esp + 36 == data[] (offsets as seen after the four pushes and the 16-byte reservation below)
+ ; esp + 40 == data_len
+ ; esp + 44 == residual_bits_per_sample[]
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+ sub esp, byte 16
+ ; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
+
+ ; ebx == &data[i]
+ ; ecx == loop counter (i)
+ ; ebp == order
+ ; mm0 == total_error_1:total_error_0
+ ; mm1 == total_error_2:total_error_3
+ ; mm2 == :total_error_4
+ ; mm3 == last_error_1:last_error_0
+ ; mm4 == last_error_2:last_error_3
+
+ mov ecx, [esp + 40] ; ecx = data_len
+ test ecx, ecx
+ jz near .data_len_is_0 ; nothing to sum: take the all-zero path (no MMX/x87 state touched)
+
+ mov ebx, [esp + 36] ; ebx = data[]
+ movd mm3, [ebx - 4] ; mm3 = 0:last_error_0
+ movd mm2, [ebx - 8] ; mm2 = 0:data[-2]
+ movd mm1, [ebx - 12] ; mm1 = 0:data[-3]
+ movd mm0, [ebx - 16] ; mm0 = 0:data[-4]
+ movq mm5, mm3 ; mm5 = 0:last_error_0
+ psubd mm5, mm2 ; mm5 = 0:last_error_1
+ punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0
+ psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]
+ psubd mm5, mm2 ; mm5 = 0:last_error_2
+ movq mm4, mm5 ; mm4 = 0:last_error_2
+ psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])
+ paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
+ psubd mm4, mm0 ; mm4 = 0:last_error_3
+ punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3
+ pxor mm0, mm0 ; mm0 = total_error_1:total_error_0
+ pxor mm1, mm1 ; mm1 = total_error_2:total_error_3
+ pxor mm2, mm2 ; mm2 = 0:total_error_4
+
+ ALIGN 16
+.loop:
+ movd mm7, [ebx] ; mm7 = 0:error_0
+ add ebx, byte 4 ; advance to the next sample
+ movq mm6, mm7 ; mm6 = 0:error_0
+ psubd mm7, mm3 ; mm7 = :error_1
+ punpckldq mm6, mm7 ; mm6 = error_1:error_0
+ movq mm5, mm6 ; mm5 = error_1:error_0
+ movq mm7, mm6 ; mm7 = error_1:error_0
+ psubd mm5, mm3 ; mm5 = error_2:
+ movq mm3, mm6 ; mm3 = error_1:error_0
+ psrad mm6, 31 ; mm6 = per-dword sign mask (0 or -1)
+ pxor mm7, mm6 ; invert the bits of the negative dwords
+ psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0)
+ paddd mm0, mm7 ; mm0 = total_error_1:total_error_0
+ movq mm6, mm5 ; mm6 = error_2:
+ psubd mm5, mm4 ; mm5 = error_3:
+ punpckhdq mm5, mm6 ; mm5 = error_2:error_3
+ movq mm7, mm5 ; mm7 = error_2:error_3
+ movq mm6, mm5 ; mm6 = error_2:error_3
+ psubd mm5, mm4 ; mm5 = :error_4
+ movq mm4, mm6 ; mm4 = error_2:error_3
+ psrad mm6, 31 ; mm6 = per-dword sign mask (0 or -1)
+ pxor mm7, mm6 ; invert the bits of the negative dwords
+ psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)
+ paddd mm1, mm7 ; mm1 = total_error_2:total_error_3
+ movq mm6, mm5 ; mm6 = :error_4
+ psrad mm5, 31 ; mm5 = per-dword sign mask (0 or -1)
+ pxor mm6, mm5 ; invert the bits of the negative dwords
+ psubd mm6, mm5 ; mm6 = :abs(error_4)
+ paddd mm2, mm6 ; mm2 = :total_error_4
+
+ dec ecx ; one sample consumed
+ jnz short .loop
+
+; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+; order = 0;
+; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+; order = 1;
+; else if(total_error_2 < min(total_error_3, total_error_4))
+; order = 2;
+; else if(total_error_3 < total_error_4)
+; order = 3;
+; else
+; order = 4;
+ movq mm3, mm0 ; mm3 = total_error_1:total_error_0
+ movd edi, mm2 ; edi = total_error_4
+ movd esi, mm1 ; esi = total_error_3
+ movd eax, mm0 ; eax = total_error_0
+ punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2
+ punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1
+ movd edx, mm1 ; edx = total_error_2
+ movd ecx, mm3 ; ecx = total_error_1
+
+ xor ebx, ebx ; ebx = candidate order, incremented before each compare
+ xor ebp, ebp ; ebp = best order so far = 0
+ inc ebx ; candidate order 1
+ cmp ecx, eax ; unsigned compare against the running minimum
+ cmovb eax, ecx ; eax = min(total_error_0, total_error_1)
+ cmovbe ebp, ebx ; order = 1 when total_error_1 <= minimum (a tie selects the higher order)
+ inc ebx ; candidate order 2
+ cmp edx, eax
+ cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)
+ cmovbe ebp, ebx ; order = 2 when total_error_2 <= minimum
+ inc ebx ; candidate order 3
+ cmp esi, eax
+ cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
+ cmovbe ebp, ebx ; order = 3 when total_error_3 <= minimum
+ inc ebx ; candidate order 4
+ cmp edi, eax
+ cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
+ cmovbe ebp, ebx ; order = 4 when total_error_4 <= minimum
+ movd ebx, mm0 ; ebx = total_error_0
+ emms ; done with MMX; the x87 stack is used from here on
+
+ ; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+ ; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+ ; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+ ; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+ ; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
+ xor eax, eax ; eax = 0: high dword of each 64-bit temp, and the value stored for a 0.0 result
+ fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs)
+.rbps_0:
+ test ebx, ebx
+ jz .total_error_0_is_0 ; total_error_0 == 0: store 0.0 instead of computing a log
+ fld1 ; ST = 1.0 data_len
+ mov [esp], ebx
+ mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0
+ mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[] (total_error_0 is already in the temp)
+ fild qword [esp] ; ST = total_error_0 1.0 data_len
+ fdiv st2 ; ST = total_error_0/data_len 1.0 data_len
+ fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len
+ fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len
+ fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len
+ fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len
+ jmp short .rbps_1
+.total_error_0_is_0:
+ mov ebx, [esp + 44] ; ebx = residual_bits_per_sample[]
+ mov [ebx], eax ; residual_bits_per_sample[0] = 0.0
+.rbps_1:
+ test ecx, ecx
+ jz .total_error_1_is_0 ; total_error_1 == 0: store 0.0 instead of computing a log
+ fld1 ; ST = 1.0 data_len
+ mov [esp], ecx
+ mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1
+ fild qword [esp] ; ST = total_error_1 1.0 data_len
+ fdiv st2 ; ST = total_error_1/data_len 1.0 data_len
+ fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len
+ fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len
+ fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len
+ fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len
+ jmp short .rbps_2
+.total_error_1_is_0:
+ mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0
+.rbps_2:
+ test edx, edx
+ jz .total_error_2_is_0 ; total_error_2 == 0: store 0.0 instead of computing a log
+ fld1 ; ST = 1.0 data_len
+ mov [esp], edx
+ mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2
+ fild qword [esp] ; ST = total_error_2 1.0 data_len
+ fdiv st2 ; ST = total_error_2/data_len 1.0 data_len
+ fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len
+ fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len
+ fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len
+ fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len
+ jmp short .rbps_3
+.total_error_2_is_0:
+ mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0
+.rbps_3:
+ test esi, esi
+ jz .total_error_3_is_0 ; total_error_3 == 0: store 0.0 instead of computing a log
+ fld1 ; ST = 1.0 data_len
+ mov [esp], esi
+ mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3
+ fild qword [esp] ; ST = total_error_3 1.0 data_len
+ fdiv st2 ; ST = total_error_3/data_len 1.0 data_len
+ fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len
+ fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len
+ fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len
+ fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len
+ jmp short .rbps_4
+.total_error_3_is_0:
+ mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0
+.rbps_4:
+ test edi, edi
+ jz .total_error_4_is_0 ; total_error_4 == 0: store 0.0 instead of computing a log
+ fld1 ; ST = 1.0 data_len
+ mov [esp], edi
+ mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4
+ fild qword [esp] ; ST = total_error_4 1.0 data_len
+ fdiv st2 ; ST = total_error_4/data_len 1.0 data_len
+ fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len
+ fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len
+ fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len
+ fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len
+ jmp short .rbps_end
+.total_error_4_is_0:
+ mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0
+.rbps_end:
+ fstp st0 ; ST = [empty]
+ jmp short .end
+.data_len_is_0:
+ ; data_len == 0, so residual_bits_per_sample[*] = 0.0
+ xor ebp, ebp ; ebp = 0, stored below as each 0.0 result
+ mov edi, [esp + 44] ; edi = residual_bits_per_sample[]
+ mov [edi], ebp
+ mov [edi + 4], ebp
+ mov [edi + 8], ebp
+ mov [edi + 12], ebp
+ mov [edi + 16], ebp
+ add ebp, byte 4 ; order = 4
+
+.end:
+ mov eax, ebp ; return order
+ add esp, byte 16 ; release the temp space
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; end
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/lpc_asm.nasm b/deps/flac-1.3.2/src/libFLAC/ia32/lpc_asm.nasm
new file mode 100644
index 0000000..8539d9b
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/lpc_asm.nasm
@@ -0,0 +1,2049 @@
+; vim:filetype=nasm ts=8
+
+; libFLAC - Free Lossless Audio Codec library
+; Copyright (C) 2001-2009 Josh Coalson
+; Copyright (C) 2011-2016 Xiph.Org Foundation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; - Neither the name of the Xiph.org Foundation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "nasm.h"
+
+ data_section
+
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old
+cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
+cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
+cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
+cglobal FLAC__lpc_restore_signal_asm_ia32
+cglobal FLAC__lpc_restore_signal_asm_ia32_mmx
+cglobal FLAC__lpc_restore_signal_wide_asm_ia32
+
+ code_section
+
+; **********************************************************************
+;
+; void FLAC__lpc_compute_autocorrelation_asm(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
+; {
+; FLAC__real d;
+; unsigned sample, coeff;
+; const unsigned limit = data_len - lag;
+;
+; FLAC__ASSERT(lag > 0);
+; FLAC__ASSERT(lag <= data_len);
+;
+; for(coeff = 0; coeff < lag; coeff++)
+; autoc[coeff] = 0.0;
+; for(sample = 0; sample <= limit; sample++) {
+; d = data[sample];
+; for(coeff = 0; coeff < lag; coeff++)
+; autoc[coeff] += d * data[sample+coeff];
+; }
+; for(; sample < data_len; sample++) {
+; d = data[sample];
+; for(coeff = 0; coeff < data_len - sample; coeff++)
+; autoc[coeff] += d * data[sample+coeff];
+; }
+; }
+;
+ ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32
+ ;[esp + 28] == autoc[]
+ ;[esp + 24] == lag
+ ;[esp + 20] == data_len
+ ;[esp + 16] == data[]
+ ; (the offsets above are relative to esp after the three pushes at .begin)
+ ;ASSERT(lag > 0)
+ ;ASSERT(lag <= 33)
+ ;ASSERT(lag <= data_len)
+ ; x87 version of the C routine above: both inner loops are fully unrolled and entered via a computed "jmp edx", so the byte length of every unrolled step matters.
+.begin:
+ push esi
+ push edi
+ push ebx
+
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] = 0.0;
+ mov edi, [esp + 28] ; edi == autoc
+ mov ecx, [esp + 24] ; ecx = # of dwords (=lag) of 0 to write
+ xor eax, eax ; eax = 0, the dword value written by stosd
+ rep stosd ; autoc[0..lag-1] = 0.0
+
+ ; const unsigned limit = data_len - lag;
+ mov eax, [esp + 24] ; eax == lag
+ mov ecx, [esp + 20] ; ecx == data_len
+ sub ecx, eax ; ecx == limit
+
+ mov edi, [esp + 28] ; edi == autoc
+ mov esi, [esp + 16] ; esi == data
+ inc ecx ; we are looping <= limit so we add one to the counter
+
+ ; for(sample = 0; sample <= limit; sample++) {
+ ; d = data[sample];
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] += d * data[sample+coeff];
+ ; }
+ fld dword [esi] ; ST = d <- data[sample]
+ ; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
+ lea edx, [eax + eax*2] ; edx = 3*lag
+ neg edx ; edx = -3*lag
+ lea edx, [eax + edx*4 + .jumper1_0 - .get_eip1] ; edx = -11*lag + (.jumper1_0 - .get_eip1)
+ call .mov_eip_to_ebx ; ebx = run-time address of .get_eip1 (keeps the jump target position-independent)
+.get_eip1:
+ add edx, ebx ; edx = .jumper1_0 - 11*lag
+ inc edx ; compensate for the shorter opcode on the last iteration
+ inc edx ; compensate for the shorter opcode on the last iteration
+ inc edx ; compensate for the shorter opcode on the last iteration
+ cmp eax, 33 ; only lag == 33 enters at the [.. + (32*4)] step
+ jne .loop1_start
+ sub edx, byte 9 ; compensate for the longer opcodes on the first iteration
+.loop1_start:
+ jmp edx ; enter the unrolled chain so that exactly lag steps run
+
+.mov_eip_to_ebx:
+ mov ebx, [esp] ; ebx = return address of the call, i.e. the label right after it
+ ret
+
+ fld st0 ; ST = d d
+ fmul dword [esi + (32*4)] ; ST = d*data[sample+32] d WATCHOUT: not a byte displacement here!
+ fadd dword [edi + (32*4)] ; ST = autoc[32]+d*data[sample+32] d WATCHOUT: not a byte displacement here!
+ fstp dword [edi + (32*4)] ; autoc[32]+=d*data[sample+32] ST = d WATCHOUT: not a byte displacement here!
+ fld st0 ; ST = d d
+ fmul dword [esi + (31*4)] ; ST = d*data[sample+31] d
+ fadd dword [edi + (31*4)] ; ST = autoc[31]+d*data[sample+31] d
+ fstp dword [edi + (31*4)] ; autoc[31]+=d*data[sample+31] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (30*4)] ; ST = d*data[sample+30] d
+ fadd dword [edi + (30*4)] ; ST = autoc[30]+d*data[sample+30] d
+ fstp dword [edi + (30*4)] ; autoc[30]+=d*data[sample+30] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (29*4)] ; ST = d*data[sample+29] d
+ fadd dword [edi + (29*4)] ; ST = autoc[29]+d*data[sample+29] d
+ fstp dword [edi + (29*4)] ; autoc[29]+=d*data[sample+29] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (28*4)] ; ST = d*data[sample+28] d
+ fadd dword [edi + (28*4)] ; ST = autoc[28]+d*data[sample+28] d
+ fstp dword [edi + (28*4)] ; autoc[28]+=d*data[sample+28] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (27*4)] ; ST = d*data[sample+27] d
+ fadd dword [edi + (27*4)] ; ST = autoc[27]+d*data[sample+27] d
+ fstp dword [edi + (27*4)] ; autoc[27]+=d*data[sample+27] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (26*4)] ; ST = d*data[sample+26] d
+ fadd dword [edi + (26*4)] ; ST = autoc[26]+d*data[sample+26] d
+ fstp dword [edi + (26*4)] ; autoc[26]+=d*data[sample+26] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (25*4)] ; ST = d*data[sample+25] d
+ fadd dword [edi + (25*4)] ; ST = autoc[25]+d*data[sample+25] d
+ fstp dword [edi + (25*4)] ; autoc[25]+=d*data[sample+25] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (24*4)] ; ST = d*data[sample+24] d
+ fadd dword [edi + (24*4)] ; ST = autoc[24]+d*data[sample+24] d
+ fstp dword [edi + (24*4)] ; autoc[24]+=d*data[sample+24] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (23*4)] ; ST = d*data[sample+23] d
+ fadd dword [edi + (23*4)] ; ST = autoc[23]+d*data[sample+23] d
+ fstp dword [edi + (23*4)] ; autoc[23]+=d*data[sample+23] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (22*4)] ; ST = d*data[sample+22] d
+ fadd dword [edi + (22*4)] ; ST = autoc[22]+d*data[sample+22] d
+ fstp dword [edi + (22*4)] ; autoc[22]+=d*data[sample+22] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (21*4)] ; ST = d*data[sample+21] d
+ fadd dword [edi + (21*4)] ; ST = autoc[21]+d*data[sample+21] d
+ fstp dword [edi + (21*4)] ; autoc[21]+=d*data[sample+21] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (20*4)] ; ST = d*data[sample+20] d
+ fadd dword [edi + (20*4)] ; ST = autoc[20]+d*data[sample+20] d
+ fstp dword [edi + (20*4)] ; autoc[20]+=d*data[sample+20] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (19*4)] ; ST = d*data[sample+19] d
+ fadd dword [edi + (19*4)] ; ST = autoc[19]+d*data[sample+19] d
+ fstp dword [edi + (19*4)] ; autoc[19]+=d*data[sample+19] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (18*4)] ; ST = d*data[sample+18] d
+ fadd dword [edi + (18*4)] ; ST = autoc[18]+d*data[sample+18] d
+ fstp dword [edi + (18*4)] ; autoc[18]+=d*data[sample+18] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (17*4)] ; ST = d*data[sample+17] d
+ fadd dword [edi + (17*4)] ; ST = autoc[17]+d*data[sample+17] d
+ fstp dword [edi + (17*4)] ; autoc[17]+=d*data[sample+17] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (16*4)] ; ST = d*data[sample+16] d
+ fadd dword [edi + (16*4)] ; ST = autoc[16]+d*data[sample+16] d
+ fstp dword [edi + (16*4)] ; autoc[16]+=d*data[sample+16] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (15*4)] ; ST = d*data[sample+15] d
+ fadd dword [edi + (15*4)] ; ST = autoc[15]+d*data[sample+15] d
+ fstp dword [edi + (15*4)] ; autoc[15]+=d*data[sample+15] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (14*4)] ; ST = d*data[sample+14] d
+ fadd dword [edi + (14*4)] ; ST = autoc[14]+d*data[sample+14] d
+ fstp dword [edi + (14*4)] ; autoc[14]+=d*data[sample+14] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (13*4)] ; ST = d*data[sample+13] d
+ fadd dword [edi + (13*4)] ; ST = autoc[13]+d*data[sample+13] d
+ fstp dword [edi + (13*4)] ; autoc[13]+=d*data[sample+13] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (12*4)] ; ST = d*data[sample+12] d
+ fadd dword [edi + (12*4)] ; ST = autoc[12]+d*data[sample+12] d
+ fstp dword [edi + (12*4)] ; autoc[12]+=d*data[sample+12] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (11*4)] ; ST = d*data[sample+11] d
+ fadd dword [edi + (11*4)] ; ST = autoc[11]+d*data[sample+11] d
+ fstp dword [edi + (11*4)] ; autoc[11]+=d*data[sample+11] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (10*4)] ; ST = d*data[sample+10] d
+ fadd dword [edi + (10*4)] ; ST = autoc[10]+d*data[sample+10] d
+ fstp dword [edi + (10*4)] ; autoc[10]+=d*data[sample+10] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 9*4)] ; ST = d*data[sample+9] d
+ fadd dword [edi + ( 9*4)] ; ST = autoc[9]+d*data[sample+9] d
+ fstp dword [edi + ( 9*4)] ; autoc[9]+=d*data[sample+9] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 8*4)] ; ST = d*data[sample+8] d
+ fadd dword [edi + ( 8*4)] ; ST = autoc[8]+d*data[sample+8] d
+ fstp dword [edi + ( 8*4)] ; autoc[8]+=d*data[sample+8] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 7*4)] ; ST = d*data[sample+7] d
+ fadd dword [edi + ( 7*4)] ; ST = autoc[7]+d*data[sample+7] d
+ fstp dword [edi + ( 7*4)] ; autoc[7]+=d*data[sample+7] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 6*4)] ; ST = d*data[sample+6] d
+ fadd dword [edi + ( 6*4)] ; ST = autoc[6]+d*data[sample+6] d
+ fstp dword [edi + ( 6*4)] ; autoc[6]+=d*data[sample+6] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 5*4)] ; ST = d*data[sample+5] d
+ fadd dword [edi + ( 5*4)] ; ST = autoc[5]+d*data[sample+5] d
+ fstp dword [edi + ( 5*4)] ; autoc[5]+=d*data[sample+5] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 4*4)] ; ST = d*data[sample+4] d
+ fadd dword [edi + ( 4*4)] ; ST = autoc[4]+d*data[sample+4] d
+ fstp dword [edi + ( 4*4)] ; autoc[4]+=d*data[sample+4] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 3*4)] ; ST = d*data[sample+3] d
+ fadd dword [edi + ( 3*4)] ; ST = autoc[3]+d*data[sample+3] d
+ fstp dword [edi + ( 3*4)] ; autoc[3]+=d*data[sample+3] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 2*4)] ; ST = d*data[sample+2] d
+ fadd dword [edi + ( 2*4)] ; ST = autoc[2]+d*data[sample+2] d
+ fstp dword [edi + ( 2*4)] ; autoc[2]+=d*data[sample+2] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 1*4)] ; ST = d*data[sample+1] d
+ fadd dword [edi + ( 1*4)] ; ST = autoc[1]+d*data[sample+1] d
+ fstp dword [edi + ( 1*4)] ; autoc[1]+=d*data[sample+1] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi] ; ST = d*data[sample] d WATCHOUT: no displacement byte here!
+ fadd dword [edi] ; ST = autoc[0]+d*data[sample] d WATCHOUT: no displacement byte here!
+ fstp dword [edi] ; autoc[0]+=d*data[sample] ST = d WATCHOUT: no displacement byte here!
+.jumper1_0:
+
+ fstp st0 ; pop d, ST = empty
+ add esi, byte 4 ; sample++
+ dec ecx ; one fewer full-length sample left
+ jz .loop1_end
+ fld dword [esi] ; ST = d <- data[sample]
+ jmp edx ; same entry point again: every pass of this loop runs lag steps
+.loop1_end:
+
+ ; for(; sample < data_len; sample++) {
+ ; d = data[sample];
+ ; for(coeff = 0; coeff < data_len - sample; coeff++)
+ ; autoc[coeff] += d * data[sample+coeff];
+ ; }
+ mov ecx, [esp + 24] ; ecx <- lag
+ dec ecx ; ecx <- lag - 1
+ jz near .end ; skip loop if 0 (i.e. lag == 1)
+
+ fld dword [esi] ; ST = d <- data[sample]
+ mov eax, ecx ; eax <- lag - 1 == data_len - sample the first time through
+ ; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
+ lea edx, [eax + eax*2] ; edx = 3*(lag-1)
+ neg edx ; edx = -3*(lag-1)
+ lea edx, [eax + edx*4 + .jumper2_0 - .get_eip2] ; edx = -11*(lag-1) + (.jumper2_0 - .get_eip2)
+ call .mov_eip_to_ebx ; ebx = run-time address of .get_eip2
+.get_eip2:
+ add edx, ebx ; edx = .jumper2_0 - 11*(lag-1)
+ inc edx ; compensate for the shorter opcode on the last iteration
+ inc edx ; compensate for the shorter opcode on the last iteration
+ inc edx ; compensate for the shorter opcode on the last iteration
+ jmp edx ; enter the second chain so that lag-1 steps run
+
+ fld st0 ; ST = d d
+ fmul dword [esi + (31*4)] ; ST = d*data[sample+31] d
+ fadd dword [edi + (31*4)] ; ST = autoc[31]+d*data[sample+31] d
+ fstp dword [edi + (31*4)] ; autoc[31]+=d*data[sample+31] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (30*4)] ; ST = d*data[sample+30] d
+ fadd dword [edi + (30*4)] ; ST = autoc[30]+d*data[sample+30] d
+ fstp dword [edi + (30*4)] ; autoc[30]+=d*data[sample+30] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (29*4)] ; ST = d*data[sample+29] d
+ fadd dword [edi + (29*4)] ; ST = autoc[29]+d*data[sample+29] d
+ fstp dword [edi + (29*4)] ; autoc[29]+=d*data[sample+29] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (28*4)] ; ST = d*data[sample+28] d
+ fadd dword [edi + (28*4)] ; ST = autoc[28]+d*data[sample+28] d
+ fstp dword [edi + (28*4)] ; autoc[28]+=d*data[sample+28] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (27*4)] ; ST = d*data[sample+27] d
+ fadd dword [edi + (27*4)] ; ST = autoc[27]+d*data[sample+27] d
+ fstp dword [edi + (27*4)] ; autoc[27]+=d*data[sample+27] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (26*4)] ; ST = d*data[sample+26] d
+ fadd dword [edi + (26*4)] ; ST = autoc[26]+d*data[sample+26] d
+ fstp dword [edi + (26*4)] ; autoc[26]+=d*data[sample+26] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (25*4)] ; ST = d*data[sample+25] d
+ fadd dword [edi + (25*4)] ; ST = autoc[25]+d*data[sample+25] d
+ fstp dword [edi + (25*4)] ; autoc[25]+=d*data[sample+25] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (24*4)] ; ST = d*data[sample+24] d
+ fadd dword [edi + (24*4)] ; ST = autoc[24]+d*data[sample+24] d
+ fstp dword [edi + (24*4)] ; autoc[24]+=d*data[sample+24] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (23*4)] ; ST = d*data[sample+23] d
+ fadd dword [edi + (23*4)] ; ST = autoc[23]+d*data[sample+23] d
+ fstp dword [edi + (23*4)] ; autoc[23]+=d*data[sample+23] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (22*4)] ; ST = d*data[sample+22] d
+ fadd dword [edi + (22*4)] ; ST = autoc[22]+d*data[sample+22] d
+ fstp dword [edi + (22*4)] ; autoc[22]+=d*data[sample+22] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (21*4)] ; ST = d*data[sample+21] d
+ fadd dword [edi + (21*4)] ; ST = autoc[21]+d*data[sample+21] d
+ fstp dword [edi + (21*4)] ; autoc[21]+=d*data[sample+21] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (20*4)] ; ST = d*data[sample+20] d
+ fadd dword [edi + (20*4)] ; ST = autoc[20]+d*data[sample+20] d
+ fstp dword [edi + (20*4)] ; autoc[20]+=d*data[sample+20] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (19*4)] ; ST = d*data[sample+19] d
+ fadd dword [edi + (19*4)] ; ST = autoc[19]+d*data[sample+19] d
+ fstp dword [edi + (19*4)] ; autoc[19]+=d*data[sample+19] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (18*4)] ; ST = d*data[sample+18] d
+ fadd dword [edi + (18*4)] ; ST = autoc[18]+d*data[sample+18] d
+ fstp dword [edi + (18*4)] ; autoc[18]+=d*data[sample+18] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (17*4)] ; ST = d*data[sample+17] d
+ fadd dword [edi + (17*4)] ; ST = autoc[17]+d*data[sample+17] d
+ fstp dword [edi + (17*4)] ; autoc[17]+=d*data[sample+17] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (16*4)] ; ST = d*data[sample+16] d
+ fadd dword [edi + (16*4)] ; ST = autoc[16]+d*data[sample+16] d
+ fstp dword [edi + (16*4)] ; autoc[16]+=d*data[sample+16] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (15*4)] ; ST = d*data[sample+15] d
+ fadd dword [edi + (15*4)] ; ST = autoc[15]+d*data[sample+15] d
+ fstp dword [edi + (15*4)] ; autoc[15]+=d*data[sample+15] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (14*4)] ; ST = d*data[sample+14] d
+ fadd dword [edi + (14*4)] ; ST = autoc[14]+d*data[sample+14] d
+ fstp dword [edi + (14*4)] ; autoc[14]+=d*data[sample+14] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (13*4)] ; ST = d*data[sample+13] d
+ fadd dword [edi + (13*4)] ; ST = autoc[13]+d*data[sample+13] d
+ fstp dword [edi + (13*4)] ; autoc[13]+=d*data[sample+13] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (12*4)] ; ST = d*data[sample+12] d
+ fadd dword [edi + (12*4)] ; ST = autoc[12]+d*data[sample+12] d
+ fstp dword [edi + (12*4)] ; autoc[12]+=d*data[sample+12] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (11*4)] ; ST = d*data[sample+11] d
+ fadd dword [edi + (11*4)] ; ST = autoc[11]+d*data[sample+11] d
+ fstp dword [edi + (11*4)] ; autoc[11]+=d*data[sample+11] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + (10*4)] ; ST = d*data[sample+10] d
+ fadd dword [edi + (10*4)] ; ST = autoc[10]+d*data[sample+10] d
+ fstp dword [edi + (10*4)] ; autoc[10]+=d*data[sample+10] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 9*4)] ; ST = d*data[sample+9] d
+ fadd dword [edi + ( 9*4)] ; ST = autoc[9]+d*data[sample+9] d
+ fstp dword [edi + ( 9*4)] ; autoc[9]+=d*data[sample+9] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 8*4)] ; ST = d*data[sample+8] d
+ fadd dword [edi + ( 8*4)] ; ST = autoc[8]+d*data[sample+8] d
+ fstp dword [edi + ( 8*4)] ; autoc[8]+=d*data[sample+8] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 7*4)] ; ST = d*data[sample+7] d
+ fadd dword [edi + ( 7*4)] ; ST = autoc[7]+d*data[sample+7] d
+ fstp dword [edi + ( 7*4)] ; autoc[7]+=d*data[sample+7] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 6*4)] ; ST = d*data[sample+6] d
+ fadd dword [edi + ( 6*4)] ; ST = autoc[6]+d*data[sample+6] d
+ fstp dword [edi + ( 6*4)] ; autoc[6]+=d*data[sample+6] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 5*4)] ; ST = d*data[sample+5] d
+ fadd dword [edi + ( 5*4)] ; ST = autoc[5]+d*data[sample+5] d
+ fstp dword [edi + ( 5*4)] ; autoc[5]+=d*data[sample+5] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 4*4)] ; ST = d*data[sample+4] d
+ fadd dword [edi + ( 4*4)] ; ST = autoc[4]+d*data[sample+4] d
+ fstp dword [edi + ( 4*4)] ; autoc[4]+=d*data[sample+4] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 3*4)] ; ST = d*data[sample+3] d
+ fadd dword [edi + ( 3*4)] ; ST = autoc[3]+d*data[sample+3] d
+ fstp dword [edi + ( 3*4)] ; autoc[3]+=d*data[sample+3] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 2*4)] ; ST = d*data[sample+2] d
+ fadd dword [edi + ( 2*4)] ; ST = autoc[2]+d*data[sample+2] d
+ fstp dword [edi + ( 2*4)] ; autoc[2]+=d*data[sample+2] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi + ( 1*4)] ; ST = d*data[sample+1] d
+ fadd dword [edi + ( 1*4)] ; ST = autoc[1]+d*data[sample+1] d
+ fstp dword [edi + ( 1*4)] ; autoc[1]+=d*data[sample+1] ST = d
+ fld st0 ; ST = d d
+ fmul dword [esi] ; ST = d*data[sample] d WATCHOUT: no displacement byte here!
+ fadd dword [edi] ; ST = autoc[0]+d*data[sample] d WATCHOUT: no displacement byte here!
+ fstp dword [edi] ; autoc[0]+=d*data[sample] ST = d WATCHOUT: no displacement byte here!
+.jumper2_0:
+
+ fstp st0 ; pop d, ST = empty
+ add esi, byte 4 ; sample++
+ dec ecx ; one fewer tail sample left
+ jz .loop2_end
+ add edx, byte 11 ; adjust our inner loop counter by adjusting the jump target
+ fld dword [esi] ; ST = d <- data[sample]
+ jmp edx ; re-enter one step later, so one fewer coefficient is updated
+.loop2_end:
+
+.end:
+ pop ebx
+ pop edi
+ pop esi
+ ret
+
+ ALIGN 16
+; Single-precision autocorrelation of data[] for lags 0..3 using SSE.
+; xmm5 is the accumulator: lane k (lane 0 = lowest) sums data[sample]*data[sample-k].
+; All four lanes are always stored to autoc[]; the lag argument is never read.
+; data[0] is loaded before data_len is tested, so data_len must be at least 1.
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4_old
+ ;[esp + 16] == autoc[]
+ ;[esp + 12] == lag
+ ;[esp + 8] == data_len
+ ;[esp + 4] == data[]
+
+ ;ASSERT(lag > 0)
+ ;ASSERT(lag <= 4)
+ ;ASSERT(lag <= data_len)
+
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] = 0.0;
+ ; (only the register accumulator is cleared here; autoc[] is written once, at .loop_end)
+ xorps xmm5, xmm5
+
+ mov edx, [esp + 8] ; edx == data_len
+ mov eax, [esp + 4] ; eax == &data[sample] <- &data[0]
+
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[0]
+ add eax, 4
+ movaps xmm2, xmm0 ; xmm2 = 0,0,0,data[0]
+ shufps xmm0, xmm0, 0 ; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+.warmup: ; xmm2 == data[sample-3],data[sample-2],data[sample-1],data[sample]
+ ; first sample: the history lanes are still zero, so only lane 0 receives a non-zero product
+ mulps xmm0, xmm2 ; xmm0 = xmm0 * xmm2
+ addps xmm5, xmm0 ; xmm5 += xmm0 * xmm2
+ dec edx
+ jz .loop_end
+ ALIGN 16
+.loop_start:
+ ; start by reading the next sample
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[sample]
+ add eax, 4
+ shufps xmm0, xmm0, 0 ; xmm0 = data[sample],data[sample],data[sample],data[sample]
+ shufps xmm2, xmm2, 93h ; 93h=2-1-0-3 => xmm2 gets rotated left by one float
+ ; overwrite lane 0 (which the rotate filled with the oldest sample) with the new sample
+ movss xmm2, xmm0
+ mulps xmm0, xmm2 ; xmm0 = xmm0 * xmm2
+ addps xmm5, xmm0 ; xmm5 += xmm0 * xmm2
+ dec edx
+ jnz .loop_start
+.loop_end:
+ ; store autoc
+ mov edx, [esp + 16] ; edx == autoc
+ ; unaligned store of all four accumulated lags (16 bytes)
+ movups [edx], xmm5
+
+.end:
+ ret
+
+ ALIGN 16
+; Single-precision autocorrelation of data[] for lags 0..7 using SSE.
+; xmm3:xmm2 hold the last eight samples (newest in lane 0 of xmm2);
+; xmm6:xmm5 accumulate lags 4..7 and 0..3 respectively.
+; All eight lanes are always stored to autoc[]; the lag argument is never read.
+; data[0] is loaded before data_len is tested, so data_len must be at least 1.
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8_old
+ ;[esp + 16] == autoc[]
+ ;[esp + 12] == lag
+ ;[esp + 8] == data_len
+ ;[esp + 4] == data[]
+
+ ;ASSERT(lag > 0)
+ ;ASSERT(lag <= 8)
+ ;ASSERT(lag <= data_len)
+
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] = 0.0;
+ ; (only the register accumulators are cleared here; autoc[] is written once, at .loop_end)
+ xorps xmm5, xmm5
+ xorps xmm6, xmm6
+
+ mov edx, [esp + 8] ; edx == data_len
+ mov eax, [esp + 4] ; eax == &data[sample] <- &data[0]
+
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[0]
+ add eax, 4
+ movaps xmm2, xmm0 ; xmm2 = 0,0,0,data[0]
+ shufps xmm0, xmm0, 0 ; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+ movaps xmm1, xmm0 ; xmm1 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+ xorps xmm3, xmm3 ; xmm3 = 0,0,0,0
+.warmup: ; xmm3:xmm2 == data[sample-7],data[sample-6],...,data[sample]
+ mulps xmm0, xmm2
+ mulps xmm1, xmm3 ; xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
+ addps xmm5, xmm0
+ addps xmm6, xmm1 ; xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
+ dec edx
+ jz .loop_end
+ ALIGN 16
+.loop_start:
+ ; start by reading the next sample
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[sample]
+ ; here we reorder the instructions; see the (#) indexes for a logical order
+ shufps xmm2, xmm2, 93h ; (3) 93h=2-1-0-3 => xmm2 gets rotated left by one float
+ add eax, 4 ; (0)
+ shufps xmm3, xmm3, 93h ; (4) 93h=2-1-0-3 => xmm3 gets rotated left by one float
+ shufps xmm0, xmm0, 0 ; (1) xmm0 = data[sample],data[sample],data[sample],data[sample]
+ movss xmm3, xmm2 ; (5) carry the sample rotated out of xmm2 into lane 0 of xmm3
+ movaps xmm1, xmm0 ; (2) xmm1 = data[sample],data[sample],data[sample],data[sample]
+ movss xmm2, xmm0 ; (6) insert the new sample into lane 0 of xmm2
+ mulps xmm1, xmm3 ; (8)
+ mulps xmm0, xmm2 ; (7) xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
+ addps xmm6, xmm1 ; (10)
+ addps xmm5, xmm0 ; (9) xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
+ dec edx
+ jnz .loop_start
+.loop_end:
+ ; store autoc
+ mov edx, [esp + 16] ; edx == autoc
+ ; unaligned stores: lags 0..3 then lags 4..7 (32 bytes in total)
+ movups [edx], xmm5
+ movups [edx + 16], xmm6
+
+.end:
+ ret
+
+ ALIGN 16
+; Single-precision autocorrelation of data[] for lags 0..11 using SSE.
+; xmm4:xmm3:xmm2 hold the last twelve samples (newest in lane 0 of xmm2);
+; xmm7:xmm6:xmm5 accumulate lags 8..11, 4..7 and 0..3; xmm1 is scratch.
+; All twelve lanes are always stored to autoc[]; the lag argument is never read.
+; data[0] is loaded before data_len is tested, so data_len must be at least 1.
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12_old
+ ;[esp + 16] == autoc[]
+ ;[esp + 12] == lag
+ ;[esp + 8] == data_len
+ ;[esp + 4] == data[]
+
+ ;ASSERT(lag > 0)
+ ;ASSERT(lag <= 12)
+ ;ASSERT(lag <= data_len)
+
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] = 0.0;
+ ; (only the register accumulators are cleared here; autoc[] is written once, at .loop_end)
+ xorps xmm5, xmm5
+ xorps xmm6, xmm6
+ xorps xmm7, xmm7
+
+ mov edx, [esp + 8] ; edx == data_len
+ mov eax, [esp + 4] ; eax == &data[sample] <- &data[0]
+
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[0]
+ add eax, 4
+ movaps xmm2, xmm0 ; xmm2 = 0,0,0,data[0]
+ shufps xmm0, xmm0, 0 ; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+ xorps xmm3, xmm3 ; xmm3 = 0,0,0,0
+ xorps xmm4, xmm4 ; xmm4 = 0,0,0,0
+.warmup: ; xmm4:xmm3:xmm2 == data[sample-11],data[sample-10],...,data[sample]
+ movaps xmm1, xmm0
+ mulps xmm1, xmm2
+ addps xmm5, xmm1
+ movaps xmm1, xmm0
+ mulps xmm1, xmm3
+ addps xmm6, xmm1
+ mulps xmm0, xmm4
+ addps xmm7, xmm0 ; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
+ dec edx
+ jz .loop_end
+ ALIGN 16
+.loop_start:
+ ; start by reading the next sample
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[sample]
+ add eax, 4
+ shufps xmm0, xmm0, 0 ; xmm0 = data[sample],data[sample],data[sample],data[sample]
+
+ ; shift xmm4:xmm3:xmm2 left by one float
+ shufps xmm2, xmm2, 93h ; 93h=2-1-0-3 => xmm2 gets rotated left by one float
+ shufps xmm3, xmm3, 93h ; 93h=2-1-0-3 => xmm3 gets rotated left by one float
+ shufps xmm4, xmm4, 93h ; 93h=2-1-0-3 => xmm4 gets rotated left by one float
+ ; propagate lane 0 from the lower register upwards, highest register first,
+ ; so each source is read before it is overwritten; the new sample enters xmm2 last
+ movss xmm4, xmm3
+ movss xmm3, xmm2
+ movss xmm2, xmm0
+
+ ; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
+ movaps xmm1, xmm0
+ mulps xmm1, xmm2
+ addps xmm5, xmm1
+ movaps xmm1, xmm0
+ mulps xmm1, xmm3
+ addps xmm6, xmm1
+ mulps xmm0, xmm4
+ addps xmm7, xmm0
+
+ dec edx
+ jnz .loop_start
+.loop_end:
+ ; store autoc
+ mov edx, [esp + 16] ; edx == autoc
+ ; unaligned stores: lags 0..3, 4..7, 8..11 (48 bytes in total)
+ movups [edx], xmm5
+ movups [edx + 16], xmm6
+ movups [edx + 32], xmm7
+
+.end:
+ ret
+
+ ALIGN 16
+; Single-precision autocorrelation of data[] for lags 0..15 using SSE.
+; xmm4:xmm3:xmm2:xmm1 hold the last sixteen samples (newest in lane 0 of xmm1).
+; Lags 0..3 and 4..7 accumulate in xmm5 and xmm6; lags 8..11 and 12..15
+; accumulate in two 16-byte stack slots at [esp] and [esp + 16], because all
+; eight xmm registers are otherwise in use (xmm7 is scratch).
+; All sixteen lanes are always stored to autoc[]; the lag argument is never read.
+; data[0] is loaded before data_len is tested, so data_len must be at least 1.
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16_old
+ ;[ebp + 20] == autoc[]
+ ;[ebp + 16] == lag
+ ;[ebp + 12] == data_len
+ ;[ebp + 8] == data[]
+ ;[esp] == __m128
+ ;[esp + 16] == __m128
+
+ push ebp
+ mov ebp, esp
+ and esp, -16 ; stack realign for SSE instructions 'movaps' and 'addps'
+ sub esp, 32
+
+ ;ASSERT(lag > 0)
+ ;ASSERT(lag <= 16)
+ ;ASSERT(lag <= data_len)
+ ;ASSERT(data_len > 0)
+
+ ; for(coeff = 0; coeff < lag; coeff++)
+ ; autoc[coeff] = 0.0;
+ ; (clears the two register accumulators and, via them, the two stack accumulators)
+ xorps xmm5, xmm5
+ xorps xmm6, xmm6
+ movaps [esp], xmm5
+ movaps [esp + 16], xmm6
+
+ mov edx, [ebp + 12] ; edx == data_len
+ mov eax, [ebp + 8] ; eax == &data[sample] <- &data[0]
+
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[0]
+ add eax, 4
+ movaps xmm1, xmm0 ; xmm1 = 0,0,0,data[0]
+ shufps xmm0, xmm0, 0 ; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+ xorps xmm2, xmm2 ; xmm2 = 0,0,0,0
+ xorps xmm3, xmm3 ; xmm3 = 0,0,0,0
+ xorps xmm4, xmm4 ; xmm4 = 0,0,0,0
+ ; first sample: xmm2..xmm4 are zero, so only the xmm5 accumulator needs updating
+ movaps xmm7, xmm0
+ mulps xmm7, xmm1
+ addps xmm5, xmm7
+ dec edx
+ jz .loop_end
+ ALIGN 16
+.loop_start:
+ ; start by reading the next sample
+ movss xmm0, [eax] ; xmm0 = 0,0,0,data[sample]
+ add eax, 4
+ shufps xmm0, xmm0, 0 ; xmm0 = data[sample],data[sample],data[sample],data[sample]
+
+ ; shift xmm4:xmm3:xmm2:xmm1 left by one float
+ shufps xmm1, xmm1, 93h
+ shufps xmm2, xmm2, 93h
+ shufps xmm3, xmm3, 93h
+ shufps xmm4, xmm4, 93h
+ ; propagate lane 0 upwards, highest register first; the new sample enters xmm1 last
+ movss xmm4, xmm3
+ movss xmm3, xmm2
+ movss xmm2, xmm1
+ movss xmm1, xmm0
+
+ ; xmmB:xmmA:xmm6:xmm5 += xmm0:xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2:xmm1
+ ; (xmmA and xmmB denote the stack slots [esp] and [esp + 16])
+ movaps xmm7, xmm0
+ mulps xmm7, xmm1
+ addps xmm5, xmm7
+ movaps xmm7, xmm0
+ mulps xmm7, xmm2
+ addps xmm6, xmm7
+ movaps xmm7, xmm0
+ mulps xmm7, xmm3
+ mulps xmm0, xmm4
+ addps xmm7, [esp]
+ addps xmm0, [esp + 16]
+ movaps [esp], xmm7
+ movaps [esp + 16], xmm0
+
+ dec edx
+ jnz .loop_start
+.loop_end:
+ ; store autoc
+ mov edx, [ebp + 20] ; edx == autoc
+ movups [edx], xmm5
+ movups [edx + 16], xmm6
+ ; reload the two stack accumulators and store them as lags 8..11 and 12..15
+ movaps xmm5, [esp]
+ movaps xmm6, [esp + 16]
+ movups [edx + 32], xmm5
+ movups [edx + 48], xmm6
+.end:
+ ; undo the realignment and the 32-byte reservation in one step
+ mov esp, ebp
+ pop ebp
+ ret
+
+;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+;
+; for(i = 0; i < data_len; i++) {
+; sum = 0;
+; for(j = 0; j < order; j++)
+; sum += qlp_coeff[j] * data[i-j-1];
+; residual[i] = data[i] - (sum >> lp_quantization);
+; }
+;
+; Three code paths, selected by order:
+;   order <= 1 (signed compare) : .i_1_loop_i, one multiply per sample
+;   order > 32                  : .i_32more_loop_i, generic nested loop
+;   otherwise                   : .i_32, computed jump into an unrolled 32-tap sequence
+; All arithmetic is 32-bit and the shift is arithmetic (sar).
+; The .begin label is also jumped to from the _mmx variant of this routine, so
+; the register setup (esi, edi, eax, ebx) and stack layout at .begin must not change.
+;
+ ALIGN 16
+cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
+ ; (argument offsets below are valid after the four pushes)
+ ;[esp + 40] residual[]
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len
+ ;[esp + 20] data[]
+
+ ;ASSERT(order > 0)
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi, [esp + 20] ; esi = data[]
+ mov edi, [esp + 40] ; edi = residual[]
+ mov eax, [esp + 32] ; eax = order
+ mov ebx, [esp + 24] ; ebx = data_len
+
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+.begin:
+ cmp eax, byte 1
+ jg short .i_1more
+
+ mov ecx, [esp + 28]
+ mov edx, [ecx] ; edx = qlp_coeff[0]
+ mov eax, [esi - 4] ; eax = data[-1]
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ ALIGN 16
+.i_1_loop_i:
+ ; eax holds data[i-1] on entry to each iteration
+ imul eax, edx
+ sar eax, cl
+ neg eax
+ add eax, [esi]
+ mov [edi], eax
+ mov eax, [esi] ; reload data[i]; it becomes data[i-1] for the next iteration
+ add edi, byte 4
+ add esi, byte 4
+ dec ebx
+ jnz .i_1_loop_i
+
+ jmp .end
+
+.i_1more:
+ cmp eax, byte 32 ; for order <= 32 there is a faster routine
+ jbe short .i_32
+
+ ; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
+ ALIGN 16
+.i_32more_loop_i:
+ ; ebp = sum; edx walks qlp_coeff[] downwards from &qlp_coeff[order];
+ ; ecx counts up from -order to 0, so [esi + 4 * ecx] walks data[i-order] .. data[i-1]
+ xor ebp, ebp
+ mov ecx, [esp + 32]
+ mov edx, ecx
+ shl edx, 2
+ add edx, [esp + 28]
+ neg ecx
+ ALIGN 16
+.i_32more_loop_j:
+ sub edx, byte 4
+ mov eax, [edx]
+ imul eax, [esi + 4 * ecx]
+ add ebp, eax
+ inc ecx
+ jnz short .i_32more_loop_j
+
+ mov ecx, [esp + 36]
+ sar ebp, cl
+ neg ebp
+ add ebp, [esi]
+ mov [edi], ebp
+ add esi, byte 4
+ add edi, byte 4
+
+ dec ebx
+ jnz .i_32more_loop_i
+
+ jmp .end
+
+ ; helper: returns in eax the return address of the call, i.e. the address of
+ ; the instruction following the call (used to obtain a position-independent address)
+.mov_eip_to_eax:
+ mov eax, [esp]
+ ret
+
+.i_32:
+ ; edi becomes (residual - data) so that [edi + esi] addresses residual[i]
+ ; while only esi is advanced in the loop.
+ sub edi, esi
+ neg eax
+ ; edx = (.jumper_0 - .get_eip0) - 9 * order; the scale of 9 assumes each unrolled
+ ; tap below assembles to exactly 9 bytes, so the instruction forms there must not change
+ lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
+ call .mov_eip_to_eax
+.get_eip0:
+ add edx, eax ; edx = absolute address of .jumper_0 - 9 * order
+ inc edx ; compensate for the last tap being one byte shorter (no displacement byte)
+ mov eax, [esp + 28] ; eax = qlp_coeff[]
+ xor ebp, ebp
+ jmp edx
+
+ ; unrolled taps: ebp += qlp_coeff[j] * data[i-j-1] for j = 31 down to 0
+ mov ecx, [eax + 124]
+ imul ecx, [esi - 128]
+ add ebp, ecx
+ mov ecx, [eax + 120]
+ imul ecx, [esi - 124]
+ add ebp, ecx
+ mov ecx, [eax + 116]
+ imul ecx, [esi - 120]
+ add ebp, ecx
+ mov ecx, [eax + 112]
+ imul ecx, [esi - 116]
+ add ebp, ecx
+ mov ecx, [eax + 108]
+ imul ecx, [esi - 112]
+ add ebp, ecx
+ mov ecx, [eax + 104]
+ imul ecx, [esi - 108]
+ add ebp, ecx
+ mov ecx, [eax + 100]
+ imul ecx, [esi - 104]
+ add ebp, ecx
+ mov ecx, [eax + 96]
+ imul ecx, [esi - 100]
+ add ebp, ecx
+ mov ecx, [eax + 92]
+ imul ecx, [esi - 96]
+ add ebp, ecx
+ mov ecx, [eax + 88]
+ imul ecx, [esi - 92]
+ add ebp, ecx
+ mov ecx, [eax + 84]
+ imul ecx, [esi - 88]
+ add ebp, ecx
+ mov ecx, [eax + 80]
+ imul ecx, [esi - 84]
+ add ebp, ecx
+ mov ecx, [eax + 76]
+ imul ecx, [esi - 80]
+ add ebp, ecx
+ mov ecx, [eax + 72]
+ imul ecx, [esi - 76]
+ add ebp, ecx
+ mov ecx, [eax + 68]
+ imul ecx, [esi - 72]
+ add ebp, ecx
+ mov ecx, [eax + 64]
+ imul ecx, [esi - 68]
+ add ebp, ecx
+ mov ecx, [eax + 60]
+ imul ecx, [esi - 64]
+ add ebp, ecx
+ mov ecx, [eax + 56]
+ imul ecx, [esi - 60]
+ add ebp, ecx
+ mov ecx, [eax + 52]
+ imul ecx, [esi - 56]
+ add ebp, ecx
+ mov ecx, [eax + 48]
+ imul ecx, [esi - 52]
+ add ebp, ecx
+ mov ecx, [eax + 44]
+ imul ecx, [esi - 48]
+ add ebp, ecx
+ mov ecx, [eax + 40]
+ imul ecx, [esi - 44]
+ add ebp, ecx
+ mov ecx, [eax + 36]
+ imul ecx, [esi - 40]
+ add ebp, ecx
+ mov ecx, [eax + 32]
+ imul ecx, [esi - 36]
+ add ebp, ecx
+ mov ecx, [eax + 28]
+ imul ecx, [esi - 32]
+ add ebp, ecx
+ mov ecx, [eax + 24]
+ imul ecx, [esi - 28]
+ add ebp, ecx
+ mov ecx, [eax + 20]
+ imul ecx, [esi - 24]
+ add ebp, ecx
+ mov ecx, [eax + 16]
+ imul ecx, [esi - 20]
+ add ebp, ecx
+ mov ecx, [eax + 12]
+ imul ecx, [esi - 16]
+ add ebp, ecx
+ mov ecx, [eax + 8]
+ imul ecx, [esi - 12]
+ add ebp, ecx
+ mov ecx, [eax + 4]
+ imul ecx, [esi - 8]
+ add ebp, ecx
+ mov ecx, [eax] ; there is one byte missing
+ imul ecx, [esi - 4]
+ add ebp, ecx
+.jumper_0:
+
+ mov ecx, [esp + 36]
+ sar ebp, cl
+ neg ebp
+ add ebp, [esi]
+ mov [edi + esi], ebp ; residual[i] = data[i] - (sum >> lp_quantization)
+ add esi, byte 4
+
+ dec ebx
+ jz short .end
+ xor ebp, ebp
+ jmp edx ; edx still holds the entry point computed above
+
+.end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
+; the channel and qlp_coeffs must be <= 16. Especially note that this routine
+; cannot be used for side-channel coded 16bps channels since the effective bps
+; is 17.
+;
+; Outline:
+;  - the low 16 bits of each qlp_coeff[] entry are pushed onto an 8-byte aligned
+;    stack area (so qlp_coeff[0] ends up at the highest address), padded with
+;    zero words until the count is a multiple of 4; eax ends as the padded order
+;  - the MMX loops produce two residuals per iteration
+;  - a final odd sample (or data_len == 1) is handed to the .begin label of the
+;    non-MMX routine, with esi/edi/eax/ebx set up as that routine expects
+; Note that data[i-4] .. data[i-1] are read even when order < 4.
+ ALIGN 16
+cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
+ ; (argument offsets below are valid after the four pushes)
+ ;[esp + 40] residual[]
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len
+ ;[esp + 20] data[]
+
+ ;ASSERT(order > 0)
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi, [esp + 20] ; esi = data[]
+ mov edi, [esp + 40] ; edi = residual[]
+ mov eax, [esp + 32] ; eax = order
+ mov ebx, [esp + 24] ; ebx = data_len
+
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+ ; ebx = data_len - 1; the MMX loops run while this stays positive (two samples per pass)
+ dec ebx
+ test ebx, ebx
+ jz near .last_one
+
+ mov edx, [esp + 28] ; edx = qlp_coeff[]
+ movd mm6, [esp + 36] ; mm6 = 0:lp_quantization
+ mov ebp, esp ; ebp = saved esp, restored at .mmx_end
+
+ and esp, 0xfffffff8 ; align down to 8 bytes for the movq/pmaddwd memory operands
+
+ xor ecx, ecx
+.copy_qlp_loop:
+ ; push the low 16 bits of qlp_coeff[ecx]
+ push word [edx + 4 * ecx]
+ inc ecx
+ cmp ecx, eax
+ jnz short .copy_qlp_loop
+
+ ; pad with zero words up to a multiple of 4 coefficients; eax counts the padded order
+ and ecx, 0x3
+ test ecx, ecx
+ je short .za_end
+ sub ecx, byte 4
+.za_loop:
+ push word 0
+ inc eax
+ inc ecx
+ jnz short .za_loop
+.za_end:
+
+ ; mm5 = the four 16-bit coefficients at the top of the stack copy (qlp_coeff[0..3],
+ ; with qlp_coeff[0] in the highest word)
+ movq mm5, [esp + 2 * eax - 8]
+ ; mm4 = data[i-4] .. data[i-1] packed to 16 bits with signed saturation
+ ; (data[i-1] in the highest word)
+ movd mm4, [esi - 16]
+ punpckldq mm4, [esi - 12]
+ movd mm0, [esi - 8]
+ punpckldq mm0, [esi - 4]
+ packssdw mm4, mm0
+
+ cmp eax, byte 4
+ jnbe short .mmx_4more
+
+ ALIGN 16
+.mmx_4_loop_i:
+ ; mm1 = data[i+1]:data[i]; mm3 and mm2 become the history windows for
+ ; sample i and sample i+1; mm4 is advanced by two samples for the next pass
+ movd mm1, [esi]
+ movq mm3, mm4
+ punpckldq mm1, [esi + 4]
+ psrlq mm4, 16
+ movq mm0, mm1
+ psllq mm0, 48
+ por mm4, mm0
+ movq mm2, mm4
+ psrlq mm4, 16
+ pxor mm0, mm0
+ punpckhdq mm0, mm1
+ pmaddwd mm3, mm5
+ pmaddwd mm2, mm5
+ psllq mm0, 16
+ por mm4, mm0
+ ; horizontal add: mm3 = sum(i+1):sum(i)
+ movq mm0, mm3
+ punpckldq mm3, mm2
+ punpckhdq mm0, mm2
+ paddd mm3, mm0
+ psrad mm3, mm6
+ psubd mm1, mm3
+ movd [edi], mm1
+ punpckhdq mm1, mm1
+ movd [edi + 4], mm1
+
+ add edi, byte 8
+ add esi, byte 8
+
+ sub ebx, 2
+ jg .mmx_4_loop_i
+ jmp .mmx_end
+
+.mmx_4more:
+ ; eax = 16 - 4 * (padded order): byte offset from &data[i] at which the inner loop starts
+ shl eax, 2
+ neg eax
+ add eax, byte 16
+
+ ALIGN 16
+.mmx_4more_loop_i:
+ ; first four taps, handled exactly as in .mmx_4_loop_i
+ movd mm1, [esi]
+ punpckldq mm1, [esi + 4]
+ movq mm3, mm4
+ psrlq mm4, 16
+ movq mm0, mm1
+ psllq mm0, 48
+ por mm4, mm0
+ movq mm2, mm4
+ psrlq mm4, 16
+ pxor mm0, mm0
+ punpckhdq mm0, mm1
+ pmaddwd mm3, mm5
+ pmaddwd mm2, mm5
+ psllq mm0, 16
+ por mm4, mm0
+
+ ; remaining taps: ecx walks the older samples upwards until it reaches esi,
+ ; edx walks the stack copy of the coefficients upwards from esp
+ mov ecx, esi
+ add ecx, eax
+ mov edx, esp
+
+ ALIGN 16
+.mmx_4more_loop_j:
+ ; four taps for sample i (accumulated in mm3) ...
+ movd mm0, [ecx - 16]
+ movd mm7, [ecx - 8]
+ punpckldq mm0, [ecx - 12]
+ punpckldq mm7, [ecx - 4]
+ packssdw mm0, mm7
+ pmaddwd mm0, [edx]
+ punpckhdq mm7, mm7
+ paddd mm3, mm0
+ ; ... and the same four coefficients against the window shifted by one
+ ; sample, for sample i+1 (accumulated in mm2)
+ movd mm0, [ecx - 12]
+ punpckldq mm0, [ecx - 8]
+ punpckldq mm7, [ecx]
+ packssdw mm0, mm7
+ pmaddwd mm0, [edx]
+ paddd mm2, mm0
+
+ add edx, byte 8
+ add ecx, byte 16
+ cmp ecx, esi
+ jnz .mmx_4more_loop_j
+
+ ; horizontal add, shift, subtract from the data and store two residuals
+ movq mm0, mm3
+ punpckldq mm3, mm2
+ punpckhdq mm0, mm2
+ paddd mm3, mm0
+ psrad mm3, mm6
+ psubd mm1, mm3
+ movd [edi], mm1
+ punpckhdq mm1, mm1
+ movd [edi + 4], mm1
+
+ add edi, byte 8
+ add esi, byte 8
+
+ sub ebx, 2
+ jg near .mmx_4more_loop_i
+
+.mmx_end:
+ emms
+ mov esp, ebp ; drop the coefficient copy and the alignment adjustment
+.last_one:
+ ; ebx is 0 here if one sample remains and -1 if none does; after the inc a
+ ; non-zero value means one sample is left for the non-MMX routine
+ mov eax, [esp + 32]
+ inc ebx
+ jnz near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
+
+.end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; **********************************************************************
+;
+; void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+; {
+; unsigned i, j;
+; FLAC__int32 sum;
+;
+; FLAC__ASSERT(order > 0);
+;
+; for(i = 0; i < data_len; i++) {
+; sum = 0;
+; for(j = 0; j < order; j++)
+; sum += qlp_coeff[j] * data[i-j-1];
+; data[i] = residual[i] + (sum >> lp_quantization);
+; }
+; }
+;
+; Three code paths, selected by order:
+;   order <= 1 (signed compare) : .x87_1_loop_i, one multiply per sample
+;   order > 32                  : .x87_32more_loop_i, generic nested loop
+;   otherwise                   : .x87_32, computed jump into an unrolled 32-tap sequence
+; All arithmetic is 32-bit integer and the shift is arithmetic (sar).
+; The .begin label is also jumped to from the _mmx variant of this routine, so
+; the register setup (esi, edi, eax, ebx) and stack layout at .begin must not change.
+ ALIGN 16
+cident FLAC__lpc_restore_signal_asm_ia32
+ ; (argument offsets below are valid after the four pushes)
+ ;[esp + 40] data[]
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len
+ ;[esp + 20] residual[]
+
+ ;ASSERT(order > 0)
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi, [esp + 20] ; esi = residual[]
+ mov edi, [esp + 40] ; edi = data[]
+ mov eax, [esp + 32] ; eax = order
+ mov ebx, [esp + 24] ; ebx = data_len
+
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+
+.begin:
+ cmp eax, byte 1
+ jg short .x87_1more
+
+ mov ecx, [esp + 28]
+ mov edx, [ecx] ; edx = qlp_coeff[0]
+ mov eax, [edi - 4] ; eax = data[-1]
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ ALIGN 16
+.x87_1_loop_i:
+ ; eax holds data[i-1] on entry; it leaves the iteration holding the new data[i]
+ imul eax, edx
+ sar eax, cl
+ add eax, [esi]
+ mov [edi], eax
+ add esi, byte 4
+ add edi, byte 4
+ dec ebx
+ jnz .x87_1_loop_i
+
+ jmp .end
+
+.x87_1more:
+ cmp eax, byte 32 ; for order <= 32 there is a faster routine
+ jbe short .x87_32
+
+ ; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
+ ALIGN 16
+.x87_32more_loop_i:
+ ; ebp = sum; edx walks qlp_coeff[] downwards from &qlp_coeff[order];
+ ; ecx counts up from -order to 0, so [edi + 4 * ecx] walks data[i-order] .. data[i-1]
+ xor ebp, ebp
+ mov ecx, [esp + 32]
+ mov edx, ecx
+ shl edx, 2
+ add edx, [esp + 28]
+ neg ecx
+ ALIGN 16
+.x87_32more_loop_j:
+ sub edx, byte 4
+ mov eax, [edx]
+ imul eax, [edi + 4 * ecx]
+ add ebp, eax
+ inc ecx
+ jnz short .x87_32more_loop_j
+
+ mov ecx, [esp + 36]
+ sar ebp, cl
+ add ebp, [esi]
+ mov [edi], ebp
+ add edi, byte 4
+ add esi, byte 4
+
+ dec ebx
+ jnz .x87_32more_loop_i
+
+ jmp .end
+
+ ; helper: returns in eax the return address of the call, i.e. the address of
+ ; the instruction following the call (used to obtain a position-independent address)
+.mov_eip_to_eax:
+ mov eax, [esp]
+ ret
+
+.x87_32:
+ ; esi becomes (residual - data) so that [esi + edi] addresses residual[i]
+ ; while only edi is advanced in the loop.
+ sub esi, edi
+ neg eax
+ ; edx = (.jumper_0 - .get_eip0) - 9 * order; the scale of 9 assumes each unrolled
+ ; tap below assembles to exactly 9 bytes, so the instruction forms there must not change
+ lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
+ call .mov_eip_to_eax
+.get_eip0:
+ add edx, eax ; edx = absolute address of .jumper_0 - 9 * order
+ inc edx ; compensate for the shorter opcode on the last iteration
+ mov eax, [esp + 28] ; eax = qlp_coeff[]
+ xor ebp, ebp
+ jmp edx
+
+ mov ecx, [eax + 124] ; ecx = qlp_coeff[31]
+ imul ecx, [edi - 128] ; ecx = qlp_coeff[31] * data[i-32]
+ add ebp, ecx ; sum += qlp_coeff[31] * data[i-32]
+ mov ecx, [eax + 120] ; ecx = qlp_coeff[30]
+ imul ecx, [edi - 124] ; ecx = qlp_coeff[30] * data[i-31]
+ add ebp, ecx ; sum += qlp_coeff[30] * data[i-31]
+ mov ecx, [eax + 116] ; ecx = qlp_coeff[29]
+ imul ecx, [edi - 120] ; ecx = qlp_coeff[29] * data[i-30]
+ add ebp, ecx ; sum += qlp_coeff[29] * data[i-30]
+ mov ecx, [eax + 112] ; ecx = qlp_coeff[28]
+ imul ecx, [edi - 116] ; ecx = qlp_coeff[28] * data[i-29]
+ add ebp, ecx ; sum += qlp_coeff[28] * data[i-29]
+ mov ecx, [eax + 108] ; ecx = qlp_coeff[27]
+ imul ecx, [edi - 112] ; ecx = qlp_coeff[27] * data[i-28]
+ add ebp, ecx ; sum += qlp_coeff[27] * data[i-28]
+ mov ecx, [eax + 104] ; ecx = qlp_coeff[26]
+ imul ecx, [edi - 108] ; ecx = qlp_coeff[26] * data[i-27]
+ add ebp, ecx ; sum += qlp_coeff[26] * data[i-27]
+ mov ecx, [eax + 100] ; ecx = qlp_coeff[25]
+ imul ecx, [edi - 104] ; ecx = qlp_coeff[25] * data[i-26]
+ add ebp, ecx ; sum += qlp_coeff[25] * data[i-26]
+ mov ecx, [eax + 96] ; ecx = qlp_coeff[24]
+ imul ecx, [edi - 100] ; ecx = qlp_coeff[24] * data[i-25]
+ add ebp, ecx ; sum += qlp_coeff[24] * data[i-25]
+ mov ecx, [eax + 92] ; ecx = qlp_coeff[23]
+ imul ecx, [edi - 96] ; ecx = qlp_coeff[23] * data[i-24]
+ add ebp, ecx ; sum += qlp_coeff[23] * data[i-24]
+ mov ecx, [eax + 88] ; ecx = qlp_coeff[22]
+ imul ecx, [edi - 92] ; ecx = qlp_coeff[22] * data[i-23]
+ add ebp, ecx ; sum += qlp_coeff[22] * data[i-23]
+ mov ecx, [eax + 84] ; ecx = qlp_coeff[21]
+ imul ecx, [edi - 88] ; ecx = qlp_coeff[21] * data[i-22]
+ add ebp, ecx ; sum += qlp_coeff[21] * data[i-22]
+ mov ecx, [eax + 80] ; ecx = qlp_coeff[20]
+ imul ecx, [edi - 84] ; ecx = qlp_coeff[20] * data[i-21]
+ add ebp, ecx ; sum += qlp_coeff[20] * data[i-21]
+ mov ecx, [eax + 76] ; ecx = qlp_coeff[19]
+ imul ecx, [edi - 80] ; ecx = qlp_coeff[19] * data[i-20]
+ add ebp, ecx ; sum += qlp_coeff[19] * data[i-20]
+ mov ecx, [eax + 72] ; ecx = qlp_coeff[18]
+ imul ecx, [edi - 76] ; ecx = qlp_coeff[18] * data[i-19]
+ add ebp, ecx ; sum += qlp_coeff[18] * data[i-19]
+ mov ecx, [eax + 68] ; ecx = qlp_coeff[17]
+ imul ecx, [edi - 72] ; ecx = qlp_coeff[17] * data[i-18]
+ add ebp, ecx ; sum += qlp_coeff[17] * data[i-18]
+ mov ecx, [eax + 64] ; ecx = qlp_coeff[16]
+ imul ecx, [edi - 68] ; ecx = qlp_coeff[16] * data[i-17]
+ add ebp, ecx ; sum += qlp_coeff[16] * data[i-17]
+ mov ecx, [eax + 60] ; ecx = qlp_coeff[15]
+ imul ecx, [edi - 64] ; ecx = qlp_coeff[15] * data[i-16]
+ add ebp, ecx ; sum += qlp_coeff[15] * data[i-16]
+ mov ecx, [eax + 56] ; ecx = qlp_coeff[14]
+ imul ecx, [edi - 60] ; ecx = qlp_coeff[14] * data[i-15]
+ add ebp, ecx ; sum += qlp_coeff[14] * data[i-15]
+ mov ecx, [eax + 52] ; ecx = qlp_coeff[13]
+ imul ecx, [edi - 56] ; ecx = qlp_coeff[13] * data[i-14]
+ add ebp, ecx ; sum += qlp_coeff[13] * data[i-14]
+ mov ecx, [eax + 48] ; ecx = qlp_coeff[12]
+ imul ecx, [edi - 52] ; ecx = qlp_coeff[12] * data[i-13]
+ add ebp, ecx ; sum += qlp_coeff[12] * data[i-13]
+ mov ecx, [eax + 44] ; ecx = qlp_coeff[11]
+ imul ecx, [edi - 48] ; ecx = qlp_coeff[11] * data[i-12]
+ add ebp, ecx ; sum += qlp_coeff[11] * data[i-12]
+ mov ecx, [eax + 40] ; ecx = qlp_coeff[10]
+ imul ecx, [edi - 44] ; ecx = qlp_coeff[10] * data[i-11]
+ add ebp, ecx ; sum += qlp_coeff[10] * data[i-11]
+ mov ecx, [eax + 36] ; ecx = qlp_coeff[ 9]
+ imul ecx, [edi - 40] ; ecx = qlp_coeff[ 9] * data[i-10]
+ add ebp, ecx ; sum += qlp_coeff[ 9] * data[i-10]
+ mov ecx, [eax + 32] ; ecx = qlp_coeff[ 8]
+ imul ecx, [edi - 36] ; ecx = qlp_coeff[ 8] * data[i- 9]
+ add ebp, ecx ; sum += qlp_coeff[ 8] * data[i- 9]
+ mov ecx, [eax + 28] ; ecx = qlp_coeff[ 7]
+ imul ecx, [edi - 32] ; ecx = qlp_coeff[ 7] * data[i- 8]
+ add ebp, ecx ; sum += qlp_coeff[ 7] * data[i- 8]
+ mov ecx, [eax + 24] ; ecx = qlp_coeff[ 6]
+ imul ecx, [edi - 28] ; ecx = qlp_coeff[ 6] * data[i- 7]
+ add ebp, ecx ; sum += qlp_coeff[ 6] * data[i- 7]
+ mov ecx, [eax + 20] ; ecx = qlp_coeff[ 5]
+ imul ecx, [edi - 24] ; ecx = qlp_coeff[ 5] * data[i- 6]
+ add ebp, ecx ; sum += qlp_coeff[ 5] * data[i- 6]
+ mov ecx, [eax + 16] ; ecx = qlp_coeff[ 4]
+ imul ecx, [edi - 20] ; ecx = qlp_coeff[ 4] * data[i- 5]
+ add ebp, ecx ; sum += qlp_coeff[ 4] * data[i- 5]
+ mov ecx, [eax + 12] ; ecx = qlp_coeff[ 3]
+ imul ecx, [edi - 16] ; ecx = qlp_coeff[ 3] * data[i- 4]
+ add ebp, ecx ; sum += qlp_coeff[ 3] * data[i- 4]
+ mov ecx, [eax + 8] ; ecx = qlp_coeff[ 2]
+ imul ecx, [edi - 12] ; ecx = qlp_coeff[ 2] * data[i- 3]
+ add ebp, ecx ; sum += qlp_coeff[ 2] * data[i- 3]
+ mov ecx, [eax + 4] ; ecx = qlp_coeff[ 1]
+ imul ecx, [edi - 8] ; ecx = qlp_coeff[ 1] * data[i- 2]
+ add ebp, ecx ; sum += qlp_coeff[ 1] * data[i- 2]
+ mov ecx, [eax] ; ecx = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
+ imul ecx, [edi - 4] ; ecx = qlp_coeff[ 0] * data[i- 1]
+ add ebp, ecx ; sum += qlp_coeff[ 0] * data[i- 1]
+.jumper_0:
+
+ mov ecx, [esp + 36]
+ sar ebp, cl ; ebp = (sum >> lp_quantization)
+ add ebp, [esi + edi] ; ebp = residual[i] + (sum >> lp_quantization)
+ mov [edi], ebp ; data[i] = residual[i] + (sum >> lp_quantization)
+ add edi, byte 4
+
+ dec ebx
+ jz short .end
+ xor ebp, ebp
+ jmp edx ; edx still holds the entry point computed above
+
+.end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
+; the channel and qlp_coeffs must be <= 16. Especially note that this routine
+; cannot be used for side-channel coded 16bps channels since the effective bps
+; is 17.
+; WATCHOUT: this routine requires that each data array have a buffer of up to
+; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
+; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
+;
+; Outline:
+;  - order < 4 (unsigned compare) is handed to the .begin label of the non-MMX
+;    routine, with esi/edi/eax/ebx set up as that routine expects
+;  - otherwise the low 16 bits of each qlp_coeff[] entry are pushed onto an
+;    8-byte aligned stack area (qlp_coeff[0] at the highest address), padded
+;    with zero words to a multiple of 4; eax ends as the padded order
+;  - one sample is produced per iteration, since each output feeds the next prediction
+ ALIGN 16
+cident FLAC__lpc_restore_signal_asm_ia32_mmx
+ ; (argument offsets below are valid after the four pushes)
+ ;[esp + 40] data[]
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len
+ ;[esp + 20] residual[]
+
+ ;ASSERT(order > 0)
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov esi, [esp + 20] ; esi = residual[]
+ mov edi, [esp + 40] ; edi = data[]
+ mov eax, [esp + 32] ; eax = order
+ mov ebx, [esp + 24] ; ebx = data_len
+
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+ cmp eax, byte 4
+ jb near FLAC__lpc_restore_signal_asm_ia32.begin
+
+ mov edx, [esp + 28] ; edx = qlp_coeff[]
+ movd mm6, [esp + 36] ; mm6 = 0:lp_quantization
+ mov ebp, esp ; ebp = saved esp, restored at .mmx_end
+
+ and esp, 0xfffffff8 ; align down to 8 bytes for the movq/pmaddwd memory operands
+
+ xor ecx, ecx
+.copy_qlp_loop:
+ ; push the low 16 bits of qlp_coeff[ecx]
+ push word [edx + 4 * ecx]
+ inc ecx
+ cmp ecx, eax
+ jnz short .copy_qlp_loop
+
+ ; pad with zero words up to a multiple of 4 coefficients; eax counts the padded order
+ and ecx, 0x3
+ test ecx, ecx
+ je short .za_end
+ sub ecx, byte 4
+.za_loop:
+ push word 0
+ inc eax
+ inc ecx
+ jnz short .za_loop
+.za_end:
+
+ ; mm5 = the four 16-bit coefficients at the top of the stack copy (qlp_coeff[0..3],
+ ; with qlp_coeff[0] in the highest word)
+ movq mm5, [esp + 2 * eax - 8]
+ ; mm4 = data[i-4] .. data[i-1] packed to 16 bits with signed saturation
+ ; (data[i-1] in the highest word)
+ movd mm4, [edi - 16]
+ punpckldq mm4, [edi - 12]
+ movd mm0, [edi - 8]
+ punpckldq mm0, [edi - 4]
+ packssdw mm4, mm0
+
+ cmp eax, byte 4
+ jnbe short .mmx_4more
+
+ ALIGN 16
+.mmx_4_loop_i:
+ ; low dword of mm7 = sum of the four products, then shifted and added to residual[i]
+ movq mm7, mm4
+ pmaddwd mm7, mm5
+ movq mm0, mm7
+ punpckhdq mm7, mm7
+ paddd mm7, mm0
+ psrad mm7, mm6
+ movd mm1, [esi]
+ paddd mm7, mm1
+ movd [edi], mm7
+ ; slide the history window: drop the oldest word, insert the low 16 bits of
+ ; the new data[i] as the highest word
+ psllq mm7, 48
+ psrlq mm4, 16
+ por mm4, mm7
+
+ add esi, byte 4
+ add edi, byte 4
+
+ dec ebx
+ jnz .mmx_4_loop_i
+ jmp .mmx_end
+.mmx_4more:
+ ; eax = 16 - 4 * (padded order): byte offset from &data[i] at which the inner loop starts
+ shl eax, 2
+ neg eax
+ add eax, byte 16
+ ALIGN 16
+.mmx_4more_loop_i:
+ ; ecx walks the older samples upwards until it reaches edi,
+ ; edx walks the stack copy of the coefficients upwards from esp
+ mov ecx, edi
+ add ecx, eax
+ mov edx, esp
+
+ ; first four taps from the register-held window
+ movq mm7, mm4
+ pmaddwd mm7, mm5
+
+ ALIGN 16
+.mmx_4more_loop_j:
+ ; four more taps per pass: pack four older samples to 16 bits and
+ ; multiply-accumulate against the next four stacked coefficients
+ movd mm0, [ecx - 16]
+ punpckldq mm0, [ecx - 12]
+ movd mm1, [ecx - 8]
+ punpckldq mm1, [ecx - 4]
+ packssdw mm0, mm1
+ pmaddwd mm0, [edx]
+ paddd mm7, mm0
+
+ add edx, byte 8
+ add ecx, byte 16
+ cmp ecx, edi
+ jnz .mmx_4more_loop_j
+
+ ; horizontal add, shift, add residual[i], store data[i], slide the window
+ movq mm0, mm7
+ punpckhdq mm7, mm7
+ paddd mm7, mm0
+ psrad mm7, mm6
+ movd mm1, [esi]
+ paddd mm7, mm1
+ movd [edi], mm7
+ psllq mm7, 48
+ psrlq mm4, 16
+ por mm4, mm7
+
+ add esi, byte 4
+ add edi, byte 4
+
+ dec ebx
+ jnz short .mmx_4more_loop_i
+.mmx_end:
+ emms
+ mov esp, ebp ; drop the coefficient copy and the alignment adjustment
+
+.end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+
+; **********************************************************************
+;
+;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+; {
+; unsigned i, j;
+; FLAC__int64 sum;
+;
+; FLAC__ASSERT(order > 0);
+;
+; for(i = 0; i < data_len; i++) {
+; sum = 0;
+; for(j = 0; j < order; j++)
+; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
+; residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+; }
+; }
+;
+; Two code paths, selected by order:
+;   order <= 1 (signed compare) : .i_1_loop_i, one 32x32->64 multiply per sample
+;   otherwise                   : .i_32, computed jump into an unrolled 32-tap sequence
+; The 64-bit sum is kept in esi:ecx. In the unrolled path the stack slots of the
+; data_len and residual[] arguments are overwritten (used as loop counter and as
+; the residual-minus-data byte offset) because no register is left for them.
+ ALIGN 16
+cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
+ ; (argument offsets below are valid after the four pushes)
+ ;[esp + 40] residual[]
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len
+ ;[esp + 20] data[]
+
+ ;ASSERT(order > 0)
+ ;ASSERT(order <= 32)
+ ;ASSERT(lp_quantization <= 31)
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov ebx, [esp + 24] ; ebx = data_len
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+
+.begin:
+ mov eax, [esp + 32] ; eax = order
+ cmp eax, 1
+ jg short .i_32
+
+ mov esi, [esp + 40] ; esi = residual[]
+ mov edi, [esp + 20] ; edi = data[]
+ mov ecx, [esp + 28] ; ecx = qlp_coeff[]
+ mov ebp, [ecx] ; ebp = qlp_coeff[0]
+ mov eax, [edi - 4] ; eax = data[-1]
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ ALIGN 16
+.i_1_loop_i:
+ imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
+ shrd eax, edx, cl ; 0 <= lp_quantization <= 15
+ neg eax
+ add eax, [edi]
+ mov [esi], eax
+ mov eax, [edi] ; reload data[i]; it becomes data[i-1] for the next iteration
+ add esi, 4
+ add edi, 4
+ dec ebx
+ jnz .i_1_loop_i
+ jmp .end
+
+ ; helper: returns in eax the return address of the call, i.e. the address of
+ ; the instruction following the call (used to obtain a position-independent address)
+.mov_eip_to_eax:
+ mov eax, [esp]
+ ret
+
+.i_32: ; eax = order
+ neg eax
+ add eax, eax
+ ; eax = -2 * order, so ebp = (.jumper_0 - .get_eip0) - 10 * order; the factor of 10
+ ; assumes each unrolled tap below assembles to exactly 10 bytes, so the
+ ; instruction forms there must not change
+ lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
+ call .mov_eip_to_eax
+.get_eip0:
+ add ebp, eax ; ebp = absolute address of .jumper_0 - 10 * order
+ inc ebp ; compensate for the shorter opcode on the last iteration
+
+ mov ebx, [esp + 28] ; ebx = qlp_coeff[]
+ mov edi, [esp + 20] ; edi = data[]
+ sub [esp + 40], edi ; residual[] -= data[]
+
+ xor ecx, ecx
+ xor esi, esi
+ jmp ebp
+
+;eax = --
+;edx = --
+;ecx = 0
+;esi = 0
+;
+;ebx = qlp_coeff[]
+;edi = data[]
+;ebp = @address
+
+ mov eax, [ebx + 124] ; eax = qlp_coeff[31]
+ imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
+
+ mov eax, [ebx + 120] ; eax = qlp_coeff[30]
+ imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
+
+ mov eax, [ebx + 116]
+ imul dword [edi - 120]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 112]
+ imul dword [edi - 116]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 108]
+ imul dword [edi - 112]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 104]
+ imul dword [edi - 108]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 100]
+ imul dword [edi - 104]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 96]
+ imul dword [edi - 100]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 92]
+ imul dword [edi - 96]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 88]
+ imul dword [edi - 92]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 84]
+ imul dword [edi - 88]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 80]
+ imul dword [edi - 84]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 76]
+ imul dword [edi - 80]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 72]
+ imul dword [edi - 76]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 68]
+ imul dword [edi - 72]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 64]
+ imul dword [edi - 68]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 60]
+ imul dword [edi - 64]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 56]
+ imul dword [edi - 60]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 52]
+ imul dword [edi - 56]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 48]
+ imul dword [edi - 52]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 44]
+ imul dword [edi - 48]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 40]
+ imul dword [edi - 44]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 36]
+ imul dword [edi - 40]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 32]
+ imul dword [edi - 36]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 28]
+ imul dword [edi - 32]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 24]
+ imul dword [edi - 28]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 20]
+ imul dword [edi - 24]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 16]
+ imul dword [edi - 20]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 12]
+ imul dword [edi - 16]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 8]
+ imul dword [edi - 12]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx + 4]
+ imul dword [edi - 8]
+ add ecx, eax
+ adc esi, edx
+
+ mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
+ imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
+
+.jumper_0:
+ ; move the low half of the sum out of ecx, which is needed for the shift count
+ mov edx, ecx
+;esi:edx = sum
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ shrd edx, esi, cl ; edx = (sum >> lp_quantization)
+;eax = --
+;ecx = --
+;edx = sum >> lp_q
+;esi = --
+ neg edx ; edx = -(sum >> lp_quantization)
+ mov eax, [esp + 40] ; residual[] - data[]
+ add edx, [edi] ; edx = data[i] - (sum >> lp_quantization)
+ mov [edi + eax], edx ; residual[i] = edx
+ add edi, 4
+
+ dec dword [esp + 24] ; the data_len stack slot serves as the loop counter
+ jz short .end
+ xor ecx, ecx
+ xor esi, esi
+ jmp ebp ; ebp still holds the entry point computed above
+
+.end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; **********************************************************************
+;
+; void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+; {
+; unsigned i, j;
+; FLAC__int64 sum;
+;
+; FLAC__ASSERT(order > 0);
+;
+; for(i = 0; i < data_len; i++) {
+; sum = 0;
+; for(j = 0; j < order; j++)
+; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
+; data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+; }
+; }
+ ALIGN 16
+cident FLAC__lpc_restore_signal_wide_asm_ia32
+ ;[esp + 40] data[] (all offsets listed here are relative to esp after the four pushes below)
+ ;[esp + 36] lp_quantization
+ ;[esp + 32] order
+ ;[esp + 28] qlp_coeff[]
+ ;[esp + 24] data_len (decremented in place as the sample counter when order > 1)
+ ;[esp + 20] residual[] (overwritten with residual[] - data[] when order > 1)
+
+ ;ASSERT(order > 0) -- order == 1 takes the simple loop, order > 1 the unrolled chain
+ ;ASSERT(order <= 32) -- the unrolled chain below holds 32 multiply-accumulate groups
+ ;ASSERT(lp_quantization <= 31) -- the value is used as the shrd shift count in cl
+
+ push ebp ; save the registers used below; they are restored at .end
+ push ebx
+ push esi
+ push edi
+
+ mov ebx, [esp + 24] ; ebx = data_len
+ test ebx, ebx
+ jz near .end ; do nothing if data_len == 0
+
+.begin:
+ mov eax, [esp + 32] ; eax = order
+ cmp eax, 1
+ jg short .x87_32 ; order > 1: use the unrolled 64-bit multiply-accumulate chain
+
+ mov esi, [esp + 20] ; esi = residual[]
+ mov edi, [esp + 40] ; edi = data[]
+ mov ecx, [esp + 28] ; ecx = qlp_coeff[]
+ mov ebp, [ecx] ; ebp = qlp_coeff[0]
+ mov eax, [edi - 4] ; eax = data[-1]
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ ALIGN 16
+.x87_1_loop_i: ; order == 1 loop; eax holds data[i-1] on entry to every pass
+ imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
+ shrd eax, edx, cl ; eax = low 32 bits of (edx:eax >> lp_quantization); the ASSERT above allows 0..31
+;
+ add eax, [esi] ; eax = residual[i] + (sum >> lp_quantization)
+ mov [edi], eax ; data[i] = eax; eax is reused as data[i-1] by the next pass
+;
+ add esi, 4 ; advance to residual[i+1]
+ add edi, 4 ; advance to data[i+1]
+ dec ebx ; one sample fewer to go (ebx started as data_len)
+ jnz .x87_1_loop_i
+ jmp .end
+
+.mov_eip_to_eax: ; helper: hands the caller its own return address
+ mov eax, [esp] ; eax = return address, i.e. the address of the instruction after the call
+ ret
+
+.x87_32: ; eax = order
+ neg eax ; eax = -order
+ add eax, eax ; eax = -2 * order
+ lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0] ; ebp = (.jumper_0 - .get_eip0) - 10 * order, i.e. 10 bytes back per coefficient
+ call .mov_eip_to_eax ; eax = run-time address of .get_eip0 (no absolute address is needed)
+.get_eip0:
+ add ebp, eax ; ebp = run-time address of .jumper_0 - 10 * order
+ inc ebp ; compensate for the shorter opcode on the last iteration
+
+ mov ebx, [esp + 28] ; ebx = qlp_coeff[]
+ mov edi, [esp + 40] ; edi = data[]
+ sub [esp + 20], edi ; residual[] -= data[] (the difference is kept in the argument slot)
+
+ xor ecx, ecx ; ecx = low dword of sum = 0
+ xor esi, esi ; esi = high dword of sum = 0
+ jmp ebp ; enter the chain at the group for qlp_coeff[order-1]
+
+;eax = --
+;edx = --
+;ecx = 0 (low dword of the 64-bit sum)
+;esi = 0 (high dword of the 64-bit sum)
+;
+;ebx = qlp_coeff[]
+;edi = data[]
+;ebp = @address (computed entry point into the chain below, reused for every sample)
+
+ mov eax, [ebx + 124] ; eax = qlp_coeff[31]
+ imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
+
+ mov eax, [ebx + 120] ; eax = qlp_coeff[30]
+ imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
+
+ mov eax, [ebx + 116] ; eax = qlp_coeff[29]
+ imul dword [edi - 120] ; edx:eax = qlp_coeff[29] * data[i-30]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[29] * data[i-30]
+
+ mov eax, [ebx + 112] ; eax = qlp_coeff[28]
+ imul dword [edi - 116] ; edx:eax = qlp_coeff[28] * data[i-29]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[28] * data[i-29]
+
+ mov eax, [ebx + 108] ; eax = qlp_coeff[27]
+ imul dword [edi - 112] ; edx:eax = qlp_coeff[27] * data[i-28]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[27] * data[i-28]
+
+ mov eax, [ebx + 104] ; eax = qlp_coeff[26]
+ imul dword [edi - 108] ; edx:eax = qlp_coeff[26] * data[i-27]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[26] * data[i-27]
+
+ mov eax, [ebx + 100] ; eax = qlp_coeff[25]
+ imul dword [edi - 104] ; edx:eax = qlp_coeff[25] * data[i-26]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[25] * data[i-26]
+
+ mov eax, [ebx + 96] ; eax = qlp_coeff[24]
+ imul dword [edi - 100] ; edx:eax = qlp_coeff[24] * data[i-25]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[24] * data[i-25]
+
+ mov eax, [ebx + 92] ; eax = qlp_coeff[23]
+ imul dword [edi - 96] ; edx:eax = qlp_coeff[23] * data[i-24]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[23] * data[i-24]
+
+ mov eax, [ebx + 88] ; eax = qlp_coeff[22]
+ imul dword [edi - 92] ; edx:eax = qlp_coeff[22] * data[i-23]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[22] * data[i-23]
+
+ mov eax, [ebx + 84] ; eax = qlp_coeff[21]
+ imul dword [edi - 88] ; edx:eax = qlp_coeff[21] * data[i-22]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[21] * data[i-22]
+
+ mov eax, [ebx + 80] ; eax = qlp_coeff[20]
+ imul dword [edi - 84] ; edx:eax = qlp_coeff[20] * data[i-21]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[20] * data[i-21]
+
+ mov eax, [ebx + 76] ; eax = qlp_coeff[19]
+ imul dword [edi - 80] ; edx:eax = qlp_coeff[19] * data[i-20]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[19] * data[i-20]
+
+ mov eax, [ebx + 72] ; eax = qlp_coeff[18]
+ imul dword [edi - 76] ; edx:eax = qlp_coeff[18] * data[i-19]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[18] * data[i-19]
+
+ mov eax, [ebx + 68] ; eax = qlp_coeff[17]
+ imul dword [edi - 72] ; edx:eax = qlp_coeff[17] * data[i-18]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[17] * data[i-18]
+
+ mov eax, [ebx + 64] ; eax = qlp_coeff[16]
+ imul dword [edi - 68] ; edx:eax = qlp_coeff[16] * data[i-17]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[16] * data[i-17]
+
+ mov eax, [ebx + 60] ; eax = qlp_coeff[15]
+ imul dword [edi - 64] ; edx:eax = qlp_coeff[15] * data[i-16]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[15] * data[i-16]
+
+ mov eax, [ebx + 56] ; eax = qlp_coeff[14]
+ imul dword [edi - 60] ; edx:eax = qlp_coeff[14] * data[i-15]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[14] * data[i-15]
+
+ mov eax, [ebx + 52] ; eax = qlp_coeff[13]
+ imul dword [edi - 56] ; edx:eax = qlp_coeff[13] * data[i-14]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[13] * data[i-14]
+
+ mov eax, [ebx + 48] ; eax = qlp_coeff[12]
+ imul dword [edi - 52] ; edx:eax = qlp_coeff[12] * data[i-13]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[12] * data[i-13]
+
+ mov eax, [ebx + 44] ; eax = qlp_coeff[11]
+ imul dword [edi - 48] ; edx:eax = qlp_coeff[11] * data[i-12]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[11] * data[i-12]
+
+ mov eax, [ebx + 40] ; eax = qlp_coeff[10]
+ imul dword [edi - 44] ; edx:eax = qlp_coeff[10] * data[i-11]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[10] * data[i-11]
+
+ mov eax, [ebx + 36] ; eax = qlp_coeff[ 9]
+ imul dword [edi - 40] ; edx:eax = qlp_coeff[ 9] * data[i-10]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 9] * data[i-10]
+
+ mov eax, [ebx + 32] ; eax = qlp_coeff[ 8]
+ imul dword [edi - 36] ; edx:eax = qlp_coeff[ 8] * data[i- 9]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 8] * data[i- 9]
+
+ mov eax, [ebx + 28] ; eax = qlp_coeff[ 7]
+ imul dword [edi - 32] ; edx:eax = qlp_coeff[ 7] * data[i- 8]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 7] * data[i- 8]
+
+ mov eax, [ebx + 24] ; eax = qlp_coeff[ 6]
+ imul dword [edi - 28] ; edx:eax = qlp_coeff[ 6] * data[i- 7]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 6] * data[i- 7]
+
+ mov eax, [ebx + 20] ; eax = qlp_coeff[ 5]
+ imul dword [edi - 24] ; edx:eax = qlp_coeff[ 5] * data[i- 6]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 5] * data[i- 6]
+
+ mov eax, [ebx + 16] ; eax = qlp_coeff[ 4]
+ imul dword [edi - 20] ; edx:eax = qlp_coeff[ 4] * data[i- 5]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 4] * data[i- 5]
+
+ mov eax, [ebx + 12] ; eax = qlp_coeff[ 3]
+ imul dword [edi - 16] ; edx:eax = qlp_coeff[ 3] * data[i- 4]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 3] * data[i- 4]
+
+ mov eax, [ebx + 8] ; eax = qlp_coeff[ 2]
+ imul dword [edi - 12] ; edx:eax = qlp_coeff[ 2] * data[i- 3]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 2] * data[i- 3]
+
+ mov eax, [ebx + 4] ; eax = qlp_coeff[ 1]
+ imul dword [edi - 8] ; edx:eax = qlp_coeff[ 1] * data[i- 2]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 1] * data[i- 2]
+
+ mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: no displacement here, so this instruction is one byte shorter than the loads above)
+ imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
+ add ecx, eax
+ adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
+
+.jumper_0:
+ mov edx, ecx ; move the low dword of sum out of ecx, which is needed for the shift count
+;esi:edx = sum
+ mov ecx, [esp + 36] ; cl = lp_quantization
+ shrd edx, esi, cl ; edx = (sum >> lp_quantization)
+;eax = --
+;ecx = --
+;edx = sum >> lp_q
+;esi = --
+;
+ mov eax, [esp + 20] ; residual[] - data[]
+ add edx, [edi + eax] ; edx = residual[i] + (sum >> lp_quantization)
+ mov [edi], edx ; data[i] = residual[i] + (sum >> lp_quantization)
+ add edi, 4 ; advance to data[i+1]
+
+ dec dword [esp + 24] ; --data_len (the counter lives in the argument slot)
+ jz short .end
+ xor ecx, ecx ; reset the 64-bit sum for the next sample
+ xor esi, esi
+ jmp ebp ; re-enter the chain at the same group for the next sample
+
+.end:
+ pop edi ; restore the saved registers in reverse push order
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+
+; end
diff --git a/deps/flac-1.3.2/src/libFLAC/ia32/nasm.h b/deps/flac-1.3.2/src/libFLAC/ia32/nasm.h
new file mode 100644
index 0000000..ff479bf
--- /dev/null
+++ b/deps/flac-1.3.2/src/libFLAC/ia32/nasm.h
@@ -0,0 +1,90 @@
+; libFLAC - Free Lossless Audio Codec library
+; Copyright (C) 2001-2009 Josh Coalson
+; Copyright (C) 2011-2016 Xiph.Org Foundation
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; - Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;
+; - Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in the
+; documentation and/or other materials provided with the distribution.
+;
+; - Neither the name of the Xiph.org Foundation nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ bits 32 ; everything that includes this header is assembled as 32-bit code
+
+%ifdef OBJ_FORMAT_win32 ; presumably OBJ_FORMAT_* is defined by the build system on the nasm command line -- TODO confirm
+ %define FLAC__PUBLIC_NEEDS_UNDERSCORE ; makes cglobal/cextern prefix symbol names with '_'
+ %idefine code_section section .text align=16 class=CODE use32
+ %idefine data_section section .data align=32 class=DATA use32
+ %idefine bss_section section .bss align=32 class=DATA use32
+%elifdef OBJ_FORMAT_aout ; underscore-prefixed symbols, plain section names
+ %define FLAC__PUBLIC_NEEDS_UNDERSCORE
+ %idefine code_section section .text
+ %idefine data_section section .data
+ %idefine bss_section section .bss
+%elifdef OBJ_FORMAT_aoutb ; same settings as OBJ_FORMAT_aout
+ %define FLAC__PUBLIC_NEEDS_UNDERSCORE
+ %idefine code_section section .text
+ %idefine data_section section .data
+ %idefine bss_section section .bss
+%elifdef OBJ_FORMAT_macho ; same settings as OBJ_FORMAT_aout
+ %define FLAC__PUBLIC_NEEDS_UNDERSCORE
+ %idefine code_section section .text
+ %idefine data_section section .data
+ %idefine bss_section section .bss
+%elifdef OBJ_FORMAT_elf ; the only format here that leaves FLAC__PUBLIC_NEEDS_UNDERSCORE undefined
+ %idefine code_section section .text align=16
+ %idefine data_section section .data align=32
+ %idefine bss_section section .bss align=32
+%else ; none of the recognised OBJ_FORMAT_* macros is defined
+ %error unsupported object format! ; this directive doesn't really work here
+%endif
+
+%imacro cglobal 1
+ %ifndef FLAC__PUBLIC_NEEDS_UNDERSCORE ; plain symbol names: export %1 as given
+ %if __NASM_MAJOR__ < 2 ; older assembler: export without type/visibility qualifiers
+ global %1
+ %else ; NASM 2 or later: export as a hidden function symbol
+ global %1:function hidden
+ %endif
+ %else ; underscore-prefixed formats: export _%1
+ global _%1
+ %endif
+%endmacro
+
+%imacro cextern 1
+ %ifndef FLAC__PUBLIC_NEEDS_UNDERSCORE ; plain symbol names: import %1 as given
+ extern %1
+ %else ; underscore-prefixed formats: import _%1
+ extern _%1
+ %endif
+%endmacro
+
+%imacro cident 1 ; marks the start of a routine: defines two labels for %1 at the current position
+_%1: ; underscore-prefixed spelling of the name
+%1: ; plain spelling of the name, at the same address
+%endmacro
+
+%ifdef OBJ_FORMAT_elf ; ELF builds only
+section .note.GNU-stack progbits noalloc noexec nowrite align=1 ; empty note section; GNU toolchain convention for declaring that this object does not need an executable stack
+%endif
+