summaryrefslogtreecommitdiff
path: root/lib/rbcodec/codecs/libmad/fixed.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rbcodec/codecs/libmad/fixed.h')
-rw-r--r--lib/rbcodec/codecs/libmad/fixed.h510
1 files changed, 510 insertions, 0 deletions
diff --git a/lib/rbcodec/codecs/libmad/fixed.h b/lib/rbcodec/codecs/libmad/fixed.h
new file mode 100644
index 0000000000..6015684644
--- /dev/null
+++ b/lib/rbcodec/codecs/libmad/fixed.h
@@ -0,0 +1,510 @@
1/*
2 * libmad - MPEG audio decoder library
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * $Id$
20 */
21
22# ifndef LIBMAD_FIXED_H
23# define LIBMAD_FIXED_H
24
25#include <inttypes.h>
26
/* The basic fixed-point sample type: signed 32-bit, Q3.28 format
   (see the format description below). */
typedef int32_t mad_fixed_t;

/* High and low halves of a 64-bit intermediate product; the high half
   carries the sign, the low half is treated as unsigned bits. */
typedef int32_t mad_fixed64hi_t;
typedef uint32_t mad_fixed64lo_t;

/* 64-bit signed integer type for full-precision multiplies.  MSVC is
   spelled __int64; the `1 ||` makes `signed long long` the unconditional
   fallback for every other compiler, not just GCC. */
# if defined(_MSC_VER)
#  define mad_fixed64_t  signed __int64
# elif 1 || defined(__GNUC__)
#  define mad_fixed64_t  signed long long
# endif
37
/* Type of decoded audio samples: double for the floating-point build
   (FPM_FLOAT, rejected below with #error), fixed point otherwise. */
# if defined(FPM_FLOAT)
typedef double mad_sample_t;
# else
typedef mad_fixed_t mad_sample_t;
# endif
43
44/*
45 * Fixed-point format: 0xABBBBBBB
46 * A == whole part (sign + 3 bits)
47 * B == fractional part (28 bits)
48 *
49 * Values are signed two's complement, so the effective range is:
50 * 0x80000000 to 0x7fffffff
51 * -8.0 to +7.9999999962747097015380859375
52 *
53 * The smallest representable value is:
54 * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9)
55 *
56 * 28 bits of fractional accuracy represent about
57 * 8.6 digits of decimal accuracy.
58 *
59 * Fixed-point numbers can be added or subtracted as normal
60 * integers, but multiplication requires shifting the 64-bit result
61 * from 56 fractional bits back to 28 (and rounding.)
62 *
63 * Changing the definition of MAD_F_FRACBITS is only partially
64 * supported, and must be done with care.
65 */
66
/* Number of fractional bits in mad_fixed_t (28 => Q3.28). */
# define MAD_F_FRACBITS		28

# if MAD_F_FRACBITS == 28
/* MAD_F() builds a fixed-point constant from a literal written with
   28 fractional bits (e.g. MAD_F(0x10000000) == 1.0). */
#  define MAD_F(x)		((mad_fixed_t) (x##L))
# else
#  if MAD_F_FRACBITS < 28
#   warning "MAD_F_FRACBITS < 28"
/* Rescale the 28-fracbit literal down, rounding to nearest by adding
   half of the discarded range before the shift. */
#   define MAD_F(x)		((mad_fixed_t)  \
				 (((x##L) +  \
				   (1L << (28 - MAD_F_FRACBITS - 1))) >>  \
				  (28 - MAD_F_FRACBITS)))
#  elif MAD_F_FRACBITS > 28
#   error "MAD_F_FRACBITS > 28 not currently supported"
/* Unreachable after the #error above; kept for reference only. */
#   define MAD_F(x)		((mad_fixed_t)  \
				 ((x##L) << (MAD_F_FRACBITS - 28)))
#  endif
# endif

/* Extreme representable values: the full two's-complement 32-bit range. */
# define MAD_F_MIN		((mad_fixed_t) -0x80000000L)
# define MAD_F_MAX		((mad_fixed_t) +0x7fffffffL)

/* The value 1.0 in fixed point. */
# define MAD_F_ONE		MAD_F(0x10000000)
89
/* Convert double -> fixed (rounding to nearest) and fixed -> double. */
# define mad_f_tofixed(x)	((mad_fixed_t)  \
				 ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5))
# define mad_f_todouble(x)	((double)  \
				 ((x) / (double) (1L << MAD_F_FRACBITS)))

/* Split a fixed-point value into its integer and fractional parts. */
# define mad_f_intpart(x)	((x) >> MAD_F_FRACBITS)
# define mad_f_fracpart(x)	((x) & ((1L << MAD_F_FRACBITS) - 1))
				/* (x should be positive) */

/* Convert a small integer to fixed point (no overflow check). */
# define mad_f_fromint(x)	((x) << MAD_F_FRACBITS)

/* Fixed-point add/subtract are plain integer add/subtract. */
# define mad_f_add(x, y)	((x) + (y))
# define mad_f_sub(x, y)	((x) - (y))
103
/* Floating-point arithmetic mode: deliberately rejected in this build. */
# if defined(FPM_FLOAT)
#  error "FPM_FLOAT not yet supported"

#  undef MAD_F
#  define MAD_F(x)		mad_f_todouble(x)

#  define mad_f_mul(x, y)	((x) * (y))
#  define mad_f_scale64

# elif defined(FPM_64BIT)
# elif defined(FPM_64BIT)

/*
 * This version should be the most accurate if 64-bit types are supported by
 * the compiler, although it may not be the most efficient.
 */
# if defined(OPT_ACCURACY)
/* Round to nearest: add half an LSB of the final result before shifting
   the 64-bit product back down by MAD_F_SCALEBITS. */
#  define mad_f_mul(x, y)  \
    ((mad_fixed_t)  \
     ((((mad_fixed64_t) (x) * (y)) +  \
       (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS))
# else
/* Truncating variant: just drop the low MAD_F_SCALEBITS bits. */
#  define mad_f_mul(x, y)  \
    ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS))
# endif

# define MAD_F_SCALEBITS	MAD_F_FRACBITS
/* --- Intel --------------------------------------------------------------- */

# elif defined(FPM_INTEL)

# if defined(_MSC_VER)
#  pragma warning(push)
#  pragma warning(disable: 4035)  /* no return value */
/* MSVC inline-asm multiply: imul leaves the 64-bit signed product in
   edx:eax; shrd shifts that 64-bit value right by fracbits into eax,
   which is the function's (implicit) return register. */
static __forceinline
mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y)
{
  enum {
    fracbits = MAD_F_FRACBITS
  };

  __asm {
    mov eax, x
    imul y
    shrd eax, edx, fracbits
  }

  /* implicit return of eax */
}
#  pragma warning(pop)

#  define mad_f_mul		mad_f_mul_inline
#  define mad_f_scale64
# else
/*
 * This Intel version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
/* 32x32 -> 64 signed multiply; imull writes the product to edx:eax,
   matching the "=a"/"=d" output constraints. */
#  define MAD_F_MLX(hi, lo, x, y)  \
    asm ("imull %3"  \
	 : "=a" (lo), "=d" (hi)  \
	 : "%a" (x), "rm" (y)  \
	 : "cc")

#  if defined(OPT_ACCURACY)
/*
 * This gives best accuracy but is not very fast.
 */
/* Multiply-accumulate: compute x*y into a temporary 64-bit pair, then
   64-bit add it into hi:lo with addl/adcl (carry propagated). */
#   define MAD_F_MLA(hi, lo, x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       asm ("addl %2,%0\n\t"  \
	    "adcl %3,%1"  \
	    : "=rm" (lo), "=rm" (hi)  \
	    : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi)  \
	    : "cc");  \
    })
#  endif  /* OPT_ACCURACY */

#  if defined(OPT_ACCURACY)
/*
 * Surprisingly, this is faster than SHRD followed by ADC.
 */
/* Rounding scale: 64-bit add of half an LSB (1 << (SCALEBITS-1)) into
   hi:lo, then shrdl shifts the pair right by SCALEBITS into the result. */
#   define mad_f_scale64(hi, lo)  \
    ({ mad_fixed64hi_t __hi_;  \
       mad_fixed64lo_t __lo_;  \
       mad_fixed_t __result;  \
       asm ("addl %4,%2\n\t"  \
	    "adcl %5,%3"  \
	    : "=rm" (__lo_), "=rm" (__hi_)  \
	    : "0" (lo), "1" (hi),  \
	      "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0)  \
	    : "cc");  \
       asm ("shrdl %3,%2,%1"  \
	    : "=rm" (__result)  \
	    : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS)  \
	    : "cc");  \
       __result;  \
    })
#  elif defined(OPT_INTEL)
/*
 * Alternate Intel scaling that may or may not perform better.
 */
/* Truncating scale built from two single-word shifts and an OR instead
   of the double-shift instruction. */
#   define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("shrl %3,%1\n\t"  \
	    "shll %4,%2\n\t"  \
	    "orl %2,%1"  \
	    : "=rm" (__result)  \
	    : "0" (lo), "r" (hi),  \
	      "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS)  \
	    : "cc");  \
       __result;  \
    })
#  else
/* Default truncating scale: shrdl shifts hi:lo right by SCALEBITS. */
#   define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("shrdl %3,%2,%1"  \
	    : "=rm" (__result)  \
	    : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS)  \
	    : "cc");  \
       __result;  \
    })
#  endif  /* OPT_ACCURACY */

#  define MAD_F_SCALEBITS	MAD_F_FRACBITS
# endif
/* --- ARM ----------------------------------------------------------------- */

# elif defined(FPM_ARM)

/*
 * This ARM V4 version is as accurate as FPM_64BIT but much faster.  The
 * least significant bit is properly rounded at no CPU cycle cost!
 */
# if 1
/*
 * This is faster than the default implementation via MAD_F_MLX() and
 * mad_f_scale64().
 *
 * smull: 64-bit signed product into __lo (low) and __hi (high).
 * movs ... lsr: shift __lo right by SCALEBITS; the last bit shifted out
 * lands in the carry flag.
 * adc: combine with __hi shifted into the top bits and add the carry,
 * i.e. round to nearest for free.
 */
# define mad_f_mul(x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       mad_fixed_t __result;  \
       asm ("smull	%0, %1, %3, %4\n\t"  \
	    "movs	%0, %0, lsr %5\n\t"  \
	    "adc	%2, %0, %1, lsl %6"  \
	    : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
	    : "%r" (x), "r" (y),  \
	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
	    : "cc");  \
       __result;  \
    })
# endif

/* 32x32 -> 64 signed multiply into hi:lo. */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smull	%0, %1, %2, %3"  \
	 : "=&r" (lo), "=&r" (hi)  \
	 : "%r" (x), "r" (y))

/* Multiply-accumulate: hi:lo += x * y in a single instruction. */
# define MAD_F_MLA(hi, lo, x, y)  \
    asm ("smlal	%0, %1, %2, %3"  \
	 : "+r" (lo), "+r" (hi)  \
	 : "%r" (x), "r" (y))

/* 64-bit negate of hi:lo: reverse-subtract from zero with borrow. */
# define MAD_F_MLN(hi, lo)  \
    asm ("rsbs	%0, %2, #0\n\t"  \
	 "rsc	%1, %3, #0"  \
	 : "=r" (lo), "=r" (hi)  \
	 : "0" (lo), "1" (hi)  \
	 : "cc")

/* Rounding scale of hi:lo by SCALEBITS, same carry trick as mad_f_mul. */
# define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("movs	%0, %1, lsr %3\n\t"  \
	    "adc	%0, %0, %2, lsl %4"  \
	    : "=&r" (__result)  \
	    : "r" (lo), "r" (hi),  \
	      "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS)  \
	    : "cc");  \
       __result;  \
    })

# define MAD_F_SCALEBITS	MAD_F_FRACBITS
/* --- MIPS ---------------------------------------------------------------- */

# elif defined(FPM_MIPS)

/*
 * This MIPS version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
/* mult writes the 64-bit product to the LO/HI registers, bound here via
   the "=l"/"=h" constraints. */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("mult	%2,%3"  \
	 : "=l" (lo), "=h" (hi)  \
	 : "%r" (x), "r" (y))

# if defined(HAVE_MADD_ASM)
/* Single-instruction multiply-accumulate into LO/HI. */
#  define MAD_F_MLA(hi, lo, x, y)  \
    asm ("madd	%2,%3"  \
	 : "+l" (lo), "+h" (hi)  \
	 : "%r" (x), "r" (y))
# elif defined(HAVE_MADD16_ASM)
/*
 * This loses significant accuracy due to the 16-bit integer limit in the
 * multiply/accumulate instruction.
 */
/* Operands are pre-shifted (x >> 12, y >> 16) to fit madd16's range;
   MLZ then takes the accumulated result straight from lo. */
#  define MAD_F_ML0(hi, lo, x, y)  \
    asm ("mult	%2,%3"  \
	 : "=l" (lo), "=h" (hi)  \
	 : "%r" ((x) >> 12), "r" ((y) >> 16))
#  define MAD_F_MLA(hi, lo, x, y)  \
    asm ("madd16	%2,%3"  \
	 : "+l" (lo), "+h" (hi)  \
	 : "%r" ((x) >> 12), "r" ((y) >> 16))
#  define MAD_F_MLZ(hi, lo)  ((mad_fixed_t) (lo))
# endif

# if defined(OPT_SPEED)
/* Fast, inaccurate scale: keep only the high word's contribution. */
#  define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS)))
#  define MAD_F_SCALEBITS	MAD_F_FRACBITS
# endif
/* --- SPARC --------------------------------------------------------------- */

# elif defined(FPM_SPARC)

/*
 * This SPARC V8 version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
/* smul leaves the low 32 product bits in lo and the high 32 bits in the
   %y register, which rd %%y copies into hi. */
# define MAD_F_MLX(hi, lo, x, y)  \
    asm ("smul %2, %3, %0\n\t"  \
	 "rd %%y, %1"  \
	 : "=r" (lo), "=r" (hi)  \
	 : "%r" (x), "rI" (y))
/* --- PowerPC ------------------------------------------------------------- */

# elif defined(FPM_PPC)

/*
 * This PowerPC version is fast and accurate; the disposition of the least
 * significant bit depends on OPT_ACCURACY via mad_f_scale64().
 */
/* Two separate multiplies: mullw produces the low 32 bits of the product,
   mulhw the high 32 bits. */
# define MAD_F_MLX(hi, lo, x, y)  \
    do {  \
      asm ("mullw %0,%1,%2"  \
	   : "=r" (lo)  \
	   : "%r" (x), "r" (y));  \
      asm ("mulhw %0,%1,%2"  \
	   : "=r" (hi)  \
	   : "%r" (x), "r" (y));  \
    }  \
    while (0)

# if defined(OPT_ACCURACY)
/*
 * This gives best accuracy but is not very fast.
 */
/* Multiply-accumulate: x*y into a 64-bit temporary, then a carrying
   64-bit add (addc/adde) into hi:lo; xer holds the carry bit. */
#  define MAD_F_MLA(hi, lo, x, y)  \
    ({ mad_fixed64hi_t __hi;  \
       mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       asm ("addc %0,%2,%3\n\t"  \
	    "adde %1,%4,%5"  \
	    : "=r" (lo), "=r" (hi)  \
	    : "%r" (lo), "r" (__lo),  \
	      "%r" (hi), "r" (__hi)  \
	    : "xer");  \
    })
# endif

# if defined(OPT_ACCURACY)
/*
 * This is slower than the truncating version below it.
 *
 * rotrwi rotates lo so its top SCALEBITS bits wrap to the bottom; extrwi
 * then pulls out the rotated-in rounding bit; insrwi overwrites the top
 * SCALEBITS bits with the low bits of hi; the final add applies rounding.
 */
#  define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result, __round;  \
       asm ("rotrwi %0,%1,%2"  \
	    : "=r" (__result)  \
	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
       asm ("extrwi %0,%1,1,0"  \
	    : "=r" (__round)  \
	    : "r" (__result));  \
       asm ("insrwi %0,%1,%2,0"  \
	    : "+r" (__result)  \
	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
       asm ("add %0,%1,%2"  \
	    : "=r" (__result)  \
	    : "%r" (__result), "r" (__round));  \
       __result;  \
    })
# else
/* Truncating scale: rotate lo, then insert the low bits of hi into the
   top SCALEBITS bits of the result. */
#  define mad_f_scale64(hi, lo)  \
    ({ mad_fixed_t __result;  \
       asm ("rotrwi %0,%1,%2"  \
	    : "=r" (__result)  \
	    : "r" (lo), "i" (MAD_F_SCALEBITS));  \
       asm ("insrwi %0,%1,%2,0"  \
	    : "+r" (__result)  \
	    : "r" (hi), "i" (MAD_F_SCALEBITS));  \
       __result;  \
    })
# endif

# define MAD_F_SCALEBITS	MAD_F_FRACBITS
# elif defined(FPM_COLDFIRE_EMAC)

/* mad_f_mul using the Coldfire MCF5249 EMAC unit.  Loses 3 bits of accuracy.
   Note that we don't define any of the libmad accumulator macros, as
   any functions that use these should have the relevant sections rewritten
   in assembler to utilise the EMAC accumulators properly.
   Assumes the default +/- 3.28 fixed point format
 */
/* mac.l multiplies into accumulator %acc0, movclr.l reads and clears it,
   and the asl.l #3 compensates for the 3.28 format (hence the 3 lost
   low bits noted above). */
#define mad_f_mul(x, y)  \
({  \
    mad_fixed64hi_t hi;  \
    asm volatile("mac.l %[a], %[b], %%acc0\n\t"  \
                 "movclr.l %%acc0, %[hi]\n\t"  \
                 "asl.l #3, %[hi]"  \
                 : [hi] "=d" (hi)  \
                 : [a] "r" ((x)), [b] "r" ((y)));  \
    hi;  \
})
/* Define dummy mad_f_scale64 to prevent libmad from defining MAD_F_SCALEBITS
   below.  Having MAD_F_SCALEBITS defined screws up the PRESHIFT macro in
   synth.c
 */
#define mad_f_scale64(hi, lo) (lo)
/* --- Default ------------------------------------------------------------- */

# elif defined(FPM_DEFAULT)

/*
 * This version is the most portable but it loses significant accuracy.
 * Furthermore, accuracy is biased against the second argument, so care
 * should be taken when ordering operands.
 *
 * The scale factors are constant as this is not used with SSO.
 *
 * Pre-rounding is required to stay within the limits of compliance.
 */
# if defined(OPT_SPEED)
/* Truncate to 16+16 significant bits before multiplying (no rounding). */
#  define mad_f_mul(x, y)	(((x) >> 12) * ((y) >> 16))
# else
/* Same 12/16-bit pre-shifts, but round each operand to nearest first. */
#  define mad_f_mul(x, y)	((((x) + (1L << 11)) >> 12) *  \
				 (((y) + (1L << 15)) >> 16))
# endif

/* ------------------------------------------------------------------------- */

# else
#  error "no FPM selected"
# endif
464
/* default implementations */

/* Generic mad_f_mul for any FPM that supplied only MAD_F_MLX and
   mad_f_scale64: full 64-bit multiply, then scale back to Q-format. */
# if !defined(mad_f_mul)
#  define mad_f_mul(x, y)  \
    ({ register mad_fixed64hi_t __hi;  \
       register mad_fixed64lo_t __lo;  \
       MAD_F_MLX(__hi, __lo, (x), (y));  \
       mad_f_scale64(__hi, __lo);  \
    })
# endif

/* If no native multiply-accumulate exists, fall back to accumulating
   already-scaled single-word products in lo (hi is unused). */
# if !defined(MAD_F_MLA)
#  define MAD_F_ML0(hi, lo, x, y)	((lo)  = mad_f_mul((x), (y)))
#  define MAD_F_MLA(hi, lo, x, y)	((lo) += mad_f_mul((x), (y)))
#  define MAD_F_MLN(hi, lo)		((lo)  = -(lo))
#  define MAD_F_MLZ(hi, lo)		((void) (hi), (mad_fixed_t) (lo))
# endif

/* ML0 (first term of an accumulation) defaults to a plain multiply. */
# if !defined(MAD_F_ML0)
#  define MAD_F_ML0(hi, lo, x, y)	MAD_F_MLX((hi), (lo), (x), (y))
# endif

/* 64-bit negate in C: negate lo; if lo was nonzero a borrow is owed,
   so hi becomes ~hi, otherwise -hi. */
# if !defined(MAD_F_MLN)
#  define MAD_F_MLN(hi, lo)	((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi))
# endif

/* MLZ (finish an accumulation) defaults to scaling the hi:lo pair. */
# if !defined(MAD_F_MLZ)
#  define MAD_F_MLZ(hi, lo)	mad_f_scale64((hi), (lo))
# endif

/* Portable C scale of hi:lo down by MAD_F_SCALEBITS, either rounded
   (shift one bit less, add 1, halve) or truncating. */
# if !defined(mad_f_scale64)
#  if defined(OPT_ACCURACY)
#   define mad_f_scale64(hi, lo)  \
    ((((mad_fixed_t)  \
       (((hi) << (32 - (MAD_F_SCALEBITS - 1))) |  \
	((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1)
#  else
#   define mad_f_scale64(hi, lo)  \
    ((mad_fixed_t)  \
     (((hi) << (32 - MAD_F_SCALEBITS)) |  \
      ((lo) >> MAD_F_SCALEBITS)))
#  endif
#  define MAD_F_SCALEBITS	MAD_F_FRACBITS
# endif
509
510# endif