18 #define MAX(a,b) ((a) > (b) ? (a) : (b))
21 #define MIN(a,b) ((a) < (b) ? (a) : (b))
25 #define FORCEINLINE __forceinline
26 #elif defined(__GNUC__)
27 #define FORCEINLINE inline __attribute__((always_inline))
29 #define FORCEINLINE inline
34 #define MUL64(a,b) __emul((a), (b))
36 #define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
39 #define MULHI(a,b) ((int32_t)(MUL64(a, b) >> 32))
40 #define MULQ31(a,b) ((int32_t)(MUL64(a, b) >> 31))
41 #define MULDIV64(a,b,c) (int32_t)(MUL64(a, b) / (c))
43 #define ADDMOD32(a,b) (int32_t)((uint32_t)(a) + (uint32_t)(b))
44 #define SUBMOD32(a,b) (int32_t)((uint32_t)(a) - (uint32_t)(b))
49 #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
51 #include <xmmintrin.h>
53 FORCEINLINE
static int32_t floatToInt(
float x) {
54 return _mm_cvt_ss2si(_mm_set_ss(x));
60 FORCEINLINE
static int32_t floatToInt(
float x) {
61 x += (x < 0.0f ? -0.5f : 0.5f);
67 static const double FIXQ31 = 2147483648.0;
68 static const double DB_TO_LOG2 = 0.16609640474436813;
71 FORCEINLINE
static double dBToGain(
double dB) {
72 return pow(10.0, dB / 20.0);
76 FORCEINLINE
static int32_t msToTc(
double ms,
double sampleRate) {
77 double tc = exp(-1000.0 / (ms * sampleRate));
78 return (int32_t)(FIXQ31 * tc);
82 static const int LOG2_INTBITS = 5;
83 static const int LOG2_FRACBITS = 31 - LOG2_INTBITS;
86 static const int LOG2_HEADROOM = 15;
89 static const int32_t LOG2_BIAS = 347;
90 static const int32_t EXP2_BIAS = 64;
98 static const int LOG2_TABBITS = 4;
99 static const int32_t log2Table[1 << LOG2_TABBITS][3] = {
100 { -0x56dfe26d, 0x5c46daff, 0x00000000 },
101 { -0x4d397571, 0x5bae58e7, 0x00025a75 },
102 { -0x4518f84b, 0x5aabcac4, 0x000a62db },
103 { -0x3e3075ec, 0x596168c0, 0x0019d0e6 },
104 { -0x384486e9, 0x57e769c7, 0x00316109 },
105 { -0x332742ba, 0x564f1461, 0x00513776 },
106 { -0x2eb4bad4, 0x54a4cdfe, 0x00791de2 },
107 { -0x2ad07c6c, 0x52f18320, 0x00a8aa46 },
108 { -0x2763c4d6, 0x513ba123, 0x00df574c },
109 { -0x245c319b, 0x4f87c5c4, 0x011c9399 },
110 { -0x21aac79f, 0x4dd93bef, 0x015fcb52 },
111 { -0x1f433872, 0x4c325584, 0x01a86ddc },
112 { -0x1d1b54b4, 0x4a94ac6e, 0x01f5f13e },
113 { -0x1b2a9f81, 0x4901524f, 0x0247d3f2 },
114 { -0x1969fa57, 0x4778f3a7, 0x029d9dbf },
115 { -0x17d36370, 0x45fbf1e8, 0x02f6dfe8 },
125 static const int EXP2_TABBITS = 4;
126 static const int32_t exp2Table[1 << EXP2_TABBITS][3] = {
127 { 0x3ed838c8, 0x58b574b7, 0x40000000 },
128 { 0x41a0821c, 0x5888db8f, 0x4000b2b7 },
129 { 0x4488548d, 0x582bcbc6, 0x40039be1 },
130 { 0x4791158a, 0x579a1128, 0x400a71ae },
131 { 0x4abc3a53, 0x56cf3089, 0x4017212e },
132 { 0x4e0b48af, 0x55c66396, 0x402bd31b },
133 { 0x517fd7a7, 0x547a946d, 0x404af0ec },
134 { 0x551b9049, 0x52e658f9, 0x40772a57 },
135 { 0x58e02e75, 0x5103ee08, 0x40b37b31 },
136 { 0x5ccf81b1, 0x4ecd321f, 0x410331b5 },
137 { 0x60eb6e09, 0x4c3ba007, 0x4169f548 },
138 { 0x6535ecf9, 0x49484909, 0x41ebcdaf },
139 { 0x69b10e5b, 0x45ebcede, 0x428d2acd },
140 { 0x6e5ef96c, 0x421e5d48, 0x4352ece7 },
141 { 0x7341edcb, 0x3dd7a354, 0x44426d7b },
142 { 0x785c4499, 0x390ecc3a, 0x456188bd },
145 static const int IEEE754_FABS_MASK = 0x7fffffff;
146 static const int IEEE754_MANT_BITS = 23;
147 static const int IEEE754_EXPN_BITS = 8;
148 static const int IEEE754_EXPN_BIAS = 127;
155 FORCEINLINE
static int32_t peaklog2(
float* input) {
158 uint32_t u = *(uint32_t*)input;
161 uint32_t peak = u & IEEE754_FABS_MASK;
164 int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
165 int32_t x = (peak << IEEE754_EXPN_BITS) & 0x7fffffff;
168 if ((uint32_t)e > 31) {
169 return 0x7fffffff & ~(e >> 31);
172 int k = x >> (31 - LOG2_TABBITS);
175 int32_t c0 = log2Table[k][0];
176 int32_t c1 = log2Table[k][1];
177 int32_t c2 = log2Table[k][2];
183 return (e << LOG2_FRACBITS) - (c2 >> 3);
191 FORCEINLINE
static int32_t peaklog2(
float* input0,
float* input1) {
194 uint32_t u0 = *(uint32_t*)input0;
195 uint32_t u1 = *(uint32_t*)input1;
198 u0 &= IEEE754_FABS_MASK;
199 u1 &= IEEE754_FABS_MASK;
200 uint32_t peak = MAX(u0, u1);
203 int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
204 int32_t x = (peak << IEEE754_EXPN_BITS) & 0x7fffffff;
207 if ((uint32_t)e > 31) {
208 return 0x7fffffff & ~(e >> 31);
211 int k = x >> (31 - LOG2_TABBITS);
214 int32_t c0 = log2Table[k][0];
215 int32_t c1 = log2Table[k][1];
216 int32_t c2 = log2Table[k][2];
222 return (e << LOG2_FRACBITS) - (c2 >> 3);
230 FORCEINLINE
static int32_t peaklog2(
float* input0,
float* input1,
float* input2,
float* input3) {
233 uint32_t u0 = *(uint32_t*)input0;
234 uint32_t u1 = *(uint32_t*)input1;
235 uint32_t u2 = *(uint32_t*)input2;
236 uint32_t u3 = *(uint32_t*)input3;
239 u0 &= IEEE754_FABS_MASK;
240 u1 &= IEEE754_FABS_MASK;
241 u2 &= IEEE754_FABS_MASK;
242 u3 &= IEEE754_FABS_MASK;
243 uint32_t peak = MAX(MAX(u0, u1), MAX(u2, u3));
246 int32_t e = IEEE754_EXPN_BIAS - (peak >> IEEE754_MANT_BITS) + LOG2_HEADROOM;
247 int32_t x = (peak << IEEE754_EXPN_BITS) & 0x7fffffff;
250 if ((uint32_t)e > 31) {
251 return 0x7fffffff & ~(e >> 31);
254 int k = x >> (31 - LOG2_TABBITS);
257 int32_t c0 = log2Table[k][0];
258 int32_t c1 = log2Table[k][1];
259 int32_t c2 = log2Table[k][2];
265 return (e << LOG2_FRACBITS) - (c2 >> 3);
272 FORCEINLINE
static int CLZ(uint32_t u) {
279 if (u < 0x00010000) {
283 if (u < 0x01000000) {
287 if (u < 0x10000000) {
291 if (u < 0x40000000) {
295 if (u < 0x80000000) {
305 FORCEINLINE
static int32_t fixlog2(int32_t x) {
312 uint32_t u = (uint32_t)x;
314 x = (u << e) & 0x7fffffff;
316 int k = x >> (31 - LOG2_TABBITS);
319 int32_t c0 = log2Table[k][0];
320 int32_t c1 = log2Table[k][1];
321 int32_t c2 = log2Table[k][2];
327 return (e << LOG2_FRACBITS) - (c2 >> 3);
334 FORCEINLINE
static int32_t fixexp2(int32_t x) {
341 uint32_t u = (uint32_t)x;
342 int e = u >> LOG2_FRACBITS;
343 x = ~(u << LOG2_INTBITS) & 0x7fffffff;
345 int k = x >> (31 - EXP2_TABBITS);
348 int32_t c0 = exp2Table[k][0];
349 int32_t c1 = exp2Table[k][1];
350 int32_t c2 = exp2Table[k][2];
360 FORCEINLINE
static float dither() {
361 static uint32_t rz = 0;
363 int32_t r0 = rz & 0xffff;
364 int32_t r1 = rz >> 16;
365 return (r0 - r1) * (1/65536.0f);
375 template<
int N,
int CIC1,
int CIC2>
378 static_assert((N & (N - 1)) == 0,
"N must be a power of 2");
379 static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1),
"Total CIC delay must be N-1");
381 int32_t _buffer[2*N] = {};
391 for (
size_t n = 0; n < N-1; n++) {
396 int32_t process(int32_t x) {
398 const size_t MASK = 2*N - 1;
405 for (
size_t n = 1; n < N; n <<= 1) {
409 x = MIN(x, _buffer[i]);
417 const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2);
418 x = MULHI(x, CICGAIN);
421 _acc1 = ADDMOD32(_acc1, x);
422 i = (i + CIC1 - 1) & MASK;
423 x = SUBMOD32(_acc1, _buffer[i]);
426 _acc2 = ADDMOD32(_acc2, x);
427 i = (i + CIC2 - 1) & MASK;
428 x = SUBMOD32(_acc2, _buffer[i]);
430 _index = (i + 1) & MASK;
442 template<
int N,
int CIC1,
int CIC2>
445 static_assert((N & (N - 1)) == 0,
"N must be a power of 2");
446 static_assert((CIC1 - 1) + (CIC2 - 1) == (N - 1),
"Total CIC delay must be N-1");
448 int32_t _buffer[2*N] = {};
458 for (
size_t n = 0; n < N-1; n++) {
463 int32_t process(int32_t x) {
465 const size_t MASK = 2*N - 1;
472 for (
size_t n = 1; n < N; n <<= 1) {
476 x = MAX(x, _buffer[i]);
484 const int32_t CICGAIN = 0xffffffff / (CIC1 * CIC2);
485 x = MULHI(x, CICGAIN);
488 _acc1 = ADDMOD32(_acc1, x);
489 i = (i + CIC1 - 1) & MASK;
490 x = SUBMOD32(_acc1, _buffer[i]);
493 _acc2 = ADDMOD32(_acc2, x);
494 i = (i + CIC2 - 1) & MASK;
495 x = SUBMOD32(_acc2, _buffer[i]);
497 _index = (i + 1) & MASK;
505 template<
int N>
class MinFilter;
506 template<>
class MinFilter< 16> :
public MinFilterT< 16, 7, 10> {};
507 template<>
class MinFilter< 32> :
public MinFilterT< 32, 14, 19> {};
508 template<>
class MinFilter< 64> :
public MinFilterT< 64, 27, 38> {};
509 template<>
class MinFilter<128> :
public MinFilterT<128, 53, 76> {};
510 template<>
class MinFilter<256> :
public MinFilterT<256, 106, 151> {};
512 template<
int N>
class MaxFilter;
513 template<>
class MaxFilter< 16> :
public MaxFilterT< 16, 7, 10> {};
514 template<>
class MaxFilter< 32> :
public MaxFilterT< 32, 14, 19> {};
515 template<>
class MaxFilter< 64> :
public MaxFilterT< 64, 27, 38> {};
516 template<>
class MaxFilter<128> :
public MaxFilterT<128, 53, 76> {};
517 template<>
class MaxFilter<256> :
public MaxFilterT<256, 106, 151> {};
522 template<
int N,
typename T =
float>
525 static_assert((N & (N - 1)) == 0,
"N must be a power of 2");
533 const size_t MASK = N - 1;
538 i = (i + (N - 1)) & MASK;
549 template<
int N,
typename T =
float>
552 static_assert((N & (N - 1)) == 0,
"N must be a power of 2");
558 void process(T& x0, T& x1) {
560 const size_t MASK = 2*N - 1;
566 i = (i + 2*(N - 1)) & MASK;
578 template<
int N,
typename T =
float>
581 static_assert((N & (N - 1)) == 0,
"N must be a power of 2");
587 void process(T& x0, T& x1, T& x2, T& x3) {
589 const size_t MASK = 4*N - 1;
597 i = (i + 4*(N - 1)) & MASK;