diff --git a/lib/filters.c b/lib/filters.c
index bea7581..ab7e5bb 100644
--- a/lib/filters.c
+++ b/lib/filters.c
@@ -32,12 +32,43 @@ void init_lpf(LPFFilter *filter, float cutoff, int sample_rate) {
 }
 
 float process_lpf(LPFFilter *filter, float x) {
-    float y = x;
-    for (int i = 0; i < LPF_ORDER; i++) {
-        float w0_new = filter->d1[i] * filter->w1[i] + filter->d2[i] * filter->w2[i] + y;
-        y = filter->A[i] * (w0_new + 2.0f * filter->w1[i] + filter->w2[i]);
-        filter->w2[i] = filter->w1[i];
-        filter->w1[i] = w0_new;
-    }
-    return y;
+#if USE_NEON
+    /*
+     * The stages form a chain: the output y of stage i is the input of
+     * stage i + 1, so stages cannot be evaluated side by side in SIMD lanes.
+     * Only the feedback term d1 * w1 + d2 * w2, which does not depend on y,
+     * is computed four stages at a time; the y-dependent chain stays scalar.
+     */
+    float feedback[LPF_ORDER];
+    int i = 0;
+    /* Full groups of four only, so no access goes past index LPF_ORDER - 1. */
+    for (; i + 4 <= LPF_ORDER; i += 4) {
+        float32x4_t w1_vec = vld1q_f32(&filter->w1[i]);
+        float32x4_t w2_vec = vld1q_f32(&filter->w2[i]);
+        float32x4_t acc = vmulq_f32(vld1q_f32(&filter->d1[i]), w1_vec);
+        acc = vmlaq_f32(acc, vld1q_f32(&filter->d2[i]), w2_vec);
+        vst1q_f32(&feedback[i], acc);
+    }
+    /* Leftover stages when LPF_ORDER is not a multiple of four. */
+    for (; i < LPF_ORDER; i++) {
+        feedback[i] = filter->d1[i] * filter->w1[i] + filter->d2[i] * filter->w2[i];
+    }
+    float y = x;
+    for (i = 0; i < LPF_ORDER; i++) {
+        float w0_new = feedback[i] + y;
+        y = filter->A[i] * (w0_new + 2.0f * filter->w1[i] + filter->w2[i]);
+        filter->w2[i] = filter->w1[i];
+        filter->w1[i] = w0_new;
+    }
+    return y;
+#else
+    float y = x;
+    for (int i = 0; i < LPF_ORDER; i++) {
+        float w0_new = filter->d1[i] * filter->w1[i] + filter->d2[i] * filter->w2[i] + y;
+        y = filter->A[i] * (w0_new + 2.0f * filter->w1[i] + filter->w2[i]);
+        filter->w2[i] = filter->w1[i];
+        filter->w1[i] = w0_new;
+    }
+    return y;
+#endif
 }
\ No newline at end of file
diff --git a/lib/filters.h b/lib/filters.h
index 7d112bb..12b8ddd 100644
--- a/lib/filters.h
+++ b/lib/filters.h
@@ -4,6 +4,7 @@
 #include
 #include
 #include "constants.h"
+#include "optimization.h"
 #include "oscillator.h"
 
 #define LPF_ORDER 10
diff --git a/lib/optimization.h b/lib/optimization.h
new file mode 100644
index 0000000..5bd4772
--- /dev/null
+++ b/lib/optimization.h
@@ -0,0 +1,7 @@
+#pragma once
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+    #include <arm_neon.h>
+    #define USE_NEON 1
+#else
+    #define USE_NEON 0
+#endif
\ No newline at end of file
diff --git a/lib/oscillator.h b/lib/oscillator.h
index 6a7ed8b..e5dfcd8 100644
--- a/lib/oscillator.h
+++ b/lib/oscillator.h
@@ -1,14 +1,8 @@
 #pragma once
 
-#if defined(__ARM_NEON) || defined(__ARM_NEON__)
-    #include <arm_neon.h>
-    #define USE_NEON 1
-#else
-    #define USE_NEON 0
-#endif
-
 #include "constants.h"
 #include
+#include "optimization.h"
 
 typedef struct {
     float phase;