diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S index fa638c1a..03d8fb71 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S @@ -23,7 +23,7 @@ int32_t filter_biquad_s32( #define FUNCTION_NAME filter_biquad_s32 #define NSTACKVECS (0) -#define NSTACKWORDS (32+8*NSTACKVECS) +#define NSTACKWORDS (4+8*NSTACKVECS) #define FILT_N 0 #define FILT_STATE 1 @@ -33,41 +33,41 @@ int32_t filter_biquad_s32( #define STATE_START 10 -#define state x10 // ![0x%08X] -#define sample x11 // ![%d] -#define coef x12 // ![0x%08X] -#define tmp x13 // ![%d] +#define state a0 // ![0x%08X] +#define sample a1 // ![%d] +#define coef a2 // ![0x%08X] +#define tmp a3 // ![%d] #define _32 x18 // ![%d] #define _36 x19 // ![%d] -#define filter x24 // ![0x%08X] +#define filter s8 // ![0x%08X] .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. 
*/ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ - xm.stdsp s3,s2,8 - { li t3, 0 ; sw s8, 4 (sp)} - { mv filter, a0 ; xm.vsetc t3} - { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } - sh2add state, tmp, filter // state <-- &(filter->state[1][1]) - { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } - sh2add coef, tmp , filter // coef <-- &(filter->coef[4][0]) + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,0 + { li t3, 0 ; sw s8, 8 (sp) } + { mv filter, a0 ; xm.vsetc t3 } + { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } + sh2add state, tmp, filter // state <-- &(filter->state[1][1]) + { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } + sh2add coef, tmp , filter // coef <-- &(filter->coef[4][0]) - { li _36, 36 ; li _32, 32 } + { li _36, 36 ; li _32, 32 } // Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily. 
- { sub state, state, _36 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { add state, state, _32 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { sub state, state, _36 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { nop ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} + { sub state, state, _36 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { add state, state, _32 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { sub state, state, _36 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { nop ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } // Now acc[k] = b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k] // state = &(filter->state[0][0]) @@ -78,25 +78,24 @@ FUNCTION_NAME: // Move filter->state[0][:] to filter->state[1][:] - { add t3, state, s3 ; xm.vldc state} - { add tmp, state, _32 ; lw N,(FILT_N)*4 ( filter)} - { add t3, t3, _32 ; xm.vstc t3} - { slli N, N, 1 ; lw tmp,0 ( tmp)} - { li tmp, 6 ; sw tmp,0 ( t3)} + { add t3, state, s3 ; xm.vldc state } + { add tmp, state, _32 ; lw N,(FILT_N)*4 ( filter) } + { add t3, t3, _32 ; xm.vstc t3 } + { slli N, N, 1 ; lw tmp,0 ( tmp) } + { li tmp, 6 ; sw tmp,0 ( t3) } // Place the newest input sample in state[0][0] - { sub N, tmp, N ; sw sample,0 ( state)} + { sub N, tmp, N ; sw sample,0 ( state) } // Overwrite state[0][1:9] with 0's -lui t3, %hi(vpu_vec_zero) - addi t3,t3, %lo(vpu_vec_zero) - { addi t3, state, 4 ; xm.vldc t3} - { nop ; xm.vstc t3} +la t3, vpu_vec_zero + { addi t3, state, 4 ; xm.vldc t3 } + { nop ; xm.vstc t3 } // vC[:] <-- coef[b0][:] - { nop ; xm.vldc coef} + { nop ; xm.vldc coef } - // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them. + // Every element in t3[0:8] except for t3[0] is zero, so a VLMACC shouldn't affect them. // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a // problem. 
Smokin'! @@ -108,24 +107,24 @@ lui t3, %hi(vpu_vec_zero) // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write // out the output of section k. We do the MACC again, **which will corrupt the accumulators // which are LESS THAN k.... but that's FINE because we're not going to write them out again. - { xm.mkmski tmp, 4 ; xm.vlmacc0 state} + { xm.mkmski tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp li N, 0 - { nop ; xm.bru N /* Do N-1 remaining biquads */ } + { nop ; xm.bru N /* Do N-1 remaining biquads */ } - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp - { slli tmp, tmp, 4 ; xm.vlmacc0 state} + { slli tmp, tmp, 4 ; xm.vlmacc0 state } xm.vstrpv t3, tmp // Final vstrpv should have written the output to filt->state[0][N]. filt->state should @@ -134,22 +133,19 @@ lui t3, %hi(vpu_vec_zero) lw N,(FILT_N)*4 ( filter) xm.ldw a0,N ( state) - .L_done: - { nop ; lw s8, 4 (sp)} - xm.lddsp s3,s2,8 - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. 
*/ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ + { nop ; lw s8, 8 (sp) } + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME #undef FUNCTION_NAME - - #endif //defined(__VX4B__) diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S index 4f175d01..5dce012e 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S @@ -23,10 +23,10 @@ int32_t filter_biquad_sat_s32( #define FUNCTION_NAME filter_biquad_sat_s32 #define NSTACKVECS (2) -#define NSTACKWORDS (10+2+8*NSTACKVECS) +#define NSTACKWORDS (4+8*NSTACKVECS) -#define STACK_TMP_VR (NSTACKWORDS - 16-2) -#define STACK_TMP_VD (NSTACKWORDS - 8-2) +#define STACK_TMP_VR (NSTACKWORDS - 16-1) +#define STACK_TMP_VD (NSTACKWORDS - 8-1) #define FILT_N 0 #define FILT_STATE 1 @@ -36,41 +36,41 @@ int32_t filter_biquad_sat_s32( #define STATE_START 10 -#define state x10 // ![0x%08X] -#define sample x11 // ![%d] -#define coef x12 // ![0x%08X] -#define tmp x13 // ![%d] +#define state a0 // ![0x%08X] 
+#define sample a1 // ![%d] +#define coef a2 // ![0x%08X] +#define tmp a3 // ![%d] #define _32 x18 // ![%d] #define _36 x19 // ![%d] -#define filter x24 // ![0x%08X] +#define filter s8 // ![0x%08X] .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ - xm.stdsp s3,s2,8 - { li t3, 0 ; sw s8, 4 (sp)} - { mv filter, a0 ; xm.vsetc t3} - { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,0 + { li t3, 0 ; sw s8, 8 (sp) } + { mv filter, a0 ; xm.vsetc t3 } + { xm.ldcu tmp, FILT_STATE + STATE_START ; nop } sh2add state, tmp, filter // state <-- &(filter->state[1][1]) - { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } + { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr } sh2add coef, tmp, filter // coef <-- &(filter->coef[4][0]) - { li _36, 36 ; li _32, 32 } + { li _36, 36 ; li _32, 32 } // Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily. 
- { sub state, state, _36 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { add state, state, _32 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { sub state, state, _36 ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} - { nop ; xm.vldc state} - { sub coef, coef, _32 ; xm.vlmacc0 coef} + { sub state, state, _36 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { add state, state, _32 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { sub state, state, _36 ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } + { nop ; xm.vldc state } + { sub coef, coef, _32 ; xm.vlmacc0 coef } // Now acc[k] = b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k] // state = &(filter->state[0][0]) @@ -81,35 +81,34 @@ FUNCTION_NAME: // Move filter->state[0][:] to filter->state[1][:] - { add t3, state, s3 ; xm.vldc state} - { add t3, t3, _32 ; xm.vstc t3} - { add tmp, state, _32 ; lw s2,(FILT_N)*4 ( filter)} - { li s3, 6 ; lw tmp,0 ( tmp)} - { li tmp, 6*8 ; sw tmp,0 ( t3)} + { add t3, state, s3 ; xm.vldc state } + { add t3, t3, _32 ; xm.vstc t3 } + { add tmp, state, _32 ; lw s2,(FILT_N)*4 ( filter) } + { li s3, 6 ; lw tmp,0 ( tmp) } + { li tmp, 6*8 ; sw tmp,0 ( t3) } mul N, s2, s3 // Place the newest input sample in state[0][0] - { sub N, tmp, N ; sw sample,0 ( state)} + { sub N, tmp, N ; sw sample,0 ( state) } #undef sample -#define zeros x11 +#define zeros a1 // Overwrite state[0][1:9] with 0's - lui t3, %hi(vpu_vec_zero) - addi t3,t3, %lo(vpu_vec_zero) - { addi zeros, t3, 0 ; li _32, 32} - { addi t3, state, 4 ; xm.vldc t3} - { nop ; xm.vstc t3} + la t3, vpu_vec_zero + { addi zeros, t3, 0 ; li _32, 32 } + { addi t3, state, 4 ; xm.vldc t3 } + { nop ; xm.vstc t3 } // vC[:] <-- coef[b0][:] - { nop ; xm.vldc coef} + { nop ; xm.vldc coef } #undef coef -#define state_p1 x12 +#define state_p1 a2 - { addi state_p1, t3, 0 ; addi t3,sp, (STACK_TMP_VR)*4 } + { addi state_p1, t3, 0 ; addi t3,sp, 
(STACK_TMP_VR)*4 } - // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them. + // Every element in t3[0:8] except for t3[0] is zero, so a VLMACC shouldn't affect them. // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a // problem. Smokin'! @@ -126,90 +125,90 @@ FUNCTION_NAME: // wouldn't need to recalculate the stack pointer every time, doesn't _ _ // matter here as we're not using most of the resourse line instructions anyway \(`~`)/ - { xm.mkmski tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { xm.mkmski tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp li N, (0) - { nop ; xm.bru N /* Do N-1 remaining biquads */ } + { nop ; xm.bru N /* Do N-1 remaining biquads */ } - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; 
xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp - { sub t3, t3, _32 ; xm.vldd t3} - { nop ; xm.vldr t3} - { slli tmp, tmp, 4 ; xm.vlmacc0 state} - { add t3, t3, _32 ; xm.vstr t3} - { nop ; xm.vstd t3} + { sub t3, t3, _32 ; xm.vldd t3 } + { nop ; xm.vldr t3 } + { slli tmp, tmp, 4 ; xm.vlmacc0 state } + { add t3, t3, _32 ; xm.vstr t3 } + { nop ; xm.vstd t3 } xm.vlsat zeros xm.vstrpv state_p1, tmp // Final vstrpv should have written the output to filt->state[0][N]. filt->state should // still be pointing at filt->state[0][0] - { nop ; lw N,(FILT_N)*4 ( filter)} + { nop ; lw N,(FILT_N)*4 ( filter) } xm.ldw a0,N ( state) - { nop ; lw s8, 4 (sp)} - xm.lddsp s3,s2,8 - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. 
*/ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ + { nop ; lw s8, 8 (sp) } + xm.lddsp s3,s2,0 + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S index cf0ae599..e6edd65c 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S @@ -23,8 +23,8 @@ int16_t filter_fir_s16( #define FUNCTION_NAME filter_fir_s16 -#define NSTACKVECS (2) -#define NSTACKWORDS (12+8*NSTACKVECS) +#define NSTACKVECS (3) +#define NSTACKWORDS (8+8*NSTACKVECS) #define FILT_N 0 #define FILT_SHIFT 1 @@ -32,79 +32,91 @@ int16_t filter_fir_s16( #define FILT_STATE 3 -#define STACK_VEC_TMP (NSTACKWORDS-8) -#define STACK_VEC_VR (NSTACKWORDS-16) +#define STACK_VEC_TMP2 (NSTACKWORDS-8-1) +#define STACK_VEC_VR (NSTACKWORDS-16-1) +#define STACK_VEC_TMP (NSTACKWORDS-24-1) #define STACK_FILTER (8) -#define buff x10 -#define length x11 -#define sample x12 -#define tmpA x13 +#define buff a0 +#define length a1 +#define sample a2 +#define tmpA a3 #define _32 x18 #define 
coef x19 -#define filter x24 + + +#define filter s8 .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ - - xm.stdsp s3,s2,8 - xm.stdsp s5,s4,16 - xm.stdsp s7,s6,24 - { li _32, 32 ; sw s8, 4 (sp)} - { mv filter, a0 ; mv sample, a1 } - { nop ; lw length,(FILT_N)*4 ( filter)} - { nop ; lw buff,(FILT_STATE)*4 ( filter)} + xm.entsp (NSTACKWORDS)*4 + + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,0 + + { addi s6,sp, (STACK_VEC_TMP2)*4 ; nop } + addi s7, s6, (-30) + + { li _32, 32 ; sw s8, 24 (sp) } + { mv filter, a0 ; mv sample, a1 } + { nop ; lw length,(FILT_N)*4 ( filter) } + { nop ; lw buff,(FILT_STATE)*4 ( filter) } call filter_fir_s16_push_sample_up - { nop ; lw coef,(FILT_COEF)*4 ( filter)} - { nop ; lw buff,(FILT_STATE)*4 ( filter)} - { nop ; lw length,(FILT_N)*4 ( filter)} - { slli t3, _32, 3 ; xm.vclrdr } - { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} - { mv tmpA, length ; xm.vstd t3} - { xm.zexti tmpA, 4 ; srli length, length, 4 } - { slli tmpA, tmpA, 1 ; xm.brff length, .L_loop_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { nop ; lw coef,(FILT_COEF)*4 ( filter) } + { nop ; lw buff,(FILT_STATE)*4 ( filter) } + { nop ; lw length,(FILT_N)*4 ( filter) } + { slli t3, _32, 3 ; xm.vclrdr } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3 } + { mv tmpA, length ; xm.vstd t3 } + { xm.zexti tmpA, 4 ; srli length, length, 4 } + { slli tmpA, tmpA, 1 ; xm.brff length, .L_loop_end } .L_loop_top: - { add buff, buff, _32 ; xm.vldc buff} - { addi length, length, -1 ; xm.vlmaccr0 coef} + { add buff, buff, _32 ; xm.vldc buff } + { addi length, 
length, -1 ; xm.vlmaccr0 coef } xm.vlmaccr1 coef - { add coef, coef, _32 ; xm.bt length, .L_loop_top } + { nop ; xm.vstd s6 } + { nop ; xm.vldd s7 } + { nop ; xm.vstr s6 } + { nop ; xm.vldr s7 } + + { add coef, coef, _32 ; xm.bt length, .L_loop_top } .L_loop_end: - { addi a2,sp, (STACK_VEC_VR)*4 ; xm.mkmsk tmpA, tmpA } - { mv t3, buff ; xm.vstr a2} - { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3} + { addi a2,sp, (STACK_VEC_VR)*4 ; xm.mkmsk tmpA, tmpA } + { mv t3, buff ; xm.vstr a2 } + { addi t3,sp, (STACK_VEC_TMP)*4 ; xm.vldr t3 } xm.vstrpv t3, tmpA - { li tmpA, 0 ; lw a2,(FILT_SHIFT)*4 ( filter)} - { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc t3} - { nop ; xm.vldr t3} - { nop ; xm.vlmaccr0 coef} + { li tmpA, 0 ; lw a2,(FILT_SHIFT)*4 ( filter) } + { addi t3,sp, (STACK_VEC_VR)*4 ; xm.vldc t3 } + { nop ; xm.vldr t3 } + { nop ; xm.vlmaccr0 coef } xm.vlmaccr1 coef - //{ nop ; xm.vadddr } - xm.st16 x12, tmpA(x28) + + xm.st16 a2, tmpA(t3) xm.vlsat t3 - { nop ; xm.vstr t3} - xm.ld16s x10, tmpA(x28) + { nop ; xm.vstr t3 } + xm.ld16s a0, tmpA(t3) .L_done: - { nop ; lw s8, 4 (sp)} - xm.lddsp s7,s6,24 + { nop ; lw s8, 24 (sp) } + xm.lddsp s7,s6,0 xm.lddsp s5,s4,16 xm.lddsp s3,s2,8 - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords; /* Translation error on this line: unexpected token at position 86. */ -.global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 33. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. 
*/ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords; +.global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S index c06f7123..c0e310c5 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S +++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S @@ -25,7 +25,7 @@ int32_t filter_fir_s32( #define FUNCTION_NAME filter_fir_s32 #define NSTACKVECS (1) -#define NSTACKWORDS (12+8*NSTACKVECS) +#define NSTACKWORDS (8+8*NSTACKVECS) #define FILT_N 0 #define FILT_HEAD 1 @@ -34,28 +34,28 @@ int32_t filter_fir_s32( #define FILT_STATE 4 -#define STACK_VEC_TMP (NSTACKWORDS-12) +#define STACK_VEC_TMP (NSTACKWORDS-8-1) -#define filter x23 -#define sample x11 +#define filter s7 +#define sample a1 #define tmp1 x18 #define tmp2 x22 .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. 
*/ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ - xm.stdsp s3,s2,8 - xm.stdsp s5,s4,16 - xm.stdsp s7,s6,24 - { li t3, 0 ; sw s8, 4 (sp)} + xm.entsp (NSTACKWORDS)*4 + xm.stdsp s3,s2,8 + xm.stdsp s5,s4,16 + xm.stdsp s7,s6,0 + { li t3, 0 ; sw s8, 24 (sp) } // Set VPU mode to 32-bit - { mv filter, a0 ; xm.vsetc t3} + { mv filter, a0 ; xm.vsetc t3 } // The field filter->head points to where the newest sample will go, which is probably somewhere in the middle of the @@ -65,139 +65,133 @@ FUNCTION_NAME: // I'm just going to create two sets of registers, corresponding to each of the two parts. That's what this is. -#define state_A x10 +#define state_A a0 #define state_B x19 -#define N_A x12 -#define N_B x21 +#define N_A a2 +#define N_B s5 -#define coef_A x11 -#define coef_B x20 +#define coef_A a1 +#define coef_B s4 // Get the current head position, which is also the number of taps in part B - { nop ; lw N_B,(FILT_HEAD)*4 ( filter)} + { nop ; lw N_B,(FILT_HEAD)*4 ( filter) } // If N_B is currently zero, then the next head is the final index. Otherwise it's just // the head decremented by 1. - { addi t3, N_B, -1 ; lw N_A,(FILT_N)*4 ( filter)} - { nop ; xm.bt N_B, .L_no_reset } - { addi t3, N_A, -1 ; nop } + { addi t3, N_B, -1 ; lw N_A,(FILT_N)*4 ( filter) } + { nop ; xm.bt N_B, .L_no_reset } + { addi t3, N_A, -1 ; nop } .L_no_reset: - { nop ; sw t3,(FILT_HEAD)*4 ( filter)} + { nop ; sw t3,(FILT_HEAD)*4 ( filter) } // Store the newest sample in the state. 
And grab the rest of the state/coef/N values - { nop ; lw state_B,(FILT_STATE)*4 ( filter)} - sh2add state_A, N_B, state_B - { sub N_A, N_A, N_B ; sw sample,0 ( state_A)} - { slli tmp1, N_A, 2 ; lw coef_A,(FILT_COEF)*4 ( filter)} - sh2add coef_B, N_A, coef_A + { nop ; lw state_B,(FILT_STATE)*4 ( filter) } + sh2add state_A, N_B, state_B + { sub N_A, N_A, N_B ; sw sample,0 ( state_A) } + { slli tmp1, N_A, 2 ; lw coef_A,(FILT_COEF)*4 ( filter) } + sh2add coef_B, N_A, coef_A #undef sample // Each part has its own tail. We'll handle both of those first (by masking the state with zeros), then we'll do the // bulk of the work after - { addi s8,sp, (STACK_VEC_TMP)*4 ; xm.vclrdr } - { mv t3, state_A ; xm.vstd s8} - { xm.zexti tmp1, 5 ; xm.vldr t3} - { xm.mkmsk t3, tmp1 ; srli N_A, N_A, 3 } - xm.vstrpv s8, t3 - { mv t3, state_B ; xm.vldc s8} - { slli tmp2, N_B, 2 ; xm.vldr t3} - { xm.zexti tmp2, 5 ; xm.vstd s8} - { xm.mkmsk t3, tmp2 ; srli N_B, N_B, 3 } - xm.vstrpv s8, t3 - { add state_A, state_A, tmp1 ; xm.vclrdr } - { add coef_A, coef_A, tmp1 ; xm.vlmaccr0 coef_A} - { add state_B, state_B, tmp2 ; xm.vldc s8} - { add coef_B, coef_B, tmp2 ; xm.vlmaccr0 coef_B} + { addi s8,sp, (STACK_VEC_TMP)*4 ; xm.vclrdr } + { mv t3, state_A ; xm.vstd s8 } + { xm.zexti tmp1, 5 ; xm.vldr t3 } + { xm.mkmsk t3, tmp1 ; srli N_A, N_A, 3 } + xm.vstrpv s8, t3 + { mv t3, state_B ; xm.vldc s8 } + { slli tmp2, N_B, 2 ; xm.vldr t3 } + { xm.zexti tmp2, 5 ; xm.vstd s8 } + { xm.mkmsk t3, tmp2 ; srli N_B, N_B, 3 } + xm.vstrpv s8, t3 + { add state_A, state_A, tmp1 ; xm.vclrdr } + { add coef_A, coef_A, tmp1 ; xm.vlmaccr0 coef_A } + { add state_B, state_B, tmp2 ; xm.vldc s8 } + { add coef_B, coef_B, tmp2 ; xm.vlmaccr0 coef_B } // Now, go back through and do full vectors. 
#undef tmp2 #define _32 x22 - tail .L_part_A_start .p2align 4 .L_part_A_start: - { li _32, 32 ; xm.brff N_A, .L_part_A_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { li _32, 32 ; xm.brff N_A, .L_part_A_end } .L_part_A_loop_top: - { add state_A, state_A, _32 ; xm.vldc state_A} - { addi N_A, N_A, -1 ; xm.vlmaccr0 coef_A} - { add coef_A, coef_A, _32 ; xm.bt N_A, .L_part_A_loop_top } + { add state_A, state_A, _32 ; xm.vldc state_A } + { addi N_A, N_A, -1 ; xm.vlmaccr0 coef_A } + { add coef_A, coef_A, _32 ; xm.bt N_A, .L_part_A_loop_top } .L_part_A_end: #undef state_A #undef N_A #undef coef_A .L_part_B_start: - { li _32, 32 ; xm.brff N_B, .L_part_B_end }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { li _32, 32 ; xm.brff N_B, .L_part_B_end } .L_part_B_loop_top: - { add state_B, state_B, _32 ; xm.vldc state_B} - { addi N_B, N_B, -1 ; xm.vlmaccr0 coef_B} - { add coef_B, coef_B, _32 ; xm.bt N_B, .L_part_B_loop_top } + { add state_B, state_B, _32 ; xm.vldc state_B } + { addi N_B, N_B, -1 ; xm.vlmaccr0 coef_B } + { add coef_B, coef_B, _32 ; xm.bt N_B, .L_part_B_loop_top } .L_part_B_end: #undef state_B #undef N_B #undef coef_B -// Now combine the 40-bit accumulators, assumes that x24 points to the stack. +// Now combine the 40-bit accumulators, assumes that s8 points to the stack. 
// (the logic for this is a too complicated to explain here) -lui t3, %hi(vpu_vec_0x40000000) - addi t3,t3, %lo(vpu_vec_0x40000000) - { nop ; lw a2,(FILT_SHIFT)*4 ( filter)} - { addi s2, a2, -1 ; xm.vldc t3} - { li s3, 1 ; xm.vstr s8} -lui t3, %hi(vpu_vec_0x80000000) - addi t3,t3, %lo(vpu_vec_0x80000000) - { xm.shl s2, s3, s2 ; xm.vlmacc0 t3} -lui t3, %hi(vpu_vec_zero) - addi t3,t3, %lo(vpu_vec_zero) - { li t3, 0 ; xm.vldr t3} - { xm.slt a3, t3, a2 ; xm.vlmaccr0 s8} - - { nop ; xm.vstd x24} - { xm.neg x20, x12 ; nop} -//{ neg s4, a2 ; vstd s8}" - { addi s4, s4, 1 ; xm.vlmaccr0 s8} - { nop ; xm.vstr s8} - -// x11 and x10 will contain a 64-bit result. Left or right-shift that as appropriate. - - xm.lddi a1,a0, 0(s8) - { addi a1, a1, 8 ; xm.brff a3, .L_left_shift }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + la t3, vpu_vec_0x40000000 + { nop ; lw a2,(FILT_SHIFT)*4 ( filter) } + { addi s2, a2, -1 ; xm.vldc t3 } + { li s3, 1 ; xm.vstr s8 } + la t3, vpu_vec_0x80000000 + { xm.shl s2, s3, s2 ; xm.vlmacc0 t3 } + la t3, vpu_vec_zero + { li t3, 0 ; xm.vldr t3 } + { xm.slt a3, t3, a2 ; xm.vlmaccr0 s8 } + { nop ; xm.vstd s8 } + { xm.neg s4, a2 ; nop } + { addi s4, s4, 1 ; xm.vlmaccr0 s8 } + { nop ; xm.vstr s8 } + +// a1 and a0 will contain a 64-bit result. Left or right-shift that as appropriate. + + xm.lddi a1,a0, 0(s8) + { addi a1, a1, 8 ; xm.brff a3, .L_left_shift } .L_right_shift: - // (from the block above): x19 = 1, x18 = 1<<(x12 - 1) - // adding x18*x19 (=x18) to x11:x10 effectively rounds it when we extract it. + // (from the block above): s3 = 1, s2 = 1<<(a2 - 1) + // adding s2*s3 (=s2) to a1:a0 effectively rounds it when we extract it.
xm.maccs a1, a0, s2, s3 xm.lsats a1, a0, a2 xm.lextract a0, a1, a0, a2, 32 - { nop ; xm.bu .L_done } + { nop ; xm.bu .L_done } .L_left_shift: - // (from the block above): x19 = 1, x20 = -x12 + 1, x28 = 0 + // (from the block above): s3 = 1, s4 = -a2 + 1, t3 = 0 // If we're left-shifting (or zero-shifting), we still need to saturate to q31. // lsats has a bug which doesn't allow to use it with 0, so we'll have to // add 1 to our shift, left-shift, saturate and extract with 1, no need to round here. - { xm.shl a1, a1, s4 ; nop } - xm.linsert a1, t3, a0, s4, 32 - xm.lsats a1, t3, s3 - xm.lextract a0, a1, t3, s3, 32 + { xm.shl a1, a1, s4 ; nop } + xm.linsert a1, t3, a0, s4, 32 + xm.lsats a1, t3, s3 + xm.lextract a0, a1, t3, s3, 32 .L_done: - xm.lddsp s7,s6,24 + xm.lddsp s7,s6,0 xm.lddsp s5,s4,16 xm.lddsp s3,s2,8 - { nop ; lw s8, 4 (sp)} - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32.
*/ + { nop ; lw s8, 24 (sp) } + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S index 96235de8..514fb590 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S +++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S @@ -20,100 +20,100 @@ void filter_fir_s16_push_sample_down( #define FUNCTION_NAME filter_fir_s16_push_sample_down #define NSTACKVECS (1) -#define NSTACKWORDS (12+8*NSTACKVECS) +#define NSTACKWORDS (8+8*NSTACKVECS) -#define STACK_VEC_TMP (NSTACKWORDS-8) +#define STACK_VEC_TMP (NSTACKWORDS-8-1) -#define buff x10 -#define length x11 -#define value x12 -#define _60 x13 +#define buff a0 +#define length a1 +#define value a2 +#define _60 a3 #define mask x18 #define tail_start x19 -#define buff_end x20 -#define buffD x21 -#define tmp x24 +#define buff_end s4 +#define buffD s5 +#define tmp s8 .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. 
*/ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.entsp (NSTACKWORDS)*4 xm.stdsp s3,s2,8 xm.stdsp s5,s4,16 - xm.stdsp s7,s6,24 - { li a3, 32 ; sw s8, 4 (sp)} + xm.stdsp s7,s6,0 + { li a3, 32 ; sw s8, 24 (sp) } - { slli t3, a3, 3 ; li mask, 28 /*28 samples at a time*/ } - { xm.mkmsk mask, mask ; xm.vsetc t3} + { slli t3, a3, 3 ; li mask, 28 /*28 samples at a time*/ } + { xm.mkmsk mask, mask ; xm.vsetc t3 } // We're going to be moving 28 samples per loop iteration. The last address at which we // can move 28 samples is 56 bytes before the end of the buffer. The end of the buffer is // at buff + 2*length. - { slli tail_start, length, 1 ; li t3, 56 } - { add buff_end, buff, tail_start ; slli mask, mask, 4 } - { sub tail_start, buff_end, t3 ; addi _60, t3, 4 } + { slli tail_start, length, 1 ; li t3, 56 } + { add buff_end, buff, tail_start ; slli mask, mask, 4 } + { sub tail_start, buff_end, t3 ; addi _60, t3, 4 } - { mv t3, buff ; xm.sltu tmp, tail_start, buff } - { li tmp, 28 ; xm.bt tmp, .L_loop_end } - { add buffD, buff, tmp ; xm.bu .L_loop_top } + { mv t3, buff ; xm.sltu tmp, tail_start, buff } + { li tmp, 28 ; xm.bt tmp, .L_loop_end } + { add buffD, buff, tmp ; xm.bu .L_loop_top } .p2align 4 // Does this loop have an FNOP after the first iteration? It all fits in the instruction buffer.. 
.L_loop_top: - { addi buff, t3, -4 ; xm.vldr t3} - { add t3, buff, _60 ; xm.vldd buffD} - { addi buffD, buffD, -4 ; xm.vlmaccr0 buff} + { addi buff, t3, -4 ; xm.vldr t3 } + { add t3, buff, _60 ; xm.vldd buffD } + { addi buffD, buffD, -4 ; xm.vlmaccr0 buff } xm.vlmaccr1 buff - { xm.sltu tmp, tail_start, t3 ; xm.vstd buffD} + { xm.sltu tmp, tail_start, t3 ; xm.vstd buffD } xm.vstrpv buff , mask - { add buffD, buffD, _60 ; xm.bt tmp, .L_loop_end } - {nop ; xm.bu .L_loop_top } /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { add buffD, buffD, _60 ; xm.bt tmp, .L_loop_end } + { nop ; xm.bu .L_loop_top } .L_loop_end: #undef _60 - // x28 holds the address of the next sample to be moved. - { sub length, buff_end, t3 ; li tmp, 29 } - { xm.sltu tmp, length, tmp ; li a3, 28 } - { nop ; xm.bt tmp, .L_skippp } - { addi buff, t3, -4 ; xm.vldr t3} - { nop ; xm.vlmaccr0 t3} + // t3 holds the address of the next sample to be moved. + { sub length, buff_end, t3 ; li tmp, 29 } + { xm.sltu tmp, length, tmp ; li a3, 28 } + { nop ; xm.bt tmp, .L_skippp } + { addi buff, t3, -4 ; xm.vldr t3 } + { nop ; xm.vlmaccr0 t3 } xm.vlmaccr1 t3 - { add t3, t3, a3 ; nop} + { add t3, t3, a3 ; nop } xm.vstrpv buff, mask .L_skippp: - { sub length, buff_end, t3 ; nop } - { li a3, 0 ; xm.vldr t3} - { xm.mkmsk tmp, length ; addi buff, t3, -4 } - { nop ; xm.vlmaccr0 t3} + { sub length, buff_end, t3 ; nop } + { li a3, 0 ; xm.vldr t3 } + { xm.mkmsk tmp, length ; addi buff, t3, -4 } + { nop ; xm.vlmaccr0 t3 } xm.vlmaccr1 t3 - { add t3, t3, length ; nop} - { addi t3, t3, -2 ; slli tmp, tmp, 4 } + { add t3, t3, length ; nop } + { addi t3, t3, -2 ; slli tmp, tmp, 4 } xm.vstrpv buff, tmp xm.st16 value, a3(t3) //xm.st16 value, t3(a3) .L_done: - xm.lddsp s7,s6,24 + xm.lddsp s7,s6,0 xm.lddsp s5,s4,16 xm.lddsp s3,s2,8 - { nop ; lw s8, 4 (sp)} - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the 
item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. */ + { nop ; lw s8, 24 (sp) } + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S index c7073b05..ffdeb33e 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S +++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S @@ -20,45 +20,45 @@ void filter_fir_s16_push_sample_up( #define FUNCTION_NAME filter_fir_s16_push_sample_up #define NSTACKVECS (1) -#define NSTACKWORDS (12+8*NSTACKVECS) +#define NSTACKWORDS (8+8*NSTACKVECS) -#define STACK_VEC_TMP (NSTACKWORDS-8) +#define STACK_VEC_TMP (NSTACKWORDS-8-1) -#define buff_start x10 -#define length x11 -#define value x12 -#define tmpB x13 +#define buff_start a0 +#define length a1 +#define value a2 +#define 
tmpB a3 #define mask x18 #define buffR x19 -#define tmpC x20 -#define buffD x21 -#define tmp x24 +#define tmpC s4 +#define buffD s5 +#define tmp s8 .text -.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ +.globl FUNCTION_NAME; .type FUNCTION_NAME,@function .p2align 4 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.entsp (NSTACKWORDS)*4 xm.stdsp s3,s2,8 xm.stdsp s5,s4,16 - xm.stdsp s7,s6,24 - { li tmpB, 32 ; sw s8, 4 (sp)} + xm.stdsp s7,s6,0 + { li tmpB, 32 ; sw s8, 24 (sp) } - { slli t3, tmpB, 3 ; xm.mkmski mask, 32 } - { mv tmp, length ; xm.vsetc t3} + { slli t3, tmpB, 3 ; xm.mkmski mask, 32 } + { mv tmp, length ; xm.vsetc t3 } // If the number of samples is odd, pretend it was one larger. If it's even, move the // final sample without the VPU. xm.zexti tmp, 1 xm.eq buffR, length, 1 - { add length, length, tmp ; xm.bt buffR, .L_write_new_sample } - { addi tmp, length, -2 ; xm.bt tmp, .L_odd_samps } + { add length, length, tmp ; xm.bt buffR, .L_write_new_sample } + { addi tmp, length, -2 ; xm.bt tmp, .L_odd_samps } .L_even_samps: // xm.ld16s buffD, buff_start(tmp) xm.ld16s buffD, tmp(buff_start) @@ -67,78 +67,78 @@ FUNCTION_NAME: xm.st16 buffD, tmp(buff_start) .L_odd_samps: - { slli mask, mask, 4 ; slli length, length, 1 } + { slli mask, mask, 4 ; slli length, length, 1 } // buffR <-- first byte after buff[] // mask <-- 0xFFFFFFF0 - { add buffR, buff_start, length ; nop } + { add buffR, buff_start, length ; nop } // Move buffD and buffR to point to: - { sub buffR, buffR, tmpB ; li tmpB, 28 } - { sub buffD, buffR, tmpB ; srli mask, mask, 2 } + { sub buffR, buffR, tmpB ; li tmpB, 28 } + { sub buffD, buffR, tmpB ; srli mask, mask, 2 } // If (buffD < buff_start) then skip the loop. 
- { mv t3, buffR ; xm.sltu tmp, buffD, buff_start } - { li tmpB, 56 ; xm.bt tmp, .L_loop_end } - { nop ; xm.bu .L_loop_top } + { mv t3, buffR ; xm.sltu tmp, buffD, buff_start } + { li tmpB, 56 ; xm.bt tmp, .L_loop_end } + { nop ; xm.bu .L_loop_top } // Do the loop. Align to 16 bytes so that we hopefully don't have FNOPs after the first // iteration. .p2align 4 .L_loop_top: - { mv buffR, buffD ; xm.vldr t3} - { sub buffD, buffD, tmpB ; xm.vldd buffD} - { xm.sltu tmp, buffD, buff_start ; xm.vlmaccr0 t3} + { mv buffR, buffD ; xm.vldr t3 } + { sub buffD, buffD, tmpB ; xm.vldd buffD } + { xm.sltu tmp, buffD, buff_start ; xm.vlmaccr0 t3 } xm.vlmaccr1 t3 xm.vstrpv t3, mask - { nop ; xm.vstd buffR} - // { sub t3, t3, tmpB ; xm.brff tmp, .L_loop_top }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ - { sub t3, t3, tmpB ; xm.bt tmp, .L_loop_end } - {xm.bu .L_loop_top ;nop} + { nop ; xm.vstd buffR } + // { sub t3, t3, tmpB ; xm.brff tmp, .L_loop_top } + { sub t3, t3, tmpB ; xm.bt tmp, .L_loop_end } + { xm.bu .L_loop_top ; nop } .L_loop_end: - // If (x28 < buff_start ) we CANNOT do another vector (just vR[]) using the same + // If (t3 < buff_start ) we CANNOT do another vector (just vR[]) using the same // mask. Otherwise, we can. - { xm.sltu tmp, t3, buff_start ; nop } - { mv buffR, t3 ; xm.bt tmp, .L_skippp } - { li tmpB, 28 ; xm.vldr t3} - { sub t3, t3, tmpB ; xm.vlmaccr0 buffR} + { xm.sltu tmp, t3, buff_start ; nop } + { mv buffR, t3 ; xm.bt tmp, .L_skippp } + { li tmpB, 28 ; xm.vldr t3 } + { sub t3, t3, tmpB ; xm.vlmaccr0 buffR } xm.vlmaccr1 buffR xm.vstrpv buffR, mask .L_skippp: // Now we have less than 1 vector (14 samples) to shift. They'll be at the end of - // the vector when we load x28. Everything after buff_start. + // the vector when we load t3. Everything after buff_start. 
- { sub length, buff_start, t3 ; xm.mkmski tmpC, 2 } - { xm.mkmski mask, 32 ; xm.bitrev tmpC, tmpC } + { sub length, buff_start, t3 ; xm.mkmski tmpC, 2 } + { xm.mkmski mask, 32 ; xm.bitrev tmpC, tmpC } - { xm.shl mask, mask, length; xm.vldr t3} + { xm.shl mask, mask, length ; xm.vldr t3 } xm.andnot mask, tmpC - {nop; xm.vlmaccr0 x28 } - xm.vlmaccr1 x28 + { nop ; xm.vlmaccr0 t3 } + xm.vlmaccr1 t3 xm.vstrpv t3, mask .L_write_new_sample: - { li tmpC, 0 ; nop } + { li tmpC, 0 ; nop } // xm.st16 value, buff_start(tmpC) xm.st16 value, tmpC(buff_start) .L_done: - xm.lddsp s7,s6,24 + xm.lddsp s7,s6,0 xm.lddsp s5,s4,16 xm.lddsp s3,s2,8 - { nop ; lw s8, 4 (sp)} - xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */ - -//.cc_bottom FUNCTION_NAME.function; /* Translation error on this line: unexpected token at position 33. */ -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; /* Translation error on this line: unexpected token at position 29. */ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; /* Translation error on this line: unexpected token at position 32. 
*/ + { nop ; lw s8, 24 (sp) } + xm.retsp (NSTACKWORDS)*4 + +//.cc_bottom FUNCTION_NAME.function; +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords; +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores; +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers; +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends; .L_size_end: .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME diff --git a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S index 49ee937e..1228631e 100644 --- a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S +++ b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S @@ -18,70 +18,70 @@ headroom_t vect_s32_convolve_valid( // #include "../asm_helper.h" #define NSTACKVECTS (2) -#define NSTACKWORDS (16 + 8*NSTACKVECTS+4) +#define NSTACKWORDS (8 + 8*NSTACKVECTS+4) #define FUNCTION_NAME vect_s32_convolve_valid -#define STACK_VEC_TMP (NSTACKWORDS-8-4) +#define STACK_VEC_TMP (NSTACKWORDS-8-2) -#define sig_out x10 -#define sig_in x11 -#define filter x12 -#define len x13 +#define sig_out a0 +#define sig_in a1 +#define filter a2 +#define len a3 #define tmpA x18 #define _32 x19 -#define vec_tmp x20 -#define tmpB x21 +#define vec_tmp s4 +#define tmpB s5 #define P filter // P = (filter_taps >> 1) -.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. 
*/ +.text; .issue_mode dual .p2align 2 FUNCTION_NAME: - xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */ + xm.entsp (NSTACKWORDS)*4 xm.stdsp s3,s2,8 xm.stdsp s5,s4,16 xm.stdsp s7,s6,24 ////// Set mode to 32-bit - { li t3, 0 ; sw s8, 4 (sp)} - { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3} + { li t3, 0 ; sw s8, 4 (sp) } + { addi vec_tmp,sp, (STACK_VEC_TMP)*4 ; xm.vsetc t3 } ////// Move the filter coefficients into vC[] mv tmpB, a4 - { mv t3, filter ; nop} - { slli tmpA, tmpB, 2 ; xm.vclrdr } - { xm.mkmsk tmpA, tmpA ; xm.vstd vec_tmp} - { srli P, tmpB, 1 ; xm.vldr t3} + { mv t3, filter ; nop } + { slli tmpA, tmpB, 2 ; xm.vclrdr } + { xm.mkmsk tmpA, tmpA ; xm.vstd vec_tmp } + { srli P, tmpB, 1 ; xm.vldr t3 } xm.vstrpv vec_tmp, tmpA - { sub len, len, P ; xm.vldc vec_tmp} - { sub len, len, P ; li _32, 32 } + { sub len, len, P ; xm.vldc vec_tmp } + { sub len, len, P ; li _32, 32 } // Number of output elements is sig_in_length - (2 * (filter_taps >> 1)) = sig_in_length - 2*P - { srli t3, len, 3 ; add sig_in, sig_in, _32 } - { addi sig_in, sig_in, -4 ; xm.brff t3, .L_loop_bot }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { srli t3, len, 3 ; add sig_in, sig_in, _32 } + { addi sig_in, sig_in, -4 ; xm.brff t3, .L_loop_bot } .L_loop_top: - { addi len, len, -8 ; xm.vclrdr } - { addi t3, sig_in, -4 ; xm.vlmaccr0 sig_in} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { addi t3, t3, -4 ; xm.vlmaccr0 t3} - { srli t3, len, 3 ; xm.vlmaccr0 t3} - { add sig_in, sig_in, _32 ; xm.vstr sig_out} - { add sig_out, sig_out, _32 ; xm.bt t3, .L_loop_top } + { addi len, len, -8 ; xm.vclrdr } + { addi 
t3, sig_in, -4 ; xm.vlmaccr0 sig_in } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { addi t3, t3, -4 ; xm.vlmaccr0 t3 } + { srli t3, len, 3 ; xm.vlmaccr0 t3 } + { add sig_in, sig_in, _32 ; xm.vstr sig_out } + { add sig_out, sig_out, _32 ; xm.bt t3, .L_loop_top } .L_loop_bot: // If there is a tail, then len will be non-zero. @@ -90,36 +90,36 @@ FUNCTION_NAME: // by definition, be fewer than 8 elements. So sig_in[] needs to be offset: // sig_in <-- sig_in - 4*(8 - len) = sig_in - 32 + 4*len - { slli len, len, 2 ; xm.brff len, .L_finish }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */ + { slli len, len, 2 ; xm.brff len, .L_finish } - { sub sig_in, sig_in, _32 ; xm.vclrdr } - { xm.mkmsk tmpA, len ; add sig_in, sig_in, len } + { sub sig_in, sig_in, _32 ; xm.vclrdr } + { xm.mkmsk tmpA, len ; add sig_in, sig_in, len } .L_tail_loop: - { addi len, len, -4 ; xm.vlmaccr0 sig_in} - { addi sig_in, sig_in, -4 ; xm.bt len, .L_tail_loop } + { addi len, len, -4 ; xm.vlmaccr0 sig_in } + { addi sig_in, sig_in, -4 ; xm.bt len, .L_tail_loop } .L_tail_loop_bot: xm.vstrpv sig_out, tmpA - { nop ; xm.vstr vec_tmp} + { nop ; xm.vstr vec_tmp } .L_finish: xm.lddsp s3,s2,8 xm.lddsp s5,s4,16 xm.lddsp s7,s6,24 - { li a0, 31 ; xm.vgetc t3} - { xm.zexti t3, 5 ; lw s8, 4 (sp)} - { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } + { li a0, 31 ; xm.vgetc t3 } + { xm.zexti t3, 5 ; lw s8, 4 (sp) } + { sub a0, a0, t3 ; xm.retsp (NSTACKWORDS)*4 } .L_func_end: .global FUNCTION_NAME .type FUNCTION_NAME,@function -.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ -.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. 
*/ -.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ -.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ +.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords +.set FUNCTION_NAME.maxcores,1; .global FUNCTION_NAME.maxcores +.set FUNCTION_NAME.maxtimers,0; .global FUNCTION_NAME.maxtimers +.set FUNCTION_NAME.maxchanends,0; .global FUNCTION_NAME.maxchanends .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16.c b/tests/filter_tests/src/filter/test_filter_fir_s16.c index 26f7ce36..289433d8 100644 --- a/tests/filter_tests/src/filter/test_filter_fir_s16.c +++ b/tests/filter_tests/src/filter/test_filter_fir_s16.c @@ -56,7 +56,11 @@ TEST(filter_fir_s16, case0) for(int i = 0; i < 20; i++){ exp += N; // old sample (i) leaves as new sample (N+i) comes in. 
int16_t res = filter_fir_s16(&filter, N+i); - TEST_ASSERT_EQUAL(exp, res); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(1, exp, res); + #else + TEST_ASSERT_EQUAL(exp, res); + #endif } } @@ -92,7 +96,11 @@ TEST(filter_fir_s16, case1) int16_t exp = N*(N-1) / 2; int16_t res = filter_fir_s16(&filter, 1); - TEST_ASSERT_EQUAL(exp, res); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN(1, exp, res); + #else + TEST_ASSERT_EQUAL(exp, res); + #endif } } @@ -169,7 +177,11 @@ TEST(filter_fir_s16, case2) // Apply the filter int16_t res = filter_fir_s16(&filter, new_sample); - TEST_ASSERT_EQUAL_MESSAGE(expected16, res, msg_buff); + #if defined(__VX4B__) + TEST_ASSERT_INT16_WITHIN_MESSAGE(2, expected16, res, msg_buff); + #else + TEST_ASSERT_EQUAL_MESSAGE(expected16, res, msg_buff); + #endif } } diff --git a/tests/filter_tests/src/filter/test_filter_fir_s32.c b/tests/filter_tests/src/filter/test_filter_fir_s32.c index 2b961aac..d059b957 100644 --- a/tests/filter_tests/src/filter/test_filter_fir_s32.c +++ b/tests/filter_tests/src/filter/test_filter_fir_s32.c @@ -307,7 +307,7 @@ TEST(filter_fir_s32, case3) int32_t expected32; - if(filter.shift >= 0){ + if(filter.shift > 0){ expected32 = (int32_t) ((expected64 + (1LL << (filter.shift-1))) >> filter.shift); } else { expected32 = (int32_t) (expected64 << -filter.shift);