diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
index fa638c1a..03d8fb71 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_s32.S
@@ -23,7 +23,7 @@ int32_t filter_biquad_s32(
 #define FUNCTION_NAME filter_biquad_s32
 
 #define NSTACKVECS      (0)
-#define NSTACKWORDS     (32+8*NSTACKVECS)
+#define NSTACKWORDS     (4+8*NSTACKVECS)
 
 #define FILT_N          0
 #define FILT_STATE      1
@@ -33,41 +33,41 @@ int32_t filter_biquad_s32(
 #define STATE_START     10
 
 
-#define state       x10      // ![0x%08X]
-#define sample      x11      // ![%d]
-#define coef        x12      // ![0x%08X]
-#define tmp         x13      // ![%d]
+#define state       a0      // ![0x%08X]
+#define sample      a1      // ![%d]
+#define coef        a2      // ![0x%08X]
+#define tmp         a3      // ![%d]
 #define _32         x18      // ![%d]
 #define _36         x19      // ![%d]
-#define filter      x24     // ![0x%08X]
+#define filter      s8      // ![0x%08X]
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
-        xm.stdsp  s3,s2,8
-    {   li t3, 0                              ;   sw s8, 4                          (sp)}
-    {   mv filter, a0                          ;   xm.vsetc t3}
-    {   xm.ldcu tmp, FILT_STATE + STATE_START       ; nop                                           }
-        sh2add state, tmp, filter          // state <-- &(filter->state[1][1])
-    {   xm.ldcu tmp, FILT_COEF + COEF_START         ;   xm.vclrdr                                  }
-        sh2add coef,  tmp , filter           // coef <-- &(filter->coef[4][0])
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  s3,s2,0
+    { li t3, 0                          ; sw s8, 8                         (sp) }
+    { mv filter, a0                     ; xm.vsetc t3                           }
+    { xm.ldcu tmp, FILT_STATE + STATE_START ; nop                               }
+    sh2add state, tmp, filter          // state <-- &(filter->state[1][1])
+    { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr                           }
+    sh2add coef,  tmp , filter           // coef <-- &(filter->coef[4][0])
 
-    {   li _36, 36                             ;   li _32, 32                             }
+    { li _36, 36                        ; li _32, 32                            }
 
 // Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily.
 
-    {   sub state, state, _36                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    {   add state, state, _32                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    {   sub state, state, _36                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    { nop                                       ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    { sub state, state, _36             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { add state, state, _32             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { sub state, state, _36             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { nop                               ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
 
     // Now acc[k] =  b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k]
     // state = &(filter->state[0][0])
@@ -78,25 +78,24 @@ FUNCTION_NAME:
 
     // Move filter->state[0][:] to filter->state[1][:]
 
-    {   add t3, state, s3                      ;   xm.vldc state}
-    {   add tmp, state, _32                     ;   lw N,(FILT_N)*4                   ( filter)}
-    {   add t3, t3, _32                       ;   xm.vstc t3}
-    {   slli N, N, 1                             ;   lw tmp,0                         ( tmp)}
-    {   li tmp, 6                             ;   sw tmp,0                         ( t3)}
+    { add t3, state, s3                 ; xm.vldc state                         }
+    { add tmp, state, _32               ; lw N,(FILT_N)*4             ( filter) }
+    { add t3, t3, _32                   ; xm.vstc t3                            }
+    { slli N, N, 1                      ; lw tmp,0                       ( tmp) }
+    { li tmp, 6                         ; sw tmp,0                        ( t3) }
 
     // Place the newest input sample in state[0][0]
-    {   sub N, tmp, N                           ;   sw sample,0                    ( state)}
+    { sub N, tmp, N                     ; sw sample,0                  ( state) }
 
     // Overwrite state[0][1:9] with 0's
-lui t3, %hi(vpu_vec_zero)
-        addi t3,t3, %lo(vpu_vec_zero)
-    {   addi t3, state, 4                       ;   xm.vldc t3}
-    { nop                                           ;   xm.vstc t3}
+la t3, vpu_vec_zero
+    { addi t3, state, 4                 ; xm.vldc t3                            }
+    { nop                               ; xm.vstc t3                            }
 
     // vC[:] <-- coef[b0][:]
-    { nop                                           ;   xm.vldc coef}
+    { nop                               ; xm.vldc coef                          }
 
-    // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them.
+    // Every element in t3[0:8] except for t3[0] is zero, so a VLMACC shouldn't affect them.
     // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a
     // problem. Smokin'!
 
@@ -108,24 +107,24 @@ lui t3, %hi(vpu_vec_zero)
     // the k'th filter section, MACCing against that will not affect accumulators > k. Then we write
     // out the output of section k. We do the MACC again, **which will corrupt the accumulators 
     // which are LESS THAN k.... but that's FINE because we're not going to write them out again.   
-    {   xm.mkmski tmp, 4                            ;   xm.vlmacc0 state}
+    { xm.mkmski tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
         li N, 0
-    { nop                                           ;   xm.bru N /* Do N-1 remaining biquads */    }
+    { nop                               ; xm.bru N /* Do N-1 remaining biquads */ }
 
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
-    {   slli tmp, tmp, 4                         ;   xm.vlmacc0 state}
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
         xm.vstrpv t3, tmp
    
     // Final vstrpv should have written the output to filt->state[0][N]. filt->state should
@@ -134,22 +133,19 @@ lui t3, %hi(vpu_vec_zero)
     lw N,(FILT_N)*4                   ( filter)
     xm.ldw a0,N                        ( state)
 
-
 .L_done:
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.lddsp  s3,s2,8
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+    { nop                               ; lw s8, 8                          (sp) }
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
 #undef FUNCTION_NAME
 
-
-
 #endif //defined(__VX4B__)
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
index 4f175d01..5dce012e 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_biquad_sat_s32.S
@@ -23,10 +23,10 @@ int32_t filter_biquad_sat_s32(
 #define FUNCTION_NAME filter_biquad_sat_s32
 
 #define NSTACKVECS      (2)
-#define NSTACKWORDS     (10+2+8*NSTACKVECS)
+#define NSTACKWORDS     (4+8*NSTACKVECS)
 
-#define STACK_TMP_VR    (NSTACKWORDS - 16-2)
-#define STACK_TMP_VD    (NSTACKWORDS -  8-2)
+#define STACK_TMP_VR    (NSTACKWORDS - 16-1)
+#define STACK_TMP_VD    (NSTACKWORDS -  8-1)
 
 #define FILT_N          0
 #define FILT_STATE      1
@@ -36,41 +36,41 @@ int32_t filter_biquad_sat_s32(
 #define STATE_START     10
 
 
-#define state       x10      // ![0x%08X]
-#define sample      x11      // ![%d]
-#define coef        x12      // ![0x%08X]
-#define tmp         x13      // ![%d]
+#define state       a0      // ![0x%08X]
+#define sample      a1      // ![%d]
+#define coef        a2      // ![0x%08X]
+#define tmp         a3      // ![%d]
 #define _32         x18      // ![%d]
 #define _36         x19      // ![%d]
-#define filter      x24     // ![0x%08X]
+#define filter      s8     // ![0x%08X]
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
-        xm.stdsp  s3,s2,8
-    {   li t3, 0                              ;   sw s8, 4                          (sp)}
-    {   mv filter, a0                          ;   xm.vsetc t3}
-    {   xm.ldcu tmp, FILT_STATE + STATE_START       ; nop                                           }
+        xm.entsp (NSTACKWORDS)*4
+        xm.stdsp  s3,s2,0
+    { li t3, 0                          ; sw s8, 8                         (sp) }
+    { mv filter, a0                     ; xm.vsetc t3                           }
+    { xm.ldcu tmp, FILT_STATE + STATE_START ; nop                               }
         sh2add state, tmp, filter          // state <-- &(filter->state[1][1])
-    {   xm.ldcu tmp, FILT_COEF + COEF_START         ;   xm.vclrdr                                  }
+    { xm.ldcu tmp, FILT_COEF + COEF_START ; xm.vclrdr                           }
         sh2add coef, tmp, filter            // coef <-- &(filter->coef[4][0])
 
-    {   li _36, 36                             ;   li _32, 32                             }
+    { li _36, 36                        ; li _32, 32                            }
 
 // Deal with the b2 and -a2 coefficients before b1 and -a1, so we can overwrite them easily.
 
-    {   sub state, state, _36                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    {   add state, state, _32                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    {   sub state, state, _36                   ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
-    {   nop                                     ;   xm.vldc state}
-    {   sub coef, coef, _32                     ;   xm.vlmacc0 coef}
+    { sub state, state, _36             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { add state, state, _32             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { sub state, state, _36             ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
+    { nop                               ; xm.vldc state                         }
+    { sub coef, coef, _32               ; xm.vlmacc0 coef                       }
 
     // Now acc[k] =  b1[k] * x[n-1][k] + b2[k] * x[n-2][k] - a1[k] * y[n-1][k] - a2[k] * y[n-2][k]
     // state = &(filter->state[0][0])
@@ -81,35 +81,34 @@ FUNCTION_NAME:
 
     // Move filter->state[0][:] to filter->state[1][:]
 
-    {   add t3, state, s3                      ;   xm.vldc state}
-    {   add t3, t3, _32                        ;   xm.vstc t3}
-    {   add tmp, state, _32                    ;   lw s2,(FILT_N)*4                 ( filter)}
-    {   li s3, 6                               ;   lw tmp,0                         ( tmp)}
-    {   li tmp, 6*8                            ;   sw tmp,0                         ( t3)}
+    { add t3, state, s3                 ; xm.vldc state                         }
+    { add t3, t3, _32                   ; xm.vstc t3                            }
+    { add tmp, state, _32               ; lw s2,(FILT_N)*4            ( filter) }
+    { li s3, 6                          ; lw tmp,0                       ( tmp) }
+    { li tmp, 6*8                       ; sw tmp,0                        ( t3) }
         mul N, s2, s3
 
     // Place the newest input sample in state[0][0]
-    {   sub N, tmp, N                           ;   sw sample,0                    ( state)}
+    { sub N, tmp, N                     ; sw sample,0                  ( state) }
 
 #undef sample
-#define zeros x11
+#define zeros a1
 
     // Overwrite state[0][1:9] with 0's
-    lui t3, %hi(vpu_vec_zero)
-        addi t3,t3, %lo(vpu_vec_zero)
-    {   addi zeros, t3, 0                       ;   li _32, 32}
-    {   addi t3, state, 4                       ;   xm.vldc t3}
-    {   nop                                     ;   xm.vstc t3}
+    la t3, vpu_vec_zero
+    { addi zeros, t3, 0                 ; li _32, 32                            }
+    { addi t3, state, 4                 ; xm.vldc t3                            }
+    { nop                               ; xm.vstc t3                            }
 
     // vC[:] <-- coef[b0][:]
-    {  nop                                      ;   xm.vldc coef}
+    { nop                               ; xm.vldc coef                          }
 
 #undef coef
-#define state_p1 x12
+#define state_p1 a2
 
-    {   addi state_p1, t3, 0                    ;   addi t3,sp, (STACK_TMP_VR)*4              }
+    { addi state_p1, t3, 0              ; addi t3,sp, (STACK_TMP_VR)*4          }
 
-    // Every element in x28[0:8] except for x28[0] is zero, so a VLMACC shouldn't affect them.
+    // Every element in t3[0:8] except for t3[0] is zero, so a VLMACC shouldn't affect them.
     // Subsequent VLMACCs will corrupt the accumulators, but The Mask will stop that from being a
     // problem. Smokin'!
 
@@ -126,90 +125,90 @@ FUNCTION_NAME:
     // wouldn't need to recalculate the stack pointer every time, doesn't            _       _
     // matter here as we're not using most of the resourse line instructions anyway   \(`~`)/
 
-    {   xm.mkmski tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { xm.mkmski tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 li N, (0)
-    { nop                                           ;   xm.bru N /* Do N-1 remaining biquads */    }
+    { nop                               ; xm.bru N /* Do N-1 remaining biquads */ }
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
 
-    {   sub t3, t3, _32                             ;   xm.vldd t3}
-    {   nop                                         ;   xm.vldr t3}
-    {   slli tmp, tmp, 4                            ;   xm.vlmacc0 state}
-    {   add t3, t3, _32                             ;   xm.vstr t3}
-    {   nop                                         ;   xm.vstd t3}
+    { sub t3, t3, _32                   ; xm.vldd t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { slli tmp, tmp, 4                  ; xm.vlmacc0 state                      }
+    { add t3, t3, _32                   ; xm.vstr t3                            }
+    { nop                               ; xm.vstd t3                            }
         xm.vlsat zeros
         xm.vstrpv state_p1, tmp
 
     // Final vstrpv should have written the output to filt->state[0][N]. filt->state should
     // still be pointing at filt->state[0][0]
 
-    { nop                                           ;   lw N,(FILT_N)*4                   ( filter)}
+    { nop                               ; lw N,(FILT_N)*4              ( filter) }
     xm.ldw a0,N                        ( state)
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.lddsp  s3,s2,8
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+    { nop                               ; lw s8, 8                          (sp) }
+        xm.lddsp  s3,s2,0
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
index cf0ae599..e6edd65c 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s16.S
@@ -23,8 +23,8 @@ int16_t filter_fir_s16(
 
 #define FUNCTION_NAME filter_fir_s16
 
-#define NSTACKVECS      (2)
-#define NSTACKWORDS     (12+8*NSTACKVECS)
+#define NSTACKVECS      (3)
+#define NSTACKWORDS     (8+8*NSTACKVECS)
 
 #define FILT_N          0
 #define FILT_SHIFT      1
@@ -32,79 +32,91 @@ int16_t filter_fir_s16(
 #define FILT_STATE      3
 
 
-#define STACK_VEC_TMP   (NSTACKWORDS-8)
-#define STACK_VEC_VR    (NSTACKWORDS-16)
+#define STACK_VEC_TMP2  (NSTACKWORDS-8-1)
+#define STACK_VEC_VR    (NSTACKWORDS-16-1)
+#define STACK_VEC_TMP   (NSTACKWORDS-24-1)
 
 #define STACK_FILTER    (8)
 
-#define buff        x10
-#define length      x11
-#define sample      x12
-#define tmpA        x13
+#define buff        a0
+#define length      a1
+#define sample      a2
+#define tmpA        a3
 #define _32         x18
 #define coef        x19
-#define filter      x24
+
+
+#define filter      s8
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
-
-        xm.stdsp  s3,s2,8
-        xm.stdsp  s5,s4,16
-        xm.stdsp  s7,s6,24
-    {   li _32, 32                             ;   sw s8, 4                          (sp)}
-    {   mv filter, a0                          ;   mv sample, a1                          }
-    { nop                                           ;   lw length,(FILT_N)*4              ( filter)}
-    { nop                                           ;   lw buff,(FILT_STATE)*4            ( filter)}
+    xm.entsp (NSTACKWORDS)*4
+
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,0
+
+    { addi s6,sp, (STACK_VEC_TMP2)*4   ; nop                                   }
+    addi s7, s6, (-30)
+
+    { li _32, 32                        ; sw s8, 24                          (sp) }
+    { mv filter, a0                     ; mv sample, a1                         }
+    { nop                               ; lw length,(FILT_N)*4              ( filter) }
+    { nop                               ; lw buff,(FILT_STATE)*4            ( filter) }
         call filter_fir_s16_push_sample_up
-    { nop                                           ;   lw coef,(FILT_COEF)*4             ( filter)}
-    { nop                                           ;   lw buff,(FILT_STATE)*4            ( filter)}
-    { nop                                           ;   lw length,(FILT_N)*4              ( filter)}
-    {   slli t3, _32, 3                         ;   xm.vclrdr                                  }
-    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vsetc t3}
-    {   mv tmpA, length                        ;   xm.vstd t3}
-    {   xm.zexti tmpA, 4                            ;   srli length, length, 4                   }
-    {   slli tmpA, tmpA, 1                       ;   xm.brff length, .L_loop_end                  }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    { nop                               ; lw coef,(FILT_COEF)*4             ( filter) }
+    { nop                               ; lw buff,(FILT_STATE)*4            ( filter) }
+    { nop                               ; lw length,(FILT_N)*4              ( filter) }
+    { slli t3, _32, 3                   ; xm.vclrdr                             }
+    { addi t3,sp, (STACK_VEC_TMP)*4     ; xm.vsetc t3                           }
+    { mv tmpA, length                   ; xm.vstd t3                            }
+    { xm.zexti tmpA, 4                  ; srli length, length, 4                }
+    { slli tmpA, tmpA, 1                ; xm.brff length, .L_loop_end           }
     
     .L_loop_top:
-        {   add buff, buff, _32                     ;   xm.vldc buff}
-        {   addi length, length, -1                   ;   xm.vlmaccr0 coef}
+        { add buff, buff, _32           ; xm.vldc buff                          }
+        { addi length, length, -1       ; xm.vlmaccr0 coef                      }
         xm.vlmaccr1 coef
-        {   add coef, coef, _32                     ;   xm.bt length, .L_loop_top                  }
+        { nop                           ; xm.vstd s6                            }    
+        { nop                           ; xm.vldd s7                            }
+        { nop                           ; xm.vstr s6                            }
+        { nop                           ; xm.vldr s7                            }
+
+        { add coef, coef, _32           ; xm.bt length, .L_loop_top             }
     .L_loop_end:
 
-    {   addi a2,sp, (STACK_VEC_VR)*4               ;   xm.mkmsk tmpA, tmpA                            }
-    {   mv t3, buff                           ;   xm.vstr a2}
-    {   addi t3,sp, (STACK_VEC_TMP)*4             ;   xm.vldr t3}
+    { addi a2,sp, (STACK_VEC_VR)*4      ; xm.mkmsk tmpA, tmpA                   }
+    { mv t3, buff                       ; xm.vstr a2                            }
+    { addi t3,sp, (STACK_VEC_TMP)*4     ; xm.vldr t3                            }
         xm.vstrpv t3, tmpA
-    {   li tmpA, 0                             ;   lw a2,(FILT_SHIFT)*4                  ( filter)}
-    {   addi t3,sp, (STACK_VEC_VR)*4              ;   xm.vldc t3}
-    { nop                                           ;   xm.vldr t3}
-    { nop                                           ;   xm.vlmaccr0 coef}
+    { li tmpA, 0                        ; lw a2,(FILT_SHIFT)*4                  ( filter) }
+    { addi t3,sp, (STACK_VEC_VR)*4      ; xm.vldc t3                            }
+    { nop                               ; xm.vldr t3                            }
+    { nop                               ; xm.vlmaccr0 coef                      }
     xm.vlmaccr1 coef
-    //{ nop                                           ;   xm.vadddr                                      }
-    xm.st16 x12, tmpA(x28)
+
+    xm.st16 a2, tmpA(t3)
 
     xm.vlsat t3
-    { nop                                           ;   xm.vstr t3}
-    xm.ld16s x10, tmpA(x28) 
+    { nop                               ; xm.vstr t3                            }
+    xm.ld16s a0, tmpA(t3) 
 .L_done:
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.lddsp  s7,s6,24
+    { nop                               ; lw s8, 24                          (sp) }
+        xm.lddsp  s7,s6,0
         xm.lddsp  s5,s4,16
         xm.lddsp  s3,s2,8
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords;      /* Translation error on this line: unexpected token at position 86. */ 
-.global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS + filter_fir_s16_push_sample_up.nstackwords;     
+.global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
diff --git a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
index c06f7123..c0e310c5 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/filter_fir_s32.S
@@ -25,7 +25,7 @@ int32_t filter_fir_s32(
 #define FUNCTION_NAME filter_fir_s32
 
 #define NSTACKVECS      (1)
-#define NSTACKWORDS     (12+8*NSTACKVECS)
+#define NSTACKWORDS     (8+8*NSTACKVECS)
 
 #define FILT_N          0
 #define FILT_HEAD       1
@@ -34,28 +34,28 @@ int32_t filter_fir_s32(
 #define FILT_STATE      4
 
 
-#define STACK_VEC_TMP   (NSTACKWORDS-12)
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
 
 
-#define filter      x23
-#define sample      x11
+#define filter      s7
+#define sample      a1
 #define tmp1        x18
 #define tmp2        x22    
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
-        xm.stdsp  s3,s2,8
-        xm.stdsp  s5,s4,16
-        xm.stdsp  s7,s6,24
-    {   li t3, 0                              ;   sw s8, 4                          (sp)}
+    xm.entsp (NSTACKWORDS)*4
+    xm.stdsp  s3,s2,8
+    xm.stdsp  s5,s4,16
+    xm.stdsp  s7,s6,0
+    { li t3, 0                          ; sw s8, 24                          (sp) }
 
     // Set VPU mode to 32-bit
-    {   mv filter, a0                          ;   xm.vsetc t3}
+    { mv filter, a0                     ; xm.vsetc t3                           }
 
 
 // The field filter->head points to where the newest sample will go, which is probably somewhere in the middle of the 
@@ -65,139 +65,133 @@ FUNCTION_NAME:
 
 // I'm just going to create two sets of registers, corresponding to each of the two parts. That's what this is.
 
-#define state_A     x10
+#define state_A     a0
 #define state_B     x19
 
-#define N_A         x12    
-#define N_B         x21
+#define N_A         a2    
+#define N_B         s5
 
-#define coef_A      x11
-#define coef_B      x20
+#define coef_A      a1
+#define coef_B      s4
 
     // Get the current head position, which is also the number of taps in part B
-    { nop                                           ;   lw N_B,(FILT_HEAD)*4              ( filter)}
+    { nop                               ; lw N_B,(FILT_HEAD)*4        ( filter) }
 
     // If N_B is currently zero, then the next head is the final index. Otherwise it's just
     // the head decremented by 1.
-    {   addi t3, N_B, -1                         ;   lw N_A,(FILT_N)*4                 ( filter)}
-    { nop                                           ;   xm.bt N_B, .L_no_reset                     }
-    {   addi t3, N_A, -1                         ; nop                                           }
+    { addi t3, N_B, -1                  ; lw N_A,(FILT_N)*4           ( filter) }
+    { nop                               ; xm.bt N_B, .L_no_reset                }
+    { addi t3, N_A, -1                  ; nop                                   }
 
     .L_no_reset:
-    { nop                                           ;   sw t3,(FILT_HEAD)*4              ( filter)}
+    { nop                               ; sw t3,(FILT_HEAD)*4         ( filter) }
 
     // Store the newest sample in the state. And grab the rest of the state/coef/N values
-    { nop                                           ;   lw state_B,(FILT_STATE)*4         ( filter)}
-        sh2add state_A, N_B, state_B
-    {   sub N_A, N_A, N_B                       ;   sw sample,0                  ( state_A)}
-    {   slli tmp1, N_A, 2                        ;   lw coef_A,(FILT_COEF)*4           ( filter)}
-        sh2add coef_B, N_A, coef_A
+    { nop                               ; lw state_B,(FILT_STATE)*4   ( filter) }
+    sh2add state_A, N_B, state_B
+    { sub N_A, N_A, N_B                 ; sw sample,0                ( state_A) }
+    { slli tmp1, N_A, 2                 ; lw coef_A,(FILT_COEF)*4     ( filter) }
+    sh2add coef_B, N_A, coef_A
 #undef sample
 
     // Each part has its own tail. We'll handle both of those first (by masking the state with zeros), then we'll do the 
     // bulk of the work after
 
-    {   addi s8,sp, (STACK_VEC_TMP)*4             ;   xm.vclrdr                                  }
-    {   mv t3, state_A                        ;   xm.vstd s8}
-    {   xm.zexti tmp1, 5                            ;   xm.vldr t3}
-    {   xm.mkmsk t3, tmp1                         ;   srli N_A, N_A, 3                         }
-        xm.vstrpv s8, t3
-    {   mv t3, state_B                        ;   xm.vldc s8}
-    {   slli tmp2, N_B, 2                        ;   xm.vldr t3}
-    {   xm.zexti tmp2, 5                            ;   xm.vstd s8}
-    {   xm.mkmsk t3, tmp2                         ;   srli N_B, N_B, 3                         }
-        xm.vstrpv s8, t3
-    {   add state_A, state_A, tmp1              ;   xm.vclrdr                                  }
-    {   add coef_A, coef_A, tmp1                ;   xm.vlmaccr0 coef_A}
-    {   add state_B, state_B, tmp2              ;   xm.vldc s8}
-    {   add coef_B, coef_B, tmp2                ;   xm.vlmaccr0 coef_B}
+    { addi s8,sp, (STACK_VEC_TMP)*4     ; xm.vclrdr                             }
+    { mv t3, state_A                    ; xm.vstd s8                            }
+    { xm.zexti tmp1, 5                  ; xm.vldr t3                            }
+    { xm.mkmsk t3, tmp1                 ; srli N_A, N_A, 3                      }
+     xm.vstrpv s8, t3
+    { mv t3, state_B                    ; xm.vldc s8                            }
+    { slli tmp2, N_B, 2                 ; xm.vldr t3                            }
+    { xm.zexti tmp2, 5                  ; xm.vstd s8                            }
+    { xm.mkmsk t3, tmp2                 ; srli N_B, N_B, 3                      }
+    xm.vstrpv s8, t3
+    { add state_A, state_A, tmp1        ; xm.vclrdr                             }
+    { add coef_A, coef_A, tmp1          ; xm.vlmaccr0 coef_A                    }
+    { add state_B, state_B, tmp2        ; xm.vldc s8                            }
+    { add coef_B, coef_B, tmp2          ; xm.vlmaccr0 coef_B                    }
 
 // Now, go back through and do full vectors.
 
 #undef tmp2
 #define _32     x22
 
-    tail .L_part_A_start
     .p2align 4
     .L_part_A_start:
-        {   li _32, 32                             ;   xm.brff N_A, .L_part_A_end                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        { li _32, 32                    ; xm.brff N_A, .L_part_A_end            }
         .L_part_A_loop_top:
-            {   add state_A, state_A, _32               ;   xm.vldc state_A}
-            {   addi N_A, N_A, -1                         ;   xm.vlmaccr0 coef_A}
-            {   add coef_A, coef_A, _32                 ;   xm.bt N_A, .L_part_A_loop_top              }
+            { add state_A, state_A, _32 ; xm.vldc state_A                       }
+            { addi N_A, N_A, -1         ; xm.vlmaccr0 coef_A                    }
+            { add coef_A, coef_A, _32   ; xm.bt N_A, .L_part_A_loop_top         }
     .L_part_A_end:
 #undef state_A
 #undef N_A
 #undef coef_A
 
     .L_part_B_start:
-        {   li _32, 32                             ;   xm.brff N_B, .L_part_B_end                   }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        { li _32, 32                    ; xm.brff N_B, .L_part_B_end            }
         .L_part_B_loop_top:
-            {   add state_B, state_B, _32               ;   xm.vldc state_B}
-            {   addi N_B, N_B, -1                         ;   xm.vlmaccr0 coef_B}
-            {   add coef_B, coef_B, _32                 ;   xm.bt N_B, .L_part_B_loop_top              }
+            { add state_B, state_B, _32 ; xm.vldc state_B                       }
+            { addi N_B, N_B, -1         ; xm.vlmaccr0 coef_B                    }
+            { add coef_B, coef_B, _32   ; xm.bt N_B, .L_part_B_loop_top         }
     .L_part_B_end:
     
 #undef state_B
 #undef N_B
 #undef coef_B
 
-// Now combine the 40-bit accumulators, assumes that x24 points to the stack.
+// Now combine the 40-bit accumulators, assumes that s8 points to the stack.
 // (the logic for this is a too complicated to explain here)
-lui t3, %hi(vpu_vec_0x40000000)
-        addi t3,t3, %lo(vpu_vec_0x40000000)
-    { nop                                           ;   lw a2,(FILT_SHIFT)*4              ( filter)}
-    {   addi s2, a2, -1                           ;   xm.vldc t3}
-    {   li s3, 1                               ;   xm.vstr s8}
-lui t3, %hi(vpu_vec_0x80000000)
-        addi t3,t3, %lo(vpu_vec_0x80000000)
-    {   xm.shl s2, s3, s2                          ;   xm.vlmacc0 t3}
-lui t3, %hi(vpu_vec_zero)
-        addi t3,t3, %lo(vpu_vec_zero)
-    {   li t3, 0                              ;   xm.vldr t3}
-    {   xm.slt a3, t3, a2                         ;   xm.vlmaccr0 s8}
-
-    { nop ; xm.vstd x24}
-    { xm.neg x20, x12 ; nop}
-//{   neg s4, a2                              ;   vstd s8}"
-    {   addi s4, s4, 1                           ;   xm.vlmaccr0 s8}
-    { nop                                           ;   xm.vstr s8}
-
-// x11 and x10 will contain a 64-bit result. Left or right-shift that as appropriate.
-
-        xm.lddi  a1,a0, 0(s8)
-    {   addi a1, a1, 8                           ;   xm.brff a3, .L_left_shift                    }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+    la t3, vpu_vec_0x40000000
+    { nop                               ; lw a2,(FILT_SHIFT)*4        ( filter) }
+    { addi s2, a2, -1                   ; xm.vldc t3                            }
+    { li s3, 1                          ; xm.vstr s8                            }
+    la t3, vpu_vec_0x80000000
+    { xm.shl s2, s3, s2                 ; xm.vlmacc0 t3                         }
+    la t3, vpu_vec_zero
+    { li t3, 0                          ; xm.vldr t3                            }
+    { xm.slt a3, t3, a2                 ; xm.vlmaccr0 s8                        }
+    { nop                               ; xm.vstd s8                           }
+    { xm.neg s4, a2                   ; nop                                   }
+    { addi s4, s4, 1                    ; xm.vlmaccr0 s8                        }
+    { nop                               ; xm.vstr s8                            }
+
+// a1 and a0 will contain a 64-bit result. Left or right-shift that as appropriate.
+
+    xm.lddi  a1,a0, 0(s8)
+    { addi a1, a1, 8                    ; xm.brff a3, .L_left_shift             }
 
     .L_right_shift:
-        // (from the block above):  x19 = 1, x18 = 1<<(x12 - 1)
-        // adding x18*x19 (=x18) to x11:x10 effectively rounds it when we extract it.
+        // (from the block above):  x19 = 1, x18 = 1<<(a2 - 1)
+        // adding x18*x19 (=x18) to a1:a0 effectively rounds it when we extract it.
         xm.maccs a1, a0, s2, s3
         xm.lsats a1, a0, a2
         xm.lextract a0, a1, a0, a2, 32
-    { nop                                           ;   xm.bu .L_done                              }
+    { nop                               ; xm.bu .L_done                         }
 
     .L_left_shift:
-        // (from the block above):  x19 = 1, x20 = -x12 + 1, x28 = 0
+        // (from the block above):  x19 = 1, s4 = -a2 + 1, t3 = 0
         // If we're left-shifting (or zero-shifting), we still need to saturate to q31.
         // lsats has a bug which doesn't allow to use it with 0, so we'll have to 
         // add 1 to our shift, left-shift, saturate and extract with 1, no need to round here.
-    {   xm.shl a1, a1, s4                          ; nop                                           }
-        xm.linsert a1, t3, a0, s4, 32
-        xm.lsats a1, t3, s3
-        xm.lextract a0, a1, t3, s3, 32
+    { xm.shl a1, a1, s4                 ; nop                                   }
+    xm.linsert a1, t3, a0, s4, 32
+    xm.lsats a1, t3, s3
+    xm.lextract a0, a1, t3, s3, 32
 
 .L_done:
-        xm.lddsp  s7,s6,24
+        xm.lddsp  s7,s6,0
         xm.lddsp  s5,s4,16
         xm.lddsp  s3,s2,8
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+    { nop                               ; lw s8, 24                          (sp) }
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
index 96235de8..514fb590 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_down_s16.S
@@ -20,100 +20,100 @@ void filter_fir_s16_push_sample_down(
 #define FUNCTION_NAME filter_fir_s16_push_sample_down
 
 #define NSTACKVECS      (1)
-#define NSTACKWORDS     (12+8*NSTACKVECS)
+#define NSTACKWORDS     (8+8*NSTACKVECS)
 
 
 
-#define STACK_VEC_TMP   (NSTACKWORDS-8)
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
 
 
-#define buff        x10
-#define length      x11
-#define value       x12
-#define _60         x13
+#define buff        a0
+#define length      a1
+#define value       a2
+#define _60         a3
 #define mask        x18
 #define tail_start  x19
-#define buff_end    x20
-#define buffD       x21
-#define tmp         x24
+#define buff_end    s4
+#define buffD       s5
+#define tmp         s8
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.entsp (NSTACKWORDS)*4
         xm.stdsp  s3,s2,8
         xm.stdsp  s5,s4,16
-        xm.stdsp  s7,s6,24
-    {   li a3, 32                              ;   sw s8, 4                          (sp)}
+        xm.stdsp  s7,s6,0
+    { li a3, 32                         ; sw s8, 24                          (sp) }
 
-    {   slli t3, a3, 3                          ;   li mask, 28 /*28 samples at a time*/   }
-    {   xm.mkmsk mask, mask                        ;   xm.vsetc t3}
+    { slli t3, a3, 3                    ; li mask, 28 /*28 samples at a time*/  }
+    { xm.mkmsk mask, mask               ; xm.vsetc t3                           }
 
     // We're going to be moving 28 samples per loop iteration. The last address at which we 
     // can move 28 samples is  56 bytes before the end of the buffer. The end of the buffer is
     // at  buff + 2*length. 
     
-    {   slli tail_start, length, 1               ;   li t3, 56                             }
-    {   add buff_end, buff, tail_start          ;   slli mask, mask, 4                       }
-    {   sub tail_start, buff_end, t3           ;   addi _60, t3, 4                         }
+    { slli tail_start, length, 1        ; li t3, 56                             }
+    { add buff_end, buff, tail_start    ; slli mask, mask, 4                    }
+    { sub tail_start, buff_end, t3      ; addi _60, t3, 4                       }
 
-    {   mv t3, buff                           ;   xm.sltu tmp, tail_start, buff               }
-    {   li tmp, 28                             ;   xm.bt tmp, .L_loop_end                     }
-    {   add buffD, buff, tmp                    ;   xm.bu .L_loop_top                          }
+    { mv t3, buff                       ; xm.sltu tmp, tail_start, buff         }
+    { li tmp, 28                        ; xm.bt tmp, .L_loop_end                }
+    { add buffD, buff, tmp              ; xm.bu .L_loop_top                     }
 
     .p2align 4 // Does this loop have an FNOP after the first iteration? It all fits in the instruction buffer..
     .L_loop_top:
-        {   addi buff, t3, -4                        ;   xm.vldr t3}
-        {   add t3, buff, _60                      ;   xm.vldd buffD}
-        {   addi buffD, buffD, -4                     ;   xm.vlmaccr0 buff}
+        { addi buff, t3, -4             ; xm.vldr t3                            }
+        { add t3, buff, _60             ; xm.vldd buffD                         }
+        { addi buffD, buffD, -4         ; xm.vlmaccr0 buff                      }
         xm.vlmaccr1 buff
-        {   xm.sltu tmp, tail_start, t3                ;   xm.vstd buffD}
+        { xm.sltu tmp, tail_start, t3   ; xm.vstd buffD                         }
             xm.vstrpv buff , mask
-        {   add buffD, buffD, _60                   ;   xm.bt tmp, .L_loop_end   } 
-        {nop ;   xm.bu .L_loop_top }             /* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+        { add buffD, buffD, _60         ; xm.bt tmp, .L_loop_end                } 
+        { nop                           ; xm.bu .L_loop_top                     }             
     .L_loop_end:
 
 #undef _60
 
-    // x28 holds the address of the next sample to be moved.
-    {   sub length, buff_end, t3               ;   li tmp, 29                         }
-    {   xm.sltu tmp, length, tmp                    ;   li a3, 28                          }
-    { nop                                           ;   xm.bt tmp, .L_skippp                   }
-    {   addi buff, t3, -4                        ;   xm.vldr t3}
-    {   nop                        ;   xm.vlmaccr0 t3}
+    // t3 holds the address of the next sample to be moved.
+    { sub length, buff_end, t3          ; li tmp, 29                            }
+    { xm.sltu tmp, length, tmp          ; li a3, 28                             }
+    { nop                               ; xm.bt tmp, .L_skippp                  }
+    { addi buff, t3, -4                 ; xm.vldr t3                            }
+    { nop                               ; xm.vlmaccr0 t3                        }
     xm.vlmaccr1 t3
-    {   add t3, t3, a3                        ;   nop}
+    { add t3, t3, a3                    ; nop                                   }
         xm.vstrpv buff, mask
 
     
     .L_skippp:
-    {   sub length, buff_end, t3               ; nop                                       }
-    {   li a3, 0                               ;   xm.vldr t3}
-    {   xm.mkmsk tmp, length                       ;   addi buff, t3, -4                    }
-    {   nop                   ;   xm.vlmaccr0 t3}   
+    { sub length, buff_end, t3          ; nop                                   }
+    { li a3, 0                          ; xm.vldr t3                            }
+    { xm.mkmsk tmp, length              ; addi buff, t3, -4                     }
+    { nop                               ; xm.vlmaccr0 t3                        }   
      xm.vlmaccr1 t3            
-    {   add t3, t3, length                    ;   nop}
-    {   addi t3, t3, -2                         ;   slli tmp, tmp, 4                     }
+    { add t3, t3, length                ; nop                                   }
+    { addi t3, t3, -2                   ; slli tmp, tmp, 4                      }
         xm.vstrpv buff, tmp
 
         xm.st16 value,  a3(t3)
         //xm.st16 value,  t3(a3)
 
 .L_done:
-        xm.lddsp  s7,s6,24
+        xm.lddsp  s7,s6,0
         xm.lddsp  s5,s4,16
         xm.lddsp  s3,s2,8
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+    { nop                               ; lw s8, 24                          (sp) }
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
diff --git a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
index c7073b05..ffdeb33e 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/push_sample_up_s16.S
@@ -20,45 +20,45 @@ void filter_fir_s16_push_sample_up(
 #define FUNCTION_NAME filter_fir_s16_push_sample_up
 
 #define NSTACKVECS      (1)
-#define NSTACKWORDS     (12+8*NSTACKVECS)
+#define NSTACKWORDS     (8+8*NSTACKVECS)
 
 
 
-#define STACK_VEC_TMP   (NSTACKWORDS-8)
+#define STACK_VEC_TMP   (NSTACKWORDS-8-1)
 
 
-#define buff_start  x10
-#define length      x11
-#define value       x12
-#define tmpB        x13
+#define buff_start  a0
+#define length      a1
+#define value       a2
+#define tmpB        a3
 #define mask        x18
 #define buffR       x19
-#define tmpC        x20
-#define buffD       x21
-#define tmp         x24
+#define tmpC        s4
+#define buffD       s5
+#define tmp         s8
     
 .text
-.globl FUNCTION_NAME; /* Translation error on this line: unexpected token at position 20. */ 
+.globl FUNCTION_NAME;
 .type FUNCTION_NAME,@function
 .p2align 4
 
 FUNCTION_NAME:
-        xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+        xm.entsp (NSTACKWORDS)*4
         xm.stdsp  s3,s2,8
         xm.stdsp  s5,s4,16
-        xm.stdsp  s7,s6,24
-    {   li tmpB, 32                            ;   sw s8, 4                          (sp)}
+        xm.stdsp  s7,s6,0
+    { li tmpB, 32                       ; sw s8, 24                          (sp) }
 
-    {   slli t3, tmpB, 3                        ;   xm.mkmski mask, 32                          }
-    {   mv tmp, length                         ;   xm.vsetc t3}
+    { slli t3, tmpB, 3                  ; xm.mkmski mask, 32                    }
+    { mv tmp, length                    ; xm.vsetc t3                           }
 
 // If the number of samples is odd, pretend it was one larger. If it's even, move the
 // final sample without the VPU.
 
     xm.zexti tmp, 1
     xm.eq buffR, length, 1 
-    {   add length, length, tmp                 ;   xm.bt buffR, .L_write_new_sample           }
-    {   addi tmp, length, -2                      ;   xm.bt tmp, .L_odd_samps                    }
+    { add length, length, tmp           ; xm.bt buffR, .L_write_new_sample      }
+    { addi tmp, length, -2              ; xm.bt tmp, .L_odd_samps               }
 .L_even_samps:
    // xm.ld16s buffD, buff_start(tmp)
     xm.ld16s buffD, tmp(buff_start)
@@ -67,78 +67,78 @@ FUNCTION_NAME:
     xm.st16 buffD, tmp(buff_start)
 .L_odd_samps:
 
-    {   slli mask, mask, 4                       ;   slli length, length, 1                   }
+    { slli mask, mask, 4                ; slli length, length, 1                }
 
 // buffR <-- first byte after buff[]
 // mask <-- 0xFFFFFFF0
-    {   add buffR, buff_start, length           ; nop                                           }
+    { add buffR, buff_start, length     ; nop                                   }
 
 // Move buffD and buffR to point to:
-    {   sub buffR, buffR, tmpB                  ;   li tmpB, 28                            }
-    {   sub buffD, buffR, tmpB                  ;   srli mask, mask, 2                       }
+    { sub buffR, buffR, tmpB            ; li tmpB, 28                           }
+    { sub buffD, buffR, tmpB            ; srli mask, mask, 2                    }
 
 // If (buffD < buff_start) then skip the loop.
-    {   mv t3, buffR                          ;   xm.sltu tmp, buffD, buff_start              }
-    {   li tmpB, 56                            ;   xm.bt tmp, .L_loop_end                     }
-    { nop                                           ;   xm.bu .L_loop_top                          }
+    { mv t3, buffR                      ; xm.sltu tmp, buffD, buff_start        }
+    { li tmpB, 56                       ; xm.bt tmp, .L_loop_end                }
+    { nop                               ; xm.bu .L_loop_top                     }
 
 // Do the loop. Align to 16 bytes so that we hopefully don't have FNOPs after the first
 // iteration.
     .p2align 4
     .L_loop_top:
-        {   mv buffR, buffD                        ;   xm.vldr t3}
-        {   sub buffD, buffD, tmpB                  ;   xm.vldd buffD}
-        {   xm.sltu tmp, buffD, buff_start              ;   xm.vlmaccr0 t3}
+        { mv buffR, buffD               ; xm.vldr t3                            }
+        { sub buffD, buffD, tmpB        ; xm.vldd buffD                         }
+        { xm.sltu tmp, buffD, buff_start ; xm.vlmaccr0 t3                        }
         xm.vlmaccr1 t3
         xm.vstrpv t3, mask
-        { nop                                           ;   xm.vstd buffR}
-      //  {   sub t3, t3, tmpB                      ;   xm.brff tmp, .L_loop_top                     }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
-         {   sub t3, t3, tmpB                      ;   xm.bt tmp, .L_loop_end        }
-         {xm.bu .L_loop_top ;nop}
+        { nop                           ; xm.vstd buffR                         }
+      //  {   sub t3, t3, tmpB                      ;   xm.brff tmp, .L_loop_top                     }
+         { sub t3, t3, tmpB             ; xm.bt tmp, .L_loop_end                }
+         { xm.bu .L_loop_top            ; nop                                   }
     .L_loop_end:
 
 
-    // If (x28 < buff_start ) we CANNOT do another vector (just vR[]) using the same
+    // If (t3 < buff_start ) we CANNOT do another vector (just vR[]) using the same
     // mask. Otherwise, we can.
 
-    {   xm.sltu tmp, t3, buff_start                ; nop                                           }
-    {   mv buffR, t3                          ;   xm.bt tmp, .L_skippp                       }
-    {   li tmpB, 28                            ;   xm.vldr t3}
-    {   sub t3, t3, tmpB                     ;   xm.vlmaccr0 buffR}
+    { xm.sltu tmp, t3, buff_start       ; nop                                   }
+    { mv buffR, t3                      ; xm.bt tmp, .L_skippp                  }
+    { li tmpB, 28                       ; xm.vldr t3                            }
+    { sub t3, t3, tmpB                  ; xm.vlmaccr0 buffR                     }
     xm.vlmaccr1 buffR
     xm.vstrpv buffR, mask
 
 .L_skippp:
     // Now we have less than 1 vector (14 samples) to shift. They'll be at the end of
-    // the vector when we load x28. Everything after buff_start.
+    // the vector when we load t3. Everything after buff_start.
 
-    {   sub length, buff_start, t3             ;   xm.mkmski tmpC, 2                           }
-    {   xm.mkmski mask, 32                          ;   xm.bitrev tmpC, tmpC                       }
+    { sub length, buff_start, t3        ; xm.mkmski tmpC, 2                     }
+    { xm.mkmski mask, 32                ; xm.bitrev tmpC, tmpC                  }
     
-    { xm.shl mask, mask, length; xm.vldr t3}
+    { xm.shl mask, mask, length         ; xm.vldr t3                            }
 
     xm.andnot mask, tmpC 
-    {nop; xm.vlmaccr0 x28                          }
-    xm.vlmaccr1 x28   
+    { nop                               ; xm.vlmaccr0 t3                       }
+    xm.vlmaccr1 t3   
 
     xm.vstrpv t3, mask
 
 .L_write_new_sample:
-    {   li tmpC, 0                             ; nop                                           }
+    { li tmpC, 0                        ; nop                                   }
    // xm.st16 value, buff_start(tmpC)
      xm.st16 value, tmpC(buff_start)
 .L_done:
-        xm.lddsp  s7,s6,24
+        xm.lddsp  s7,s6,0
         xm.lddsp  s5,s4,16
         xm.lddsp  s3,s2,8
-    { nop                                           ;   lw s8, 4                          (sp)}
-        xm.retsp (NSTACKWORDS)*4/* Multiple XAT warnings: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in retsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate", 'RETSP operand may need scaling' */
-
-//.cc_bottom FUNCTION_NAME.function;  /* Translation error on this line: unexpected token at position 33. */ 
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords;  /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores;  /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers;  /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends;  /* Translation error on this line: unexpected token at position 32. */ 
+    { nop                               ; lw s8, 24                          (sp) }
+        xm.retsp (NSTACKWORDS)*4
+
+//.cc_bottom FUNCTION_NAME.function; 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS;     .global FUNCTION_NAME.nstackwords; 
+.set FUNCTION_NAME.maxcores,1;                  .global FUNCTION_NAME.maxcores; 
+.set FUNCTION_NAME.maxtimers,0;                 .global FUNCTION_NAME.maxtimers; 
+.set FUNCTION_NAME.maxchanends,0;               .global FUNCTION_NAME.maxchanends; 
 .L_size_end:
     .size FUNCTION_NAME, .L_size_end - FUNCTION_NAME
 
diff --git a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
index 49ee937e..1228631e 100644
--- a/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
+++ b/lib_xcore_math/src/arch/vx4b/filter/vect_s32_convolve_valid.S
@@ -18,70 +18,70 @@ headroom_t vect_s32_convolve_valid(
 // #include "../asm_helper.h"
 
 #define NSTACKVECTS     (2)
-#define NSTACKWORDS     (16 + 8*NSTACKVECTS+4)
+#define NSTACKWORDS     (8 + 8*NSTACKVECTS+4)
 
 #define FUNCTION_NAME   vect_s32_convolve_valid
 
-#define STACK_VEC_TMP   (NSTACKWORDS-8-4)
+#define STACK_VEC_TMP   (NSTACKWORDS-8-2)
 
 
-#define sig_out     x10
-#define sig_in      x11
-#define filter      x12
-#define len         x13
+#define sig_out     a0
+#define sig_in      a1
+#define filter      a2
+#define len         a3
 
 #define tmpA        x18
 #define _32         x19
-#define vec_tmp     x20
-#define tmpB        x21
+#define vec_tmp     s4
+#define tmpB        s5
 
 
 #define P           filter    // P = (filter_taps >> 1)
 
 
 
-.text; .issue_mode dual /* Translation error on this line: unexpected token at position 5. */ 
+.text; .issue_mode dual
 .p2align 2
 
 
 FUNCTION_NAME:
 
-    xm.entsp (NSTACKWORDS)*4/* XAT Warning: "Falling back on assumption: the int < 253 for the integer value of the item at position 0 in the instruction's operands in dualentsp NSTACKWORDS\nMessage: 0th operand fits in 6 bit unsigned immediate" */
+    xm.entsp (NSTACKWORDS)*4
     xm.stdsp  s3,s2,8
     xm.stdsp  s5,s4,16
     xm.stdsp  s7,s6,24
 
   ////// Set mode to 32-bit
-  { li t3, 0                                ; sw s8, 4                            (sp)}
-  { addi vec_tmp,sp, (STACK_VEC_TMP)*4           ; xm.vsetc t3}
+  { li t3, 0                            ; sw s8, 4                            (sp) }
+  { addi vec_tmp,sp, (STACK_VEC_TMP)*4  ; xm.vsetc t3                           }
   
   ////// Move the filter coefficients into vC[]
   mv tmpB, a4
-  { mv t3, filter                           ; nop}
-  { slli tmpA, tmpB, 2                         ; xm.vclrdr                                    }
-  { xm.mkmsk tmpA, tmpA                          ; xm.vstd vec_tmp}
-  { srli P, tmpB, 1                            ; xm.vldr t3}
+  { mv t3, filter                       ; nop                                   }
+  { slli tmpA, tmpB, 2                  ; xm.vclrdr                             }
+  { xm.mkmsk tmpA, tmpA                 ; xm.vstd vec_tmp                       }
+  { srli P, tmpB, 1                     ; xm.vldr t3                            }
     xm.vstrpv vec_tmp, tmpA
-  { sub len, len, P                          ; xm.vldc vec_tmp}
-  { sub len, len, P                          ; li _32, 32                               }
+  { sub len, len, P                     ; xm.vldc vec_tmp                       }
+  { sub len, len, P                     ; li _32, 32                            }
   
   // Number of output elements is  sig_in_length - (2 * (filter_taps >> 1)) = sig_in_length - 2*P
 
-  { srli t3, len, 3                           ; add sig_in, sig_in, _32                   }
-  { addi sig_in, sig_in, -4                     ; xm.brff t3, .L_loop_bot                       }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { srli t3, len, 3                     ; add sig_in, sig_in, _32               }
+  { addi sig_in, sig_in, -4             ; xm.brff t3, .L_loop_bot               }
 
   .L_loop_top:
-    { addi len, len, -8                           ; xm.vclrdr                                    }
-    { addi t3, sig_in, -4                        ; xm.vlmaccr0 sig_in}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { addi t3, t3, -4                           ; xm.vlmaccr0 t3}
-    { srli t3, len, 3                           ; xm.vlmaccr0 t3}
-    { add sig_in, sig_in, _32                   ; xm.vstr sig_out}
-    { add sig_out, sig_out, _32                 ; xm.bt t3, .L_loop_top                       }
+    { addi len, len, -8                 ; xm.vclrdr                             }
+    { addi t3, sig_in, -4               ; xm.vlmaccr0 sig_in                    }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { addi t3, t3, -4                   ; xm.vlmaccr0 t3                        }
+    { srli t3, len, 3                   ; xm.vlmaccr0 t3                        }
+    { add sig_in, sig_in, _32           ; xm.vstr sig_out                       }
+    { add sig_out, sig_out, _32         ; xm.bt t3, .L_loop_top                 }
   .L_loop_bot:
 
 // If there is a tail, then len will be non-zero.
@@ -90,36 +90,36 @@ FUNCTION_NAME:
 // by definition, be fewer than 8 elements.  So sig_in[] needs to be offset:
 //  sig_in <-- sig_in - 4*(8 - len) = sig_in - 32 + 4*len
 
-  { slli len, len, 2                           ; xm.brff len, .L_finish                         }/* XAT Warning: 'Instruction xm.brff can only branch forwards; this branch may need revising' */
+  { slli len, len, 2                    ; xm.brff len, .L_finish                }
 
-  { sub sig_in, sig_in, _32                   ; xm.vclrdr                                    }
-  { xm.mkmsk tmpA, len                           ; add sig_in, sig_in, len                   }
+  { sub sig_in, sig_in, _32             ; xm.vclrdr                             }
+  { xm.mkmsk tmpA, len                  ; add sig_in, sig_in, len               }
 
   .L_tail_loop:
-    { addi len, len, -4                         ; xm.vlmaccr0 sig_in}
-    { addi sig_in, sig_in, -4                   ; xm.bt len, .L_tail_loop                      }
+    { addi len, len, -4                 ; xm.vlmaccr0 sig_in                    }
+    { addi sig_in, sig_in, -4           ; xm.bt len, .L_tail_loop               }
   .L_tail_loop_bot:
 
     xm.vstrpv sig_out, tmpA
-  { nop                                           ; xm.vstr vec_tmp}
+  { nop                                 ; xm.vstr vec_tmp                       }
 
 .L_finish:
         xm.lddsp  s3,s2,8
         xm.lddsp  s5,s4,16
         xm.lddsp  s7,s6,24
-    {   li a0, 31                              ;   xm.vgetc t3}
-    {   xm.zexti t3, 5                             ;   lw s8, 4                          (sp)}
-    {   sub a0, a0, t3                         ;   xm.retsp (NSTACKWORDS)*4                       } 
+    { li a0, 31                         ; xm.vgetc t3                           }
+    { xm.zexti t3, 5                    ; lw s8, 4                          (sp) }
+    { sub a0, a0, t3                    ; xm.retsp (NSTACKWORDS)*4              } 
     
 .L_func_end:
 
 
 .global FUNCTION_NAME
 .type FUNCTION_NAME,@function
-.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords /* Translation error on this line: unexpected token at position 42. */ 
-.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores /* Translation error on this line: unexpected token at position 29. */ 
-.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers /* Translation error on this line: unexpected token at position 30. */ 
-.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends /* Translation error on this line: unexpected token at position 32. */ 
+.set FUNCTION_NAME.nstackwords,NSTACKWORDS; .global FUNCTION_NAME.nstackwords
+.set FUNCTION_NAME.maxcores,1;              .global FUNCTION_NAME.maxcores
+.set FUNCTION_NAME.maxtimers,0;             .global FUNCTION_NAME.maxtimers
+.set FUNCTION_NAME.maxchanends,0;           .global FUNCTION_NAME.maxchanends
 .size FUNCTION_NAME, .L_func_end - FUNCTION_NAME
 
 
diff --git a/tests/filter_tests/src/filter/test_filter_fir_s16.c b/tests/filter_tests/src/filter/test_filter_fir_s16.c
index 26f7ce36..289433d8 100644
--- a/tests/filter_tests/src/filter/test_filter_fir_s16.c
+++ b/tests/filter_tests/src/filter/test_filter_fir_s16.c
@@ -56,7 +56,11 @@ TEST(filter_fir_s16, case0)
         for(int i = 0; i < 20; i++){
             exp += N;  // old sample (i) leaves as new sample (N+i) comes in.
             int16_t res = filter_fir_s16(&filter, N+i);
-            TEST_ASSERT_EQUAL(exp, res);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(1, exp, res);
+            #else
+                TEST_ASSERT_EQUAL(exp, res);
+            #endif
         }
 
     }
@@ -92,7 +96,11 @@ TEST(filter_fir_s16, case1)
             int16_t exp = N*(N-1) / 2;
 
             int16_t res = filter_fir_s16(&filter, 1);
-            TEST_ASSERT_EQUAL(exp, res);
+            #if defined(__VX4B__)
+                TEST_ASSERT_INT16_WITHIN(1, exp, res);
+            #else
+                TEST_ASSERT_EQUAL(exp, res);
+            #endif
         }
 
     }
@@ -169,7 +177,11 @@ TEST(filter_fir_s16, case2)
         // Apply the filter
         int16_t res = filter_fir_s16(&filter, new_sample);
 
-        TEST_ASSERT_EQUAL_MESSAGE(expected16, res, msg_buff);
+        #if defined(__VX4B__)
+            TEST_ASSERT_INT16_WITHIN_MESSAGE(2, expected16, res, msg_buff);
+        #else
+            TEST_ASSERT_EQUAL_MESSAGE(expected16, res, msg_buff);
+        #endif
     }
 
 }
diff --git a/tests/filter_tests/src/filter/test_filter_fir_s32.c b/tests/filter_tests/src/filter/test_filter_fir_s32.c
index 2b961aac..d059b957 100644
--- a/tests/filter_tests/src/filter/test_filter_fir_s32.c
+++ b/tests/filter_tests/src/filter/test_filter_fir_s32.c
@@ -307,7 +307,7 @@ TEST(filter_fir_s32, case3)
 
         int32_t expected32;
 
-        if(filter.shift >= 0){
+        if(filter.shift > 0){
             expected32 = (int32_t) ((expected64 + (1LL << (filter.shift-1))) >> filter.shift);
         } else {
             expected32 = (int32_t) (expected64 << -filter.shift);