#***************************************************************************/
#*                                                                         */
#* DESCRIPTION:                                                            */
#*                                                                         */
#* MPC57xx digital signal processing on LSP instruction set                */
#*=========================================================================*/
#*                                                                         */
#* REV      AUTHOR         DATE       DESCRIPTION OF CHANGE                */
#* ---   -----------     ----------   ---------------------                */
#* 0.1   Karl-F. Leiss   03/Feb/12    Initial version (not functional ! )  */
#* 0.2   Karl-F. Leiss   08/Feb/12    Support of even and odd number of    */
#*                                    taps, N must be multiple of 2        */
#***************************************************************************/

  .globl fir_frac16
 # .type fir_frac16,@function
  .section .vletext  , axv
  .vle

  # N     - number of output samples
  # ntaps - number of filter coefficients
  # x     - input array of size N+ntaps-1
  # y     - output array of size N
  # hr    - array of coefficients of length ntaps, coefficients are stored in reversed order

  # C prototype (the exported symbol is fir_frac16; this comment previously named it fir_frac16_c):
  #   void fir_frac16(unsigned short N, unsigned short ntaps, short *x, short *y, short *h);
  # Example call:
  #   fir_frac16(N, ntaps, SDADC1_RESULTS, LSP_FIR_OUT, hr1);
  .align 16
fir_frac16:
#-----------------------------------------------------------------------------
# void fir_frac16(unsigned short N, unsigned short ntaps,
#                 short *x, short *y, short *h);
#
# FIR filter on 16-bit samples: writes N output halfwords to y, each one the
# accumulated sum of ntaps coefficient*sample products.
#
# In:    r3 = N      number of output samples
#        r4 = ntaps  number of coefficients
#        r5 = x      input samples  (N+ntaps-1 halfwords)
#        r6 = y      output samples (N halfwords)
#        r7 = h      coefficients   (ntaps halfwords, reversed order)
# Out:   y[0..N-1] written; no return value.
#        N == 0 or ntaps == 0 returns immediately without touching y.
# Clobb: r3..r7 (reused as loop state), CTR, cr0.
#        r25..r31 are used but saved/restored on the stack.
# Stack: one 40-byte frame (8-byte aligned):
#          0(r1)      back chain (written by e_stwu, never overwritten)
#          4(r1)      LR save word (unused, this is a leaf routine)
#          8(r1)      padding
#         12..39(r1)  r25..r31
#
# NOTE(review): the load/store displacements below are written as byte
# offsets (4 per word, 2 per halfword), matching what the even-tap path
# always used. The odd-tap path previously used 0x1 for all of them, which
# is inconsistent with that; confirm against the assembler in use.
# NOTE(review): x_ptr starts 2 bytes further on for every output sample, so
# the 32-bit sample loads are only halfword-aligned on every other output.
# Confirm the target core permits that for these load forms.
#-----------------------------------------------------------------------------
#<# register definition
.equ N, r3
.equ ntaps, r4
.equ x, r5
.equ y, r6
.equ h, r7

.equ inner_loop_cnt, ntaps     # r4 is reused: number of tap PAIRS (ntaps/2)
.equ y_stop, N                 # r3 is reused: value of y after the last store
.equ xp01, r25                 # two input samples (or one, for the odd tap)
.equ hp01, r26                 # two coefficients  (or one, for the odd tap)
.equ i, r27                    # scratch: ntaps & 1
.equ x_ptr, r28                # running input pointer inside one output
.equ h_ptr, r29                # running coefficient pointer inside one output
.equ des, r30                  # accumulator
.equ des1, r31                 # second accumulator; presumably the odd half
                               # of the des/des1 pair written by the vector
                               # MAC - TODO confirm against the LSP reference

.equ FIR_FRAME_SIZE, 40        # 8-byte frame header + 7 saved GPRs, padded to 8
.equ FIR_GPR_SAVE, 12          # offset of the r25..r31 save area in the frame
#>#

  # Degenerate sizes: with N == 0 the "y == y_stop" exit test is only reached
  # after a store, and with ntaps == 0 the CTR loop would start from zero and
  # run 2^32 times. Return before any stack frame is created.
  se_cmpi     N, 0
  e_beq       __fir_return
  se_cmpi     ntaps, 0
  e_beq       __fir_return

  # Create the stack frame; e_stwu leaves the back chain at 0(r1).
  e_stwu      r1, -FIR_FRAME_SIZE(r1)
  # r25..r31 are non-volatile and are used below, so save them. The save
  # area starts above the frame header so the back chain stays intact.
  e_stmw      xp01, FIR_GPR_SAVE(r1)

  # The loops below only use load/store-with-update forms, which add the
  # displacement to the pointer BEFORE the access. Bias every pointer so the
  # first access lands on element 0:
  #   x: -6 = -(2 added at the top of every outer loop) - (4 of the first load)
  #   y: -2 = -(2 of the first halfword store)
  #   h: -4 = -(4 of the first load)
  se_subi     x, 6
  se_subi     y, 2
  se_subi     h, 4

  # i = ntaps & 1 (1 when the number of taps is odd)
  e_andi.     i, ntaps, 1

  # inner loop counter = ntaps / 2: each inner iteration consumes two taps
  e_srwi      inner_loop_cnt, ntaps, 1

  # y_stop = (y - 2) + 2*N bytes, i.e. the value y holds right after the
  # N-th halfword store. One output sample is produced per outer iteration.
  e_slwi      y_stop, N, 1
  se_add      y_stop, y

  # odd number of taps -> use the path with the extra trailing tap
  se_cmpi     i, 1
  e_beq       __fir_outer_loop_odd

  # ---------------------------------------------------------------------
  # Even number of taps (ntaps >= 2 here, so inner_loop_cnt >= 1)
  # ---------------------------------------------------------------------
__fir_outer_loop_even:
  # zero both accumulators
  e_li        des, 0
  e_li        des1, 0
  # restart at the first coefficient
  se_mr       h_ptr, h
  # the input window moves on by one sample (2 bytes) per output sample
  se_addi     x, 2
  se_mr       x_ptr, x
  # CTR = number of tap pairs
  mtctr       inner_loop_cnt
__fir_inner_loop_even:
  # load the next two input samples, advancing x_ptr by 4
  zlwhu       xp01, 4(x_ptr)
  # load the next two coefficients, advancing h_ptr by 4
  zlwhu       hp01, 4(h_ptr)
  # multiply the two coefficient/sample pairs and accumulate
  zvmhulsfaas des, hp01, xp01
  # next tap pair while CTR != 0
  e_bdnz      __fir_inner_loop_even
  # combine the two accumulators
  se_add      des, des1
  # store one output halfword, advancing y by 2
  zstheu      des, 2(y)
  # loop until y reaches y_stop
  cmp         cr0, y, y_stop
  e_bne       cr0, __fir_outer_loop_even
  e_b         __fir_exit

  # ---------------------------------------------------------------------
  # Odd number of taps: ntaps/2 tap pairs followed by one single tap
  # ---------------------------------------------------------------------
__fir_outer_loop_odd:
  # zero both accumulators
  e_li        des, 0
  e_li        des1, 0
  # restart at the first coefficient
  se_mr       h_ptr, h
  # the input window moves on by one sample (2 bytes) per output sample
  se_addi     x, 2
  se_mr       x_ptr, x
  # ntaps == 1: there are no tap pairs. Loading CTR with 0 would make the
  # e_bdnz loop run 2^32 times, so go straight to the single tap.
  se_cmpi     inner_loop_cnt, 0
  e_beq       __fir_tail_odd
  # CTR = number of tap pairs
  mtctr       inner_loop_cnt
__fir_inner_loop_odd:
  # load the next two input samples, advancing x_ptr by 4
  zlwhu       xp01, 4(x_ptr)
  # load the next two coefficients, advancing h_ptr by 4
  zlwhu       hp01, 4(h_ptr)
  # multiply the two coefficient/sample pairs and accumulate
  zvmhulsfaas des, hp01, xp01
  # next tap pair while CTR != 0
  e_bdnz      __fir_inner_loop_odd
__fir_tail_odd:
  # combine the two accumulators
  se_add      des, des1
  # Load the one remaining sample and coefficient. Both sit 4 bytes past the
  # last pair that was loaded (or at element 0 when there were no pairs);
  # these loads do not update the pointers.
  zlhhe       xp01, 4(x_ptr)
  zlhhe       hp01, 4(h_ptr)
  # accumulate the last tap
  zvmhulsfaas des, hp01, xp01
  # store one output halfword, advancing y by 2
  zstheu      des, 2(y)
  # loop until y reaches y_stop
  cmp         cr0, y, y_stop
  e_bne       cr0, __fir_outer_loop_odd

  # ---------------------------------------------------------------------
  # Common exit
  # ---------------------------------------------------------------------
__fir_exit:
  # restore non-volatile r25..r31
  e_lmw       xp01, FIR_GPR_SAVE(r1)
  # delete stack frame
  e_addi      r1, r1, FIR_FRAME_SIZE
__fir_return:
  se_blr
