/*
 * Copyright 2020-2024 NXP
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stdio.h>
#include <stdlib.h>


#include "fsl_device_registers.h"
#include "clock_config.h"
#include "board.h"
#include "fsl_debug_console.h"

#include "fsl_gpio.h"
#include "pin_mux.h"

#include "GlobalDef.h"
#include "CircularBuf.h"
#include "SubFunc.h"
#include "AudioDmaIO.h"

#include "usb_device_config.h"
#include "usb.h"
#include "usb_device.h"

#include "usb_device_class.h"
#include "usb_device_audio.h"
#include "usb_device_descriptor.h"
#include "main.h"
#include "NxpCm7ASRC.h"
#include "AsrcProcess.h"


/*
 * I2S (SAI1) DMA audio buffers.
 *
 * Two Tx buffers (A/B) and two Rx buffers (A/B) are used alternately by the
 * DMA (see the TcdA <-> TcdB scatter/gather linking in InitDmaForSaiAudio()).
 * Each buffer holds AudioFrameSizeInSamplePerCh samples for every channel,
 * with 2 channels (L/R) per I2S data line.
 * All four buffers are placed in the linker section named "NonCacheable".
 */
__attribute__((section("NonCacheable")))
int32_t AudioTxBuf_A[AudioFrameSizeInSamplePerCh * 2 * NUM_I2S_OUTPUT];//original note: space for 2 I2S data lines --- 4ch (assumes NUM_I2S_OUTPUT == 2 --- TODO confirm)
__attribute__((section("NonCacheable")))
int32_t AudioTxBuf_B[AudioFrameSizeInSamplePerCh * 2 * NUM_I2S_OUTPUT];
__attribute__((section("NonCacheable")))
int32_t AudioRxBuf_A[AudioFrameSizeInSamplePerCh * 2 * NUM_I2S_INPUT]; //original note: space for 4 I2S data lines --- 8ch (assumes NUM_I2S_INPUT == 4 --- TODO confirm)
__attribute__((section("NonCacheable")))
int32_t AudioRxBuf_B[AudioFrameSizeInSamplePerCh * 2 * NUM_I2S_INPUT];

/*
 * In-memory DMA transfer control descriptors (32-byte aligned).
 * InitDmaForSaiAudio() fills them and links A -> B -> A through DLAST_SGA,
 * for DMA channel 0 (SAI1 Tx) and DMA channel 1 (SAI1 Rx).
 * NOTE(review): unlike the audio buffers these are not placed in the
 * "NonCacheable" section --- confirm they live in memory the DMA sees coherently.
 */
__ALIGNED(32) DMA_TCD_Type DmaCh0_SAI1Tx0_TcdA;
__ALIGNED(32) DMA_TCD_Type DmaCh0_SAI1Tx0_TcdB;
__ALIGNED(32) DMA_TCD_Type DmaCh1_SAI1Rx0_TcdA;
__ALIGNED(32) DMA_TCD_Type DmaCh1_SAI1Rx0_TcdB;


/* Set to 1 by AudioRxTxDmaIntrProcess(); it is not cleared anywhere in this file. */
volatile int DmaAudioRxBufIsReady=0;
/* Incremented once per AudioRxTxIOProcess() call; drives the LED heartbeat and the VCOM report period. */
uint32_t I2SDmaIntrCnt;


/* Becomes 1 (and stays 1) once I2SOutputMuteCnt has counted down to 0. */
volatile unsigned char MainAlgorithmProcessingIsAllowed=0;
unsigned int I2SOutputMuteCnt=100;				//number of AudioRxTxIOProcess() calls with the I2S output forced to zero after start (original note: about 400ms --- depends on the frame duration, TODO confirm)

/* Heartbeat LED state selector, toggled every 4000 calls of AudioRxTxIOProcess(). */
uint8_t DbgLedUpDnControlFlag = 0;

/* One frame of USB down-stream audio, de-interleaved into left and right. */
int UsbDnOneFrameAudioBuf_L[AudioFrameSizeInSamplePerCh];
int UsbDnOneFrameAudioBuf_R[AudioFrameSizeInSamplePerCh];

/*
 * Per-channel pointers into the Rx/Tx buffer (A or B) selected by
 * AudioRxTxIOProcess() on each call; each entry points to
 * AudioFrameSizeInSamplePerCh consecutive samples of one channel.
 */
int32_t *I2SInputPtr[NUM_I2S_INPUT*2];
int32_t *I2SOutputPtr[NUM_I2S_OUTPUT*2];


/* Fill both I2S Tx DMA buffers (A and B) with zeros, i.e. digital silence. */
void ClearI2SDmaDualTxBuf(void)
{
    (void) memset(AudioTxBuf_A, 0, sizeof AudioTxBuf_A);
    (void) memset(AudioTxBuf_B, 0, sizeof AudioTxBuf_B);
}
/* Fill both I2S Rx DMA buffers (A and B) with zeros. */
void ClearI2SDmaDualRxBuf(void)
{
    (void) memset(AudioRxBuf_A, 0, sizeof AudioRxBuf_A);
    (void) memset(AudioRxBuf_B, 0, sizeof AudioRxBuf_B);
}

/*
 * Stop SAI1 audio: gate the SAI1 peripheral clock, then zero both Tx and
 * both Rx DMA buffers.
 *
 * This function does not touch the SAI enable bits, the DMA channel setup or
 * the DMA interrupts; it only removes the clock and clears the buffers.
 */
void StopSai1I2S(void)
{
    CLOCK_DisableClock(kCLOCK_Sai1);    //gate the clock first, then clear the buffers
    ClearI2SDmaDualTxBuf();
    ClearI2SDmaDualRxBuf();
}

/*
 * Configure SAI1 for I2S: Tx runs asynchronously and generates bit clock and
 * frame sync internally, Rx runs synchronously to Tx. Both directions use
 * 2 words of 32 bits per frame, MSB first.
 *
 * The function only programs the format registers; the transmitter/receiver
 * enable bits and the data channel enable bits (TCR3/RCR3 bit16..) are left
 * cleared here and are set later (see InitDmaForSaiAudio() and
 * EnableSai1TxAndRxDmaRequest()).
 *
 * If the VERID or PARAM register does not hold the expected value the
 * function returns silently without configuring anything (the SAI1 clock
 * stays enabled in that case).
 *
 * All bit masks are built from unsigned constants: "1 << 31" on a signed int
 * is undefined behaviour in C.
 */
void InitSai1I2S(void)
{
    uint32_t r;
    SAII2S_RegStructType *p;

    //step 1, check if SAI basic info is correct
    CLOCK_EnableClock(kCLOCK_Sai1);

    p = (SAII2S_RegStructType*) SAI1_BaseRegAddr;
    r = p->VERID;
    if ((r & 0xffff0000u) != 0x03000000u)
    {
        return;
    }
    r = p->PARAM;
    if ((r & 0x00000f0fu) != 0x00000504u)
    {
        return;
    }

    //step 2, reset and disable tx and rx
    //clear bit29,30,31 (all 3 enable modes) and bit0 (DMA request enable);
    //tx is handled first and rx second, because rx runs in sync mode and follows tx
    p->TCSR &= ~((1u << 29) | (1u << 30) | (1u << 31) | (1u << 0));
    p->RCSR &= ~((1u << 29) | (1u << 30) | (1u << 31) | (1u << 0));

    p->TCSR |= (1u << 24);                    //set bit 24 to reset tx
    p->RCSR |= (1u << 24);                    //set bit 24 to reset rx
    p->TCSR |= (1u << 25);                    //set bit 25 to reset tx fifo
    p->RCSR |= (1u << 25);                    //set bit 25 to reset rx fifo
    RoughMsDelay(1);
    p->TCSR &= ~(1u << 24);                   //clear bit 24 to release tx from reset
    p->RCSR &= ~(1u << 24);                   //clear bit 24 to release rx from reset

    //step 3, config I2S format --- tx
    p->TCR1 = 0; //tx fifo watermark, must be 0 here: the fifo request to the dma is raised
                 //once the words written by the previous dma minor loop have all been sent out

    r = (0u << 30);    //bit 30,31: async mode --- tx is in async, rx is in sync
    r |= 1u;           //bit 0~7, div: (1+1)*2=4 ==> original note: mclk 12.288MHz /4 = 3.072MHz bit clock
    r |= (0u << 29);   //bit29: no bit clock swap, tx uses its own bit clock
    r |= (1u << 24);   //bit24: bit clock is generated internally
    r |= (1u << 25);   //bit25: bit clock is active low: drive outputs on falling edge, sample inputs on rising edge
    r |= (1u << 26);   //bit26,27: clock source selection = 1 (original note: "mclk1 ???" --- TODO confirm against clock tree)
    p->TCR2 = r;

    r = 0; //bit0~4 cleared: word flag is configured for the first word in the frame
    //bit16.. (tx channel enable) is intentionally NOT set here --- enabling the channels is the final step
    p->TCR3 = r;

    r = 1u;            //bit0: LRCK (FrameSync) is generated internally
    r |= (1u << 1);    //bit1: LRCK (FrameSync) polarity; NOTE(review): per the SAI register description a 1 here
                       //      selects active-low frame sync --- could be changed later, depends on whether L is high or R is high
    r |= (1u << 3);    //bit3: I2S format, frame sync asserts 1 bit before the first bit of the frame
    r |= (1u << 4);    //bit4: MSB first
    r |= (31u << 8);   //bit8~12: 31+1 bit clocks are covered by FrameSync (LRCK)
    r |= (1u << 16);   //bit16~20: 1+1 = 2 words in one frame, left and right
    p->TCR4 = r;

    r = 0;
    r |= (31u << 16);  //bit16~20: word 0 width is 31+1 bits
    r |= (31u << 24);  //bit24~28: word N width is 31+1 bits (the words following the first word)
    r |= (31u << 8);   //bit8~12: first bit shifted is bit 31
    p->TCR5 = r;

    //step 4, config I2S format --- rx
    p->RCR1 = 1; //rx fifo watermark, must be 1 here: once SAI has received 2 words (L and R)
                 //the fifo level is greater than 1 and SAI raises a fifo request to the dma

    r = (1u << 30);    //bit 30,31: sync mode --- rx is in sync with tx
    r |= 1u;           //bit 0~7, div: same value as tx
    r |= (0u << 29);   //bit29: no bit clock swap
    r |= (1u << 24);   //bit24: bit clock direction, same setting as tx
    r |= (1u << 25);   //bit25: bit clock is active low: drive outputs on falling edge, sample inputs on rising edge
    r |= (1u << 26);   //bit26,27: clock source selection = 1, same setting as tx
    p->RCR2 = r;

    r = 0; //bit0~4 cleared: word flag is configured for the first word in the frame
    //bit16.. (rx channel enable) is intentionally NOT set here --- enabling the channels is the final step
    p->RCR3 = r;

    r = 1u;            //bit0: frame sync direction, same setting as tx
    r |= (1u << 1);    //bit1: LRCK (FrameSync) polarity, same setting as tx (see the note in the tx section)
    r |= (1u << 3);    //bit3: I2S format, frame sync 1 bit early --- this is for the I2S input stream from the ADC
    r |= (1u << 4);    //bit4: MSB first
    r |= (31u << 8);   //bit8~12: 31+1 bit clocks are covered by FrameSync (LRCK)
    r |= (1u << 16);   //bit16~20: temporarily set to 1 (2 words per frame), may be increased later
    p->RCR4 = r;

    r = 0;
    r |= (31u << 16);  //bit16~20: word 0 width is 31+1 bits
    r |= (31u << 24);  //bit24~28: word N width is 31+1 bits (the words following the first word)
    r |= (31u << 8);   //bit8~12: first bit shifted is bit 31
    p->RCR5 = r;
}

/*
 * Enable or disable the SAI1 data channels.
 *
 * Despite its name this function does not touch the DMA-request enable bits;
 * it sets or clears the channel enable bits in TCR3 (tx ch0,1) and RCR3
 * (rx ch0..3).
 *
 * NeedToEnable != 0: write 1 to TCSR bit 20, busy-wait until that bit reads
 *                    back as set, then enable the rx channels followed by the
 *                    tx channels.
 * NeedToEnable == 0: disable the tx channels, then the rx channels.
 *
 * NOTE(review): the wait loop has no timeout; if the flag is never raised
 * (presumably when the transmitter is not running) this call does not return.
 */
void EnableSai1TxAndRxDmaRequest(uint8_t NeedToEnable)
{
    SAII2S_RegStructType *sai = (SAII2S_RegStructType*) SAI1_BaseRegAddr;

    if (!NeedToEnable)
    {
        sai->TCR3 &= ~(0x03 << 16); //bit16,17: tx ch0,1 disabled
        sai->RCR3 &= ~(0x0f << 16); //bit16~19: rx ch0,1,2,3 disabled
        return;
    }

    //writing 1 to bit 20 presumably clears the word-start flag (write-1-to-clear --- confirm);
    //then wait for it to be raised again, so that the channels are enabled
    //exactly at the point where a new LR clock cycle has just started
    sai->TCSR |= (1 << 20);
    while (!(sai->TCSR & (1 << 20)))
    {
        //busy-wait for the start of the next word
    }

    sai->RCR3 |= (0x0f << 16); //bit16~19: all 4 rx channels enabled (original note: only 3 are to be used)
    sai->TCR3 |= (0x03 << 16); //bit16,17: tx ch0,1 enabled --- tx must be enabled later than rx
}

void InitDmaForSaiAudio(void)
{
    DMAMUX_RegStructType *PtrDmaMuxReg;
    DMA_RegStructType *PtrDmaReg;
    SAII2S_RegStructType *PtrSAI1Reg;

    volatile uint32_t r;

    PtrSAI1Reg = (SAII2S_RegStructType*) SAI1_BaseRegAddr;
    PtrDmaMuxReg = (DMAMUX_RegStructType*) DMA_MUX_BaseRegAddr;
    PtrDmaReg = (DMA_RegStructType*) DMA_BaseRegAddr;

    // Enable DMA and DMAMUX clock
    CLOCK_EnableClock(kCLOCK_Dma);      //CCGR5, CG3

    // DMA channel: 0 --> SAI1 Tx0
    // DMA channel: 1 --> SAI1 Rx0,1,2,3
    PtrDmaMuxReg->CHCFG[0] = 0;
    PtrDmaMuxReg->CHCFG[0] |= kDmaRequestMuxSai1Tx;
    PtrDmaMuxReg->CHCFG[0] |= (1 << 31); // enable this DMA channel0 for sai1 tx0

    PtrDmaMuxReg->CHCFG[1] = 0;
    PtrDmaMuxReg->CHCFG[1] |= kDmaRequestMuxSai1Rx;
    PtrDmaMuxReg->CHCFG[1] |= (1 << 31); //enable this DMA channel1 for sai1 rx0

    PtrDmaReg->CR = 0x00000480;          // Channel Group 1 Priority must be set, or it won't start the DMA transfer
                                         // and enable minor loop mapping !!! --- this is for making non-interleaving audio buffer

    PtrDmaReg->INT = 0x0;                // clear existing DMA request on all channels --- both these 2 lines are needed
    PtrDmaReg->CINT = 0x40;              // clear existing DMA request on all channels --- both these 2 lines are needed
    r = PtrDmaReg->INT;                  // to clear existing DMA request, this is also needed, can not remove!!!

    // 1 tx channel (1 I2S output data lines) for DMA ch0: tx data from mem to SAI1 tx0
    PtrDmaReg->TCD[0].SADDR = (uint32_t) AudioTxBuf_A;
    PtrDmaReg->TCD[0].SOFF = AudioFrameSizeInSamplePerCh * 4; //after reading each 32-bit audio sample, addr needs to increased by 4
    PtrDmaReg->TCD[0].SLAST = 0;         //no need to change after major loop is finished, after major loop is finished, load new TCD
    PtrDmaReg->TCD[0].DADDR = (uint32_t) (&PtrSAI1Reg->TDR[0]);



    //this is SAI1 tx DATA reg
    PtrDmaReg->TCD[0].DOFF = 4;      //SAI1 tx0, then tx1
    PtrDmaReg->TCD[0].DLAST_SGA = (uint32_t) (&DmaCh0_SAI1Tx0_TcdB); //?? next TCD address


    PtrDmaReg->TCD[0].ATTR = 0x021A; //each data element size 32 bit, both Dst and Src --- AND! Destination MOD is 3, only 3 bits can be changed
                                     //this should make the destination address 40384020->40384024->40384020->40384024->
    PtrDmaReg->TCD[0].NBYTES_MLOFFYES = (16 | (1 << 31) | (((0 - (AudioFrameSizeInSamplePerCh * 4 - 1) * 4) & 0xfffff) << 10));
    //each DMA request, makes one minor movement, and this minor movement is 8 bytes --> 2 32bit L sample and 2 32bit R sample
    //and minor loop mapping is enabled, and Destination Minor Loop Offset is enable
    //and MLOPP=0-(AudioFrameSizeInSamplePerCh*4-1)*4


    PtrDmaReg->TCD[0].CITER_ELINKNO = AudioFrameSizeInSamplePerCh * 1; //how many minor movements are needed
    PtrDmaReg->TCD[0].BITER_ELINKNO = AudioFrameSizeInSamplePerCh * 1; //this is for reloading CITER, when a major transfer is finished

    PtrDmaReg->TCD[0].CSR = 0x02;    //no auto clear REQ
    PtrDmaReg->TCD[0].CSR |= 0x0010; //enable ESG, scatter/gathering processing

    DmaCh0_SAI1Tx0_TcdA = PtrDmaReg->TCD[0];
    DmaCh0_SAI1Tx0_TcdB = PtrDmaReg->TCD[0];
    DmaCh0_SAI1Tx0_TcdB.DLAST_SGA = (uint32_t) (&DmaCh0_SAI1Tx0_TcdA);
    DmaCh0_SAI1Tx0_TcdB.SADDR = (uint32_t) AudioTxBuf_B;

    // 4 Rx channels (4 I2S input data lines) for DMA ch1: rx data from SAI1 rx0,1,2,3 to memory
    PtrDmaReg->TCD[1].SADDR = (uint32_t) (&PtrSAI1Reg->RDR[0]);
    PtrDmaReg->TCD[1].SOFF = 4; //SAI1 rx DATA reg, address +4, goes to the next chanel
    PtrDmaReg->TCD[1].SLAST = 0; //no need to change after major loop is finished, after major loop is finished, load new TCD
    PtrDmaReg->TCD[1].DADDR = (uint32_t) AudioRxBuf_A;
    //this is SAI1 tx DATA reg
    PtrDmaReg->TCD[1].DOFF = AudioFrameSizeInSamplePerCh * 4; //SAI1 rx destination ptr needs to increase addr
    PtrDmaReg->TCD[1].DLAST_SGA = (uint32_t) (&DmaCh1_SAI1Rx0_TcdB); //?? next TCD address

    PtrDmaReg->TCD[1].ATTR = 0x2202; //each data element size 32 bit, both Dst and Src --- AND! Destination MOD is 4, only 4 bits can be changed
    //PtrDmaReg->TCD[1].NBYTES_MLNO=8;    //each DMA request, makes one minor movement, and this minor movement is 8 bytes --> 1 32bit L sample and 1 32bit R sample

    PtrDmaReg->TCD[1].NBYTES_MLOFFYES = (32 | (1 << 30) | (((0 - (AudioFrameSizeInSamplePerCh * 8 - 1) * 4) & 0xfffff) << 10));
    //each DMA request, makes one minor movement, and this minor movement is 24 bytes --> 3 32bit L sample and 3 32bit R sample
    //and minor loop mapping is enabled, and Destination Minor Loop Offset is enable
    //and MLOPP=0-(AudioFrameSizeInSamplePerCh*4-1)*4

    PtrDmaReg->TCD[1].CITER_ELINKNO = AudioFrameSizeInSamplePerCh * 1; //how many minor movements are needed
    PtrDmaReg->TCD[1].BITER_ELINKNO = AudioFrameSizeInSamplePerCh * 1; //this is for reloading CITER, when a major transfer is finished

    PtrDmaReg->TCD[1].CSR = 0x02;     //no auto clear REQ
    PtrDmaReg->TCD[1].CSR |= 0x0010;  //enable ESG, scatter/gathering processing

    DmaCh1_SAI1Rx0_TcdA = PtrDmaReg->TCD[1];
    DmaCh1_SAI1Rx0_TcdB = PtrDmaReg->TCD[1];
    DmaCh1_SAI1Rx0_TcdB.DLAST_SGA = (uint32_t) (&DmaCh1_SAI1Rx0_TcdA);
    DmaCh1_SAI1Rx0_TcdB.DADDR = (uint32_t) AudioRxBuf_B;

//////////////////////////////////////////////////////////////////

    PtrDmaReg->ERQ = 0x03;             //enable ch0 and ch1 hardware DMA request

    PtrSAI1Reg->TCSR |= (1 << 25);     //setup bit 25 to reset tx fifo
    PtrSAI1Reg->RCSR |= (1 << 25);     //setup bit 25 to reset rx fifo

    PtrSAI1Reg->TCR3 |= (0xf << 24);   //reset tx fifo for all the 4 tx channels of SAI1 --- this is done when SAI tx is not enabled yet
    PtrSAI1Reg->TCSR |= (1 << 18);     //clear FIFO error flag

    PtrSAI1Reg->RCR3 |= (0xf << 24);   //reset rx fifo for all the 4 tx channels of SAI1 --- this is done when SAI tx is not enabled yet
    PtrSAI1Reg->RCSR |= (1 << 18);     //clear FIFO error flag

    PtrSAI1Reg->RCSR |= (1 << 31);     //enable SAI1 Rx
    PtrSAI1Reg->TCSR |= (1 << 31);     //enable SAI1 Tx

    PtrSAI1Reg->RCSR |= (1 << 0);      //enable DMA request from SAI1 fifo
    PtrSAI1Reg->TCSR |= (1 << 0);      //enable DMA request from SAI1 fifo

    //EnableSai1TxAndRxDmaRequest(1);
    EnableSai1TxAndRxDmaRequest(0);

    //only need to enable dma rx interrupt
    __NVIC_ClearPendingIRQ(DMA0_DMA16_IRQn); //before enable irq, need to clear existing pending IRQ
    __NVIC_ClearPendingIRQ(DMA1_DMA17_IRQn); //before enable irq, need to clear existing pending IRQ
    EnableIRQ(DMA0_DMA16_IRQn);
    EnableIRQ(DMA1_DMA17_IRQn);
}

/*
 * DMA interrupt hook: only raises the DmaAudioRxBufIsReady flag.
 * The flag is not consumed or cleared in this file; presumably the main loop
 * polls it and then calls AudioRxTxIOProcess() --- TODO confirm against the caller.
 * Placed in the "CodeQuickAccess" section.
 */
__attribute__((__section__("CodeQuickAccess")))
void AudioRxTxDmaIntrProcess(void)
{
	DmaAudioRxBufIsReady=1;
}


__attribute__((__section__("CodeQuickAccess")))
void AudioRxTxIOProcess(void)
{
    DMA_RegStructType *PtrDmaReg;
    PtrDmaReg = (DMA_RegStructType*) DMA_BaseRegAddr;

    GET_CYCLE_COUNTER(CycCntA);

    //heart beat blinking display
    I2SDmaIntrCnt++;
    if (!(I2SDmaIntrCnt % 4000))
    {
        if (DbgLedUpDnControlFlag)
        {
        	DbgLedOn;
        }else
        {
        	DbgLedOff;
        }
        DbgLedUpDnControlFlag = 1 - DbgLedUpDnControlFlag;
    }

    //----------------------------------------------------------------------------------------------------------
    //---step1: get USB audio down streaming data and put to UsbDnOneFrameAudioBuf_L, UsbDnOneFrameAudioBuf_R

    int *TmpPtr_S32;

    if ((UsbAudioDnStreamingIsStarted==0) && (g_composite.audioUnified.startPlayHalfFull == 1))
    {
        g_composite.audioUnified.startPlayHalfFull = 0;
        g_composite.audioUnified.speakerDetachOrNoInput = 1;
    }
    if (g_composite.audioUnified.startPlayHalfFull)
	{
        //take out audio data from audioPlayDataBuff as cir-buffer
		USB_AUDIO_ENTER_CRITICAL();
		if (CirAudioBuf_SpaceOccupiedInSamples_S64(&UsbDnStrmCirBuf) > AudioFrameSizeInSamplePerCh)
		{
			TmpPtr_S32=(int *)CirAudioBuf_ReadSamples_GetRdPtr_S64(&UsbDnStrmCirBuf, AudioFrameSizeInSamplePerCh);
	        g_composite.audioUnified.audioSendCount += AudioFrameSizeInSamplePerCh * AUDIO_OUT_FORMAT_CHANNELS * AUDIO_OUT_FORMAT_SIZE;

	        for (int i = 0; i < (AudioFrameSizeInSamplePerCh); i++)
	        {
	        	UsbDnOneFrameAudioBuf_L[i] = *TmpPtr_S32++;
	        	UsbDnOneFrameAudioBuf_R[i] = *TmpPtr_S32++;
	        }
		} else
		{
	        //put zeros to buffer --- AudioFrameSizeInSamplePerCh zeros
	        memset(UsbDnOneFrameAudioBuf_L, 0, sizeof(UsbDnOneFrameAudioBuf_L));
	        memset(UsbDnOneFrameAudioBuf_R, 0, sizeof(UsbDnOneFrameAudioBuf_R));
			PRINTF("E\r\n");
		}
		USB_AUDIO_EXIT_CRITICAL();
    } else
	{
        //put zeros to buffer --- AudioFrameSizeInSamplePerCh zeros
        memset(UsbDnOneFrameAudioBuf_L, 0, sizeof(UsbDnOneFrameAudioBuf_L));
        memset(UsbDnOneFrameAudioBuf_R, 0, sizeof(UsbDnOneFrameAudioBuf_R));
    }

    //----------------------------------------------------------------------------------------------------------
    //---step2: prepare the I2S input and output pointers

    //with the SAI1 DMA configuration:
	//I2S Rx buffer A (or B): --- 4 data lines are enabled, and received --- total 8 ch
	//|---Frame0---|---Frame1---|---Frame2---|---Frame3---|||---Frame4---|---Frame5---|---Frame6---|---Frame7---|
	//| SAI1 Rx0 L | SAI1 Rx1 L | SAI1 Rx2 L | SAI1 Rx3 L ||| SAI1 Rx0 R | SAI1 Rx1 R | SAI1 Rx2 R | SAI1 Rx3 R |

	//I2S Tx buffer A (or B): --- 2 data lines are enabled, and transferred --- total 4 ch
	//|---Frame0---|---Frame1---|||---Frame2---|---Frame3---|
	//| SAI1 Tx0 L | SAI1 Tx1 L ||| SAI1 Tx0 R | SAI1 Tx1 R |

	if(
			//this means DMA CH0 is preparing to use DmaCh0_SAI1Tx0_TcdB for the next transfer, so the DMA is now using buffer A --> software here to use buffer B
			((PtrDmaReg->TCD[0].DLAST_SGA)&0xffff)
			==
			(((unsigned int)(&DmaCh0_SAI1Tx0_TcdB))&0xffff)
	  )
    {
    	//set mic input pointers for mic12  --- from I2S(SAI1) Rx Data line 0 --- from BufB
    	I2SInputPtr[0] = AudioRxBuf_B+0*AudioFrameSizeInSamplePerCh;
    	I2SInputPtr[1] = AudioRxBuf_B+4*AudioFrameSizeInSamplePerCh;
    	//set mic input pointers for mic34  --- from I2S(SAI1) Rx Data line 1 --- from BufB
    	I2SInputPtr[2] = AudioRxBuf_B+1*AudioFrameSizeInSamplePerCh;
    	I2SInputPtr[3] = AudioRxBuf_B+5*AudioFrameSizeInSamplePerCh;

    	//set I2S output pointers --- to I2S(SAI1) Tx Data line0 --- to BufB
    	I2SOutputPtr[0] = AudioTxBuf_B+0*AudioFrameSizeInSamplePerCh;
    	I2SOutputPtr[1] = AudioTxBuf_B+2*AudioFrameSizeInSamplePerCh;
    	//set I2S output pointers --- to I2S(SAI1) Tx Data line1 --- to BufB
    	I2SOutputPtr[2] = AudioTxBuf_B+1*AudioFrameSizeInSamplePerCh;
    	I2SOutputPtr[3] = AudioTxBuf_B+3*AudioFrameSizeInSamplePerCh;
    }else
    {
    	//set mic input pointers for mic12  --- from I2S(SAI1) Rx Data line 0 --- from BufA
    	I2SInputPtr[0] = AudioRxBuf_A+0*AudioFrameSizeInSamplePerCh;
    	I2SInputPtr[1] = AudioRxBuf_A+4*AudioFrameSizeInSamplePerCh;
    	//set mic input pointers for mic34  --- from I2S(SAI1) Rx Data line 1 --- from BufA
    	I2SInputPtr[2] = AudioRxBuf_A+1*AudioFrameSizeInSamplePerCh;
    	I2SInputPtr[3] = AudioRxBuf_A+5*AudioFrameSizeInSamplePerCh;

    	//set I2S output pointers --- to I2S(SAI1) Tx Data line0 --- to BufA
    	I2SOutputPtr[0] = AudioTxBuf_A+0*AudioFrameSizeInSamplePerCh;
    	I2SOutputPtr[1] = AudioTxBuf_A+2*AudioFrameSizeInSamplePerCh;
    	//set I2S output pointers --- to I2S(SAI1) Tx Data line1 --- to BufA
    	I2SOutputPtr[2] = AudioTxBuf_A+1*AudioFrameSizeInSamplePerCh;
    	I2SOutputPtr[3] = AudioTxBuf_A+3*AudioFrameSizeInSamplePerCh;
    }


    //----------------------------------------------------------------------------------------------------------
    //---step3: audio processing---

	if(I2SOutputMuteCnt)
	{
		//mic signal is not stable at the first 300ms, so completely mute the I2S output
		I2SOutputMuteCnt--;

		memset(I2SOutputPtr[0],0,AudioFrameSizeInSamplePerCh*4);
		memset(I2SOutputPtr[1],0,AudioFrameSizeInSamplePerCh*4);
		memset(I2SOutputPtr[2],0,AudioFrameSizeInSamplePerCh*4);
		memset(I2SOutputPtr[3],0,AudioFrameSizeInSamplePerCh*4);

	}else
	{
		//real algorithm processing should be placed here
		MainAlgorithmProcessingIsAllowed=1;
	}

	if(MainAlgorithmProcessingIsAllowed)
	{
		//------audio processing can be placed here-------
		//---beg---
		#if 1
			//process UsbDnOneFrameAudioBuf_L[i] and put the generated audio to I2SOutputPtr[0]
			//process UsbDnOneFrameAudioBuf_R[i] and put the generated audio to I2SOutputPtr[1]
			for(int i=0;i<AudioFrameSizeInSamplePerCh;i++)
			{
				I2SOutputPtr[0][i]=UsbDnOneFrameAudioBuf_L[i];
				I2SOutputPtr[1][i]=UsbDnOneFrameAudioBuf_R[i];
				I2SOutputPtr[2][i]=UsbDnOneFrameAudioBuf_L[i];
				I2SOutputPtr[3][i]=UsbDnOneFrameAudioBuf_R[i];
			}
		#endif
		//---end---
		//------audio processing can be placed here-------


		//--- fill USB up streaming circular buffer
		if(UsbAudioUpStreamingIsStarted)
		{
			USB_AUDIO_ENTER_CRITICAL();
			if( CirAudioBuf_SpaceAvailableInSamples_MultiCh(&UsbUpStrmCirBuf) >= AudioFrameSizeInSamplePerCh)
			{
				T_MultiCh32BitAudioSample AudioSampleBuf_MultiCh[AudioFrameSizeInSamplePerCh];

				#if EnableAsrcLatencyTest==1
					if((AsrcLatencyTestCnt++%20000)<1000)
						for(uint32_t i=0;i<(AudioFrameSizeInSamplePerCh);i++)
						{
							#if UsbAudioFormat_NumUpStreamCh==2
								AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
								AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]>>1);
							#endif
							#if UsbAudioFormat_NumUpStreamCh==4
								AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
								AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]>>1);
								AudioSampleBuf_MultiCh[i].s[2]=(I2SInputPtr[2][i]);
								AudioSampleBuf_MultiCh[i].s[3]=(I2SInputPtr[3][i]);
							#endif
						}
					else
						for(uint32_t i=0;i<(AudioFrameSizeInSamplePerCh);i++)
						{
							#if UsbAudioFormat_NumUpStreamCh==2
								AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
								AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]);
							#endif
							#if UsbAudioFormat_NumUpStreamCh==4
								AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
								AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]);
								AudioSampleBuf_MultiCh[i].s[2]=(I2SInputPtr[2][i]);
								AudioSampleBuf_MultiCh[i].s[3]=(I2SInputPtr[3][i]);
							#endif
						}
				#else
					for(uint32_t i=0;i<(AudioFrameSizeInSamplePerCh);i++)
					{
						#if UsbAudioFormat_NumUpStreamCh==2
							AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
							AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]);
						#endif
						#if UsbAudioFormat_NumUpStreamCh==4
							AudioSampleBuf_MultiCh[i].s[0]=(UsbDnOneFrameAudioBuf_L[i]);
							AudioSampleBuf_MultiCh[i].s[1]=(UsbDnOneFrameAudioBuf_R[i]);
							AudioSampleBuf_MultiCh[i].s[2]=(I2SInputPtr[2][i]);
							AudioSampleBuf_MultiCh[i].s[3]=(I2SInputPtr[3][i]);
						#endif
					}
				#endif
				CirAudioBuf_WriteSamples_MultiCh(&UsbUpStrmCirBuf, AudioFrameSizeInSamplePerCh, AudioSampleBuf_MultiCh);
			}
			USB_AUDIO_EXIT_CRITICAL();
		}
	}
    //---step3: audio processing---
    //----------------------------------------------------------------------------------------------------------

	//ComWatchValue[0]=BufAmountOfData;
	ComWatchValue[4]=I2SDmaIntrCnt;


	//report cycle count info to host PC through VCOM
	static int CycCntInfoIdx=0;

	//if(!(AudioIoFrameCnt%(5000)))
	//if(!(AudioIoFrameCnt%(2000)))
	//if(!(I2SDmaIntrCnt%(1000)))
	if(!(I2SDmaIntrCnt%(100)))
	//if(!(I2SDmaIntrCnt%(8)))
	{
		CycCntInfoIdx=0;	//this is to only display the first 4 watching values --- close this line to display all the 16 watch values
		VComReportValue_U32_1=ComWatchValue[CycCntInfoIdx*4+0];
		VComReportValue_U32_2=ComWatchValue[CycCntInfoIdx*4+1];
		VComReportValue_U32_3=ComWatchValue[CycCntInfoIdx*4+2];
		VComReportValue_U32_4=ComWatchValue[CycCntInfoIdx*4+3];
		s_sendSize=CycCntInfoIdx+1;				//command to do VCOM send in the main loop!
		CycCntInfoIdx++;						//using CycCntInfoIdx from 0 to 3, can have 16 cycle cnt values to be printed
		if(CycCntInfoIdx>3) CycCntInfoIdx=0;
	}

    GET_CYCLE_COUNTER(CycCntB);

}

