/* MCUXpresso_MIMXRT1052xxxxB/boards/evkbimxrt1050/audio_examples/maestro_record/vit_proc.c */

/*
 * Copyright 2020 NXP
 * All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stddef.h>
#include <string.h>
#include <math.h>

#include "app_definitions.h"
#include "fsl_debug_console.h"
#include "fsl_os_abstraction.h"

#include "vit_proc.h"
#include "VIT_Model_en.h"
#include "VIT_Model_cn.h"
#include "VIT.h"

/* Value stored in VITControlParams.Command_Time_Span by VIT_Initialize()
 * (presumably seconds - confirm against the VIT API header). */
#define VIT_CMD_TIME_SPAN 3.0

/*
 * Input stream layout, selected per codec / platform:
 *   NUMBER_OF_CHANNELS - channel count given to VIT (VITInstParams.NumberOfChannel).
 *   BYTE_DEPTH         - bytes per sample and per channel in the buffer passed to
 *                        VIT_Execute(); used there to validate the buffer size.
 */
#if (defined(DEMO_CODEC_CS42448) && (DEMO_CODEC_CS42448 > 0))
#define NUMBER_OF_CHANNELS 2
#define BYTE_DEPTH         4
#elif (defined(PLATFORM_RT1170) || defined(PLATFORM_RT1160))
#define NUMBER_OF_CHANNELS 1
#define BYTE_DEPTH         4
#else
#define NUMBER_OF_CHANNELS 1
#define BYTE_DEPTH         2
#endif

/* Where the VIT model is stored. */
#define MODEL_LOCATION VIT_MODEL_IN_ROM

/*
 * Channel-count dependent control parameters:
 *   VIT_OPERATING_MODE     - feature flags written to VITControlParams.OperatingMode.
 *                            The multi-channel configuration additionally sets VIT_AFE_ENABLE.
 *                            The replacement list is parenthesized so the macro behaves
 *                            as a single operand inside larger expressions
 *                            (e.g. `VIT_OPERATING_MODE & flag`).
 *   VIT_MIC1_MIC2_DISTANCE - written to VITControlParams.MIC1_MIC2_Distance
 *                            (presumably millimetres - confirm against the VIT API header).
 */
#if (NUMBER_OF_CHANNELS == 1)
#define VIT_OPERATING_MODE     (VIT_WAKEWORD_ENABLE | VIT_VOICECMD_ENABLE)
#define VIT_MIC1_MIC2_DISTANCE 0
#else
#define VIT_OPERATING_MODE     (VIT_WAKEWORD_ENABLE | VIT_VOICECMD_ENABLE | VIT_AFE_ENABLE)
#define VIT_MIC1_MIC2_DISTANCE 65
#endif
/* Written to VITControlParams.MIC1_MIC3_Distance; 0 in every configuration of this file. */
#define VIT_MIC1_MIC3_DISTANCE 0
/*
 * Map the build-time PLATFORM_* selection onto the VIT device identifier that
 * VIT_Initialize() stores in VITInstParams.DeviceId. Building without one of
 * the listed platforms is a hard error.
 */
#if defined(PLATFORM_RT1040)
#define DEVICE_ID VIT_IMXRT1040

#elif defined(PLATFORM_RT1050)
#define DEVICE_ID VIT_IMXRT1050

#elif defined(PLATFORM_RT1060)
#define DEVICE_ID VIT_IMXRT1060

#elif defined(PLATFORM_RT1160)
#define DEVICE_ID VIT_IMXRT1160

#elif defined(PLATFORM_RT1170)
#define DEVICE_ID VIT_IMXRT1170

#else
#error "No platform selected"

#endif

/* Extra bytes requested on top of each VIT memory region size in VIT_Initialize(). */
#define MEMORY_ALIGNMENT 8 // in bytes

/* Module state shared by VIT_Initialize(), VIT_Execute() and VIT_Deinit(). */
static VIT_Handle_t VITHandle = PL_NULL;      // VIT instance handle
static VIT_InstanceParams_st VITInstParams;   // VIT instance parameters structure
static VIT_ControlParams_st VITControlParams; // VIT control parameters structure
static PL_MemoryTable_st VITMemoryTable;      // VIT memory table descriptor
// Set by VIT_Initialize() when one of its steps fails, so that its remaining steps are skipped.
static PL_BOOL InitPhase_Error        = PL_FALSE;
// Per-channel input buffer addresses handed to VIT_Process(); filled in by VIT_Execute().
static VIT_DataIn_st VIT_InputBuffers = {PL_NULL, PL_NULL,
                                         PL_NULL};
// Heap blocks backing the VIT memory regions: allocated in VIT_Initialize(), released in VIT_Deinit().
static PL_INT8 *pMemory[PL_NR_MEMORY_REGIONS];
// NOTE(review): this directive evaluates the PLATFORM_* macros' values, whereas the channel
// configuration above tests them with defined(); a platform macro defined without a value
// would make this directive ill-formed - confirm how the build defines them.
#if DEMO_CODEC_CS42448 || PLATFORM_RT1170 || PLATFORM_RT1160
// Output of DeInterleave(): 16-bit samples grouped channel by channel.
static PL_INT16 DeInterleavedBuffer[VIT_SAMPLES_PER_FRAME * DEMO_CHANNEL_NUM];
#endif

VIT_ReturnStatus_en VIT_ModelInfo(void)
{
    VIT_ReturnStatus_en VIT_Status;
    VIT_ModelInfo_st Model_Info;
    VIT_Status = VIT_GetModelInfo(&Model_Info);
    if (VIT_Status != VIT_SUCCESS)
    {
        PRINTF("VIT_GetModelInfo error: %d\r\n", VIT_Status);
        return VIT_INVALID_MODEL;
    }

    PRINTF("\n  VIT Model info\r\n");
    PRINTF("  VIT Model Release: 0x%04x\r\n", Model_Info.VIT_Model_Release);
    if (Model_Info.pLanguage != PL_NULL)
    {
        PRINTF("  Language supported: %s \r\n", Model_Info.pLanguage);
    }
    PRINTF("  Number of WakeWords supported : %d \r\n", Model_Info.NbOfWakeWords);
    PRINTF("  Number of Commands supported : %d \r\n", Model_Info.NbOfVoiceCmds);

    if (!Model_Info.WW_VoiceCmds_Strings) // Check here if Model is containing WW and CMDs strings
    {
        PRINTF("  VIT_Model integrating WakeWord and Voice Commands strings: NO\r\n");
    }
    else
    {
        const char *ptr;

        PRINTF("  VIT_Model integrating WakeWord and Voice Commands strings: YES\r\n");
        PRINTF("  WakeWords supported : \r\n");
        ptr = Model_Info.pWakeWord;
        if (ptr != PL_NULL)
        {
            for (PL_UINT16 i = 0; i < Model_Info.NbOfWakeWords; i++)
            {
                PRINTF("   '%s' \r\n", ptr);
                ptr += strlen(ptr) + 1; // to consider NULL char
            }
        }
        PRINTF("  Voice commands supported: \r\n");
        ptr = Model_Info.pVoiceCmds_List;
        if (ptr != PL_NULL)
        {
            for (PL_UINT16 i = 0; i < Model_Info.NbOfVoiceCmds; i++)
            {
                PRINTF("   '%s' \r\n", ptr);
                ptr += strlen(ptr) + 1; // to consider NULL char
            }
        }
    }
    /*
     *   VIT Get Library information
     */
    VIT_LibInfo_st Lib_Info;
    VIT_Status = VIT_GetLibInfo(&Lib_Info);
    if (VIT_Status != VIT_SUCCESS)
    {
        PRINTF("VIT_GetLibInfo error: %d\r\n", VIT_Status);
        return VIT_INVALID_STATE;
    }
    PRINTF("\n  VIT Lib Info\r\n");
    PRINTF("  VIT LIB Release: 0x%04x\r\n", Lib_Info.VIT_LIB_Release);
    PRINTF("  VIT Features supported by the lib: 0x%04x\r\n", Lib_Info.VIT_Features_Supported);
    PRINTF("  Number of channels supported by VIT lib: %d\r\n", Lib_Info.NumberOfChannels_Supported);
    if (Lib_Info.WakeWord_In_Text2Model)
    {
        PRINTF("  VIT WakeWord in Text2Model\r\n\r\n");
    }
    else
    {
        PRINTF("  VIT WakeWord in Audio2Model\r\n\r\n");
    }
    /*
     *   Configure VIT Instance Parameters
     */
    // Check that NUMBER_OF_CHANNELS is supported by VIT
    // Retrieve from VIT_GetLibInfo API the number of channel supported by the VIT lib
    PL_UINT16 max_nb_of_Channels = Lib_Info.NumberOfChannels_Supported;
    if (NUMBER_OF_CHANNELS > max_nb_of_Channels)
    {
        PRINTF("VIT lib is supporting only: %d channels\r\n", max_nb_of_Channels);
        return VIT_INVALID_PARAMETER_OUTOFRANGE;
    }
    return VIT_SUCCESS;
}

int VIT_Initialize(void *arg)
{
    VIT_ReturnStatus_en VIT_Status;
    uint16_t i, minIdx; /* loop index */
    int32_t temp32;     /* temporary address */
    int16_t j;          /* loop index */
    uint16_t order[PL_NR_MEMORY_REGIONS];

    switch (Vit_Language)
    {
        case CN:
            VIT_Status = VIT_SetModel(VIT_Model_cn, VIT_MODEL_IN_ROM);
            break;
        default:
            VIT_Status = VIT_SetModel(VIT_Model_en, VIT_MODEL_IN_ROM);
    }
    if (VIT_Status != VIT_SUCCESS)
    {
        return VIT_Status;
    }

    VIT_Status = VIT_ModelInfo();
    if (VIT_Status != VIT_SUCCESS)
    {
        return VIT_Status;
    }
    /*
     *   Configure VIT Instance Parameters
     */
    VITInstParams.SampleRate_Hz   = VIT_SAMPLE_RATE;
    VITInstParams.SamplesPerFrame = VIT_SAMPLES_PER_FRAME;
    VITInstParams.NumberOfChannel = NUMBER_OF_CHANNELS;
    VITInstParams.DeviceId        = DEVICE_ID;

    /*
     *   VIT get memory table: Get size info per memory type
     */
    VIT_Status = VIT_GetMemoryTable(PL_NULL, // VITHandle param should be NULL
                                    &VITMemoryTable, &VITInstParams);
    if (VIT_Status != VIT_SUCCESS)
    {
        PRINTF("VIT_GetMemoryTable error: %d\r\n", VIT_Status);
        return VIT_Status;
    }

    /* Initialize order variable */
    for (i = 0; i < PL_NR_MEMORY_REGIONS; i++)
    {
        order[i] = i;
    }

    /* Sort region indexes by region size */
    for (i = 0; i < (PL_NR_MEMORY_REGIONS - 1); i++)
    {
        minIdx = i;
        for (j = i + 1; j < PL_NR_MEMORY_REGIONS; j++)
            if (VITMemoryTable.Region[order[j]].Size < VITMemoryTable.Region[order[minIdx]].Size)
                minIdx = j;

        /* Swap indexes */
        temp32        = order[minIdx];
        order[minIdx] = order[i];
        order[i]      = temp32;
    }

    /*
     *   Reserve memory space: Malloc for each memory type
     */
    for (j = (PL_NR_MEMORY_REGIONS - 1); j >= 0; j--)
    {
        /* Log the memory size */
        if (VITMemoryTable.Region[order[j]].Size != 0)
        {
            // reserve memory space
            // NB: VITMemoryTable.Region[PL_MEMREGION_PERSISTENT_FAST_DATA] should be allocated
            //      in the fastest memory of the platform (when possible) - this is not the case in this example.
            pMemory[j] = OSA_MemoryAllocate(VITMemoryTable.Region[order[j]].Size + MEMORY_ALIGNMENT);
            if (!pMemory[j])
            {
                return VIT_INVALID_NULLADDRESS;
            }
            VITMemoryTable.Region[order[j]].pBaseAddress = (void *)pMemory[j];
        }
    }

    /*
     *    Create VIT Instance
     */
    VITHandle  = PL_NULL; // force to null address for correct memory initialization
    VIT_Status = VIT_GetInstanceHandle(&VITHandle, &VITMemoryTable, &VITInstParams);
    if (VIT_Status != VIT_SUCCESS)
    {
        InitPhase_Error = PL_TRUE;
        PRINTF("VIT_GetInstanceHandle error: %d\r\n", VIT_Status);
    }

    /*
     *    Test the reset (OPTIONAL)
     */
    if (!InitPhase_Error)
    {
        VIT_Status = VIT_ResetInstance(VITHandle);
        if (VIT_Status != VIT_SUCCESS)
        {
            InitPhase_Error = PL_TRUE;
            PRINTF("VIT_ResetInstance error: %d\r\n", VIT_Status);
        }
    }

    /*
     *   Set and Apply VIT control parameters
     */
    VITControlParams.OperatingMode      = VIT_OPERATING_MODE;
    VITControlParams.MIC1_MIC2_Distance = VIT_MIC1_MIC2_DISTANCE;
    VITControlParams.MIC1_MIC3_Distance = VIT_MIC1_MIC3_DISTANCE;
    VITControlParams.Command_Time_Span  = VIT_CMD_TIME_SPAN;

    if (!InitPhase_Error)
    {
        VIT_Status = VIT_SetControlParameters(VITHandle, &VITControlParams);
        if (VIT_Status != VIT_SUCCESS)
        {
            InitPhase_Error = PL_TRUE;
            PRINTF("VIT_SetControlParameters error: %d\r\n", VIT_Status);
        }
    }
    /*
        //Public call to VIT_GetStatusParameters
        VIT_StatusParams_st* pVIT_StatusParam_Buffer = (VIT_StatusParams_st*)&VIT_StatusParams_Buffer;

        VIT_GetStatusParameters(VITHandle, pVIT_StatusParam_Buffer, sizeof(VIT_StatusParams_Buffer));
        PRINTF("\nVIT Status Params\n");
        PRINTF(" VIT LIB Release   = 0x%04x\n", pVIT_StatusParam_Buffer->VIT_LIB_Release);
        PRINTF(" VIT Model Release = 0x%04x\n", pVIT_StatusParam_Buffer->VIT_MODEL_Release);
        PRINTF(" VIT Features supported by the lib = 0x%04x\n", pVIT_StatusParam_Buffer->VIT_Features_Supported);
        PRINTF(" VIT Features Selected             = 0x%04x\n", pVIT_StatusParam_Buffer->VIT_Features_Selected);
        PRINTF(" Number of channels supported by VIT lib = %d\n", pVIT_StatusParam_Buffer->NumberOfChannels_Supported);
        PRINTF(" Number of channels selected             = %d\n", pVIT_StatusParam_Buffer->NumberOfChannels_Selected);
        PRINTF(" Device Selected: device id = %d\n", pVIT_StatusParam_Buffer->Device_Selected);
        if (pVIT_StatusParam_Buffer->WakeWord_In_Text2Model)
        {
            PRINTF(" VIT WakeWord in Text2Model\n ");
        }
        else
        {
            PRINTF(" VIT WakeWord in Audio2Model\n ");
        }
    */
    return VIT_Status;
}

/*
 * Run VIT on one audio frame and report any detection on the debug console.
 *
 * @param arg          Unused.
 * @param inputBuffer  Frame of audio samples; on the CS42448 / RT1170 / RT1160
 *                     builds it is de-interleaved into DeInterleavedBuffer first,
 *                     otherwise it is handed to VIT as 16-bit mono PCM.
 * @param size         Size of inputBuffer in bytes; must equal
 *                     VIT_SAMPLES_PER_FRAME * NUMBER_OF_CHANNELS * BYTE_DEPTH.
 * @return VIT_SUCCESS, VIT_INVALID_FRAME_SIZE on a size mismatch, or the error
 *         status returned by VIT_Process() / VIT_GetWakeWordFound() /
 *         VIT_GetVoiceCommandFound().
 */
int VIT_Execute(void *arg, void *inputBuffer, int size)
{
    VIT_ReturnStatus_en VIT_Status;
    VIT_VoiceCommand_st VoiceCommand; // Voice Command info
    VIT_WakeWord_st WakeWord;         // Wakeword info

    VIT_DetectionStatus_en VIT_DetectionResults = VIT_NO_DETECTION; // VIT detection result

    (void)arg;

    if (size != VIT_SAMPLES_PER_FRAME * NUMBER_OF_CHANNELS * BYTE_DEPTH)
    {
        PRINTF("Input buffer format issue\r\n");
        return VIT_INVALID_FRAME_SIZE;
    }
#if DEMO_CODEC_CS42448 || PLATFORM_RT1170 || PLATFORM_RT1160
    /* NOTE(review): DeInterleave() reads DEMO_CHANNEL_NUM 32-bit samples per frame position,
     * while the size check above is based on NUMBER_OF_CHANNELS - confirm that the caller's
     * buffer really holds DEMO_CHANNEL_NUM channels when the two constants differ. */
    DeInterleave(inputBuffer, DeInterleavedBuffer, VIT_SAMPLES_PER_FRAME, DEMO_CHANNEL_NUM);
#endif
    /*
     *   VIT Process
     *
     *   Point the VIT input descriptor at this frame. There is no ping-pong
     *   buffering in this example, so the addresses are simply refreshed on
     *   every call.
     */
    if (VITInstParams.NumberOfChannel == _1CHAN)
    {
#if PLATFORM_RT1170 || PLATFORM_RT1160
        VIT_InputBuffers.pBuffer_Chan1 = DeInterleavedBuffer;
#else
        VIT_InputBuffers.pBuffer_Chan1 = (PL_INT16 *)inputBuffer; // PCM buffer: 16-bit - 16kHz - mono
#endif
        VIT_InputBuffers.pBuffer_Chan2 = PL_NULL;
        VIT_InputBuffers.pBuffer_Chan3 = PL_NULL;
    }
#if DEMO_CODEC_CS42448
    if (VITInstParams.NumberOfChannel == _2CHAN)
    {
        /* The two channels fed to VIT are channels 4 and 5 of the de-interleaved stream. */
        VIT_InputBuffers.pBuffer_Chan1 =
            &DeInterleavedBuffer[VIT_SAMPLES_PER_FRAME * 4]; // PCM buffer: 16-bit - 16kHz - mono
        VIT_InputBuffers.pBuffer_Chan2 = &DeInterleavedBuffer[VIT_SAMPLES_PER_FRAME * 5];
        VIT_InputBuffers.pBuffer_Chan3 = PL_NULL;
    }
#endif
    VIT_Status = VIT_Process(VITHandle,
                             &VIT_InputBuffers, // temporal audio input data
                             &VIT_DetectionResults);

    if (VIT_Status != VIT_SUCCESS)
    {
        PRINTF("VIT_Process error: %d\r\n", VIT_Status);
        return VIT_Status;
    }

    if (VIT_DetectionResults == VIT_WW_DETECTED)
    {
        // Retrieve id of the WakeWord detected
        // (its name is also available when WW and CMDs strings are integrated in the model)
        VIT_Status = VIT_GetWakeWordFound(VITHandle, &WakeWord);
        if (VIT_Status != VIT_SUCCESS)
        {
            PRINTF("VIT_GetWakeWordFound error : %d\r\n", VIT_Status);
            return VIT_Status;
        }

        PRINTF(" - WakeWord detected %d", WakeWord.WW_Id);

        // The name is optional: print it when present, otherwise just terminate the line
        // so that the next console message does not get appended to this one.
        if (WakeWord.pWW_Name != PL_NULL)
        {
            PRINTF(" %s\r\n", WakeWord.pWW_Name);
        }
        else
        {
            PRINTF("\r\n");
        }
    }
    else if (VIT_DetectionResults == VIT_VC_DETECTED)
    {
        // Retrieve id of the Voice Command detected
        // (its name is also available when WW and CMDs strings are integrated in the model)
        VIT_Status = VIT_GetVoiceCommandFound(VITHandle, &VoiceCommand);
        if (VIT_Status != VIT_SUCCESS)
        {
            PRINTF("VIT_GetVoiceCommandFound error: %d\r\n", VIT_Status);
            return VIT_Status;
        }

        PRINTF(" - Voice Command detected %d", VoiceCommand.Cmd_Id);

        // The name is optional: print it when present, otherwise just terminate the line.
        if (VoiceCommand.pCmd_Name != PL_NULL)
        {
            PRINTF(" %s\r\n", VoiceCommand.pCmd_Name);
        }
        else
        {
            PRINTF("\r\n");
        }
    }
    return VIT_Status;
}

/*
 * Release the memory blocks allocated for the VIT memory regions.
 *
 * VIT_GetMemoryTable() is queried with the current handle first; a failure is
 * logged and becomes the return value, but the memory is released regardless.
 *
 * @return Status returned by VIT_GetMemoryTable().
 */
int VIT_Deinit(void)
{
    VIT_ReturnStatus_en VIT_Status; /* Function call status */

    // Query the memory table of the current instance (handle must be provided here)
    VIT_Status = VIT_GetMemoryTable(VITHandle, &VITMemoryTable, &VITInstParams);
    if (VIT_Status != VIT_SUCCESS)
    {
        PRINTF("VIT_GetMemoryTable error: %d\r\n", VIT_Status);
    }

    // Free every block that was allocated and forget its address
    for (int i = 0; i < PL_NR_MEMORY_REGIONS; i++)
    {
        if (pMemory[i] != NULL)
        {
            OSA_MemoryFree(pMemory[i]);
            pMemory[i] = NULL;
        }
    }

    // The instance was created on top of the memory table whose blocks were just
    // released (presumably it lives inside them), so drop the handle as well rather
    // than leaving a stale one around for a later VIT call.
    VITHandle = NULL;

    return VIT_Status;
}

// De-interleave a multichannel stream.
//   Example with 3 channels A, B, C:
//       A1.B1.C1.A2.B2.C2.A3.B3.C3....An.Bn.Cn
//   becomes
//       A1.A2.A3....An.B1.B2.B3....Bn.C1.C2.C3....Cn
//
// Every input sample occupies two consecutive 16-bit words (32 bits); only the
// second word of each pair is kept, so the output holds one 16-bit value per sample.
//
//   pDataInput    interleaved input, FrameSize * ChannelNumber * 2 words
//   pDataOutput   receives FrameSize * ChannelNumber words, channel after channel
//   FrameSize     number of samples per channel
//   ChannelNumber number of interleaved channels
//
// No argument is validated here: the caller must pass correct pointers and sizes.
void DeInterleave(const PL_INT16 *pDataInput, PL_INT16 *pDataOutput, PL_UINT16 FrameSize, PL_UINT16 ChannelNumber)
{
    /* Number of 16-bit words between two consecutive samples of the same channel. */
    const int wordsPerFrame = 2 * ChannelNumber;
    PL_INT16 *pOut          = pDataOutput;

    for (PL_UINT16 channel = 0; channel < ChannelNumber; channel++)
    {
        /* Second 16-bit word of this channel's first sample (the upper half of the
         * 32-bit sample when samples are stored little-endian). */
        const int firstWord = (2 * channel) + 1;

        for (PL_UINT16 sample = 0; sample < FrameSize; sample++)
        {
            *pOut++ = pDataInput[(sample * wordsPerFrame) + firstWord];
        }
    }
}

/* Entry points of this module exposed as function pointers
 * (the VIT_*_T types are declared outside this file, presumably in vit_proc.h). */
VIT_Initialize_T VIT_Initialize_func = VIT_Initialize;
VIT_Execute_T VIT_Execute_func       = VIT_Execute;
VIT_Deinit_T VIT_Deinit_func         = VIT_Deinit;
/* Model selector read by VIT_Initialize(): CN selects VIT_Model_cn, any other value VIT_Model_en. */
VIT_Language_T Vit_Language;