/* * Copyright 2018-2022 NXP * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ #include "fsl_powerquad.h" #include "fsl_powerquad_data.h" #include "arm_math.h" /******************************************************************************* * Definitions ******************************************************************************/ /* Component ID definition, used by tools. */ #ifndef FSL_COMPONENT_ID #define FSL_COMPONENT_ID "platform.drivers.powerquad_cmsis" #endif #define PQ_SET_FIX32_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_FIX16_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_Q31_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(-31) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_Q15_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(-15) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_F32_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_FFT_Q31_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_FFT_Q15_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U #define PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG \ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \ POWERQUAD->TMPBASE = 0xE0000000U /******************************************************************************* * Code ******************************************************************************/ static void _arm_fir_increment(const void *pSrc, uint32_t srcLen, const void *pTap, uint16_t tapLen, void *pDst, uint32_t offset, uint32_t elemSize) { POWERQUAD->INABASE = ((uint32_t)(const uint32_t *)pSrc) - (offset * elemSize); POWERQUAD->INBBASE = (uint32_t)(const uint32_t *)pTap; POWERQUAD->LENGTH = (((uint32_t)tapLen & 0xFFFFUL) << 16U) + (srcLen & 0xFFFFUL); POWERQUAD->OUTBASE = ((uint32_t)(uint32_t *)pDst) - (offset * elemSize); POWERQUAD->MISC = offset; POWERQUAD->CONTROL = (CP_FIR << 4U) | PQ_FIR_INCREMENTAL; } float32_t arm_cos_f32(float32_t x) { float tmp; PQ_CosF32(&x, &tmp); return tmp; } q31_t arm_cos_q31(q31_t x) { /* For PQ: input -1 to 1 means -pi to pi * For CMSIS DSP: input 0 to 1 means pi to 2*pi */ x *= 2; return PQ_CosQ31(x); } q15_t arm_cos_q15(q15_t x) { /* For PQ: input -1 to 1 means -pi to pi * For CMSIS DSP: input 0 to 1 means pi to 2*pi */ x *= 2; return PQ_CosQ15(x); } float32_t arm_sin_f32(float32_t x) { float tmp; PQ_SinF32(&x, &tmp); return tmp; } q31_t arm_sin_q31(q31_t x) { /* For PQ: input -1 to 1 means -pi to pi * For CMSIS DSP: input 0 to 1 means pi to 2*pi */ x *= 2; return PQ_SinQ31(x); } q15_t arm_sin_q15(q15_t x) { /* For PQ: input -1 to 1 means -pi to pi * For CMSIS DSP: input 0 to 1 means pi to 2*pi */ x *= 2; return PQ_SinQ15(x); } arm_status arm_sqrt_q31(q31_t in, q31_t *pOut) { uint32_t cppre; arm_status status; /* If the input is a positive number then compute the signBits. */ if (in > 0) { cppre = POWERQUAD->CPPRE; POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-31) | POWERQUAD_CPPRE_CPPRE_OUT(31); *pOut = (q31_t)PQ_SqrtFixed((uint32_t)in); POWERQUAD->CPPRE = cppre; status = (ARM_MATH_SUCCESS); } /* If the number is a negative number then store zero as its square root value */ else { *pOut = 0; status = (ARM_MATH_ARGUMENT_ERROR); } return status; } arm_status arm_sqrt_q15(q15_t in, q15_t *pOut) { uint32_t cppre; arm_status status; /* If the input is a positive number then compute the signBits. */ if (in > 0) { cppre = POWERQUAD->CPPRE; POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-15) | POWERQUAD_CPPRE_CPPRE_OUT(15); *pOut = (q15_t)PQ_SqrtFixed((uint32_t)in); POWERQUAD->CPPRE = cppre; status = (ARM_MATH_SUCCESS); } /* If the number is a negative number then store zero as its square root value */ else { *pOut = 0; status = (ARM_MATH_ARGUMENT_ERROR); } return status; } void arm_cfft_q31(const arm_cfft_instance_q31 *S, q31_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag) { assert(bitReverseFlag == 1U); q31_t *pIn = p1; q31_t *pOut = p1; uint32_t length = S->fftLen; PQ_SET_FFT_Q31_CONFIG; if (ifftFlag == 1U) { PQ_TransformIFFT(POWERQUAD, length, pIn, pOut); } else { PQ_TransformCFFT(POWERQUAD, length, pIn, pOut); } PQ_WaitDone(POWERQUAD); } void arm_cfft_q15(const arm_cfft_instance_q15 *S, q15_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag) { assert(bitReverseFlag == 1U); q15_t *pIn = p1; q15_t *pOut = p1; uint32_t length = S->fftLen; PQ_SET_FFT_Q15_CONFIG; if (ifftFlag == 1U) { PQ_TransformIFFT(POWERQUAD, length, pIn, pOut); } else { PQ_TransformCFFT(POWERQUAD, length, pIn, pOut); } PQ_WaitDone(POWERQUAD); } arm_status arm_rfft_init_q31(arm_rfft_instance_q31 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag) { /* Only supprt such mode. */ assert(ifftFlagR == 0U); assert(bitReverseFlag == 1U); /* Initialise the default arm status */ arm_status status = ARM_MATH_SUCCESS; /* Initialize the Real FFT length */ S->fftLenReal = (uint16_t)fftLenReal; /* Initialize the Flag for selection of RFFT or RIFFT */ S->ifftFlagR = (uint8_t)ifftFlagR; /* Initialize the Flag for calculation Bit reversal or not */ S->bitReverseFlagR = (uint8_t)bitReverseFlag; /* return the status of RFFT Init function */ return (status); } void arm_rfft_q31(const arm_rfft_instance_q31 *S, q31_t *pSrc, q31_t *pDst) { uint32_t length = S->fftLenReal; PQ_SET_FFT_Q31_CONFIG; /* Calculation of RFFT of input */ PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst); PQ_WaitDone(POWERQUAD); } arm_status arm_rfft_init_q15(arm_rfft_instance_q15 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag) { /* Only supprt such mode. */ assert(ifftFlagR == 0U); assert(bitReverseFlag == 1U); /* Initialise the default arm status */ arm_status status = ARM_MATH_SUCCESS; /* Initialize the Real FFT length */ S->fftLenReal = (uint16_t)fftLenReal; /* Initialize the Flag for selection of RFFT or RIFFT */ S->ifftFlagR = (uint8_t)ifftFlagR; /* Initialize the Flag for calculation Bit reversal or not */ S->bitReverseFlagR = (uint8_t)bitReverseFlag; /* return the status of RFFT Init function */ return (status); } void arm_rfft_q15(const arm_rfft_instance_q15 *S, q15_t *pSrc, q15_t *pDst) { uint32_t length = S->fftLenReal; PQ_SET_FFT_Q15_CONFIG; /* Calculation of RFFT of input */ PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst); PQ_WaitDone(POWERQUAD); } arm_status arm_dct4_init_q31(arm_dct4_instance_q31 *S, arm_rfft_instance_q31 *S_RFFT, arm_cfft_radix4_instance_q31 *S_CFFT, uint16_t N, uint16_t Nby2, q31_t normalize) { arm_status status = ARM_MATH_SUCCESS; /* Initialize the DCT4 length */ S->N = N; /* Initialize Real FFT Instance */ S->pRfft = S_RFFT; switch (N) { /* Initialize the table modifier values */ case 512U: S->pTwiddle = dct512_twiddle; S->pCosFactor = dct512_cosFactor; break; case 256U: S->pTwiddle = dct256_twiddle; S->pCosFactor = dct256_cosFactor; break; case 128U: S->pTwiddle = dct128_twiddle; S->pCosFactor = dct128_cosFactor; break; case 64U: S->pTwiddle = dct64_twiddle; S->pCosFactor = dct64_cosFactor; break; case 32U: S->pTwiddle = dct32_twiddle; S->pCosFactor = dct32_cosFactor; break; case 16U: S->pTwiddle = dct16_twiddle; S->pCosFactor = dct16_cosFactor; break; default: status = ARM_MATH_ARGUMENT_ERROR; break; } if (ARM_MATH_SUCCESS == status) { /* Initialize the RFFT/RIFFT Function */ status = arm_rfft_init_q31(S->pRfft, S->N, 0, 1); } return status; } void arm_dct4_q31(const arm_dct4_instance_q31 *S, q31_t *pState, q31_t *pInlineBuffer) { /* Calculate DCT-II for N-point input */ uint16_t i; /* Loop counter */ const q31_t *weights; /* Pointer to the Weights table */ q31_t *pOut; /* Temporary pointers for output buffer */ q31_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */ q31_t in; /* Temporary variable */ const q31_t *cosFact; uint32_t length; uint8_t matRow; uint8_t matCol; uint8_t matLoop; uint16_t lenPerMatLoop; /* * Pre-processing involves multiplying input with cos factor, * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N)) * where, * r(n) -- output of preprocessing * u(n) -- input to preprocessing(actual Source buffer) */ /* * Use matrix production function for preprocessing. Matrix production * supports 16x16 at the most, so the matrix row is set to 16. */ matRow = 16U; lenPerMatLoop = S->N >= 256U ? 256U : S->N; matCol = (uint8_t)(lenPerMatLoop / 16U); matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U); cosFact = S->pCosFactor; pbuff = pInlineBuffer; length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol); while ((matLoop--) != 0U) { PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG; /* cos factor is Q31, convert to float */ PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U); cosFact = &cosFact[lenPerMatLoop]; PQ_WaitDone(POWERQUAD); /* Product. */ PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG; PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff); pbuff = &pbuff[lenPerMatLoop]; PQ_WaitDone(POWERQUAD); } PQ_SET_FFT_Q31_CONFIG; PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState); /* ARM calculation while PQ is running. */ /* * Use matrix production function for twiddle multiplication. * Matrix production supports 16x16 at the most. The total elements are 2*N; */ lenPerMatLoop = S->N >= 128U ? 128U : S->N; matCol = (uint8_t)(lenPerMatLoop / 8U); matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U); weights = S->pTwiddle; pOut = pState; length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol); PQ_WaitDone(POWERQUAD); while ((matLoop--) != 0U) { PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG; /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */ PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U); weights = &weights[lenPerMatLoop * 2U]; PQ_WaitDone(POWERQUAD); /* Product. */ PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG; PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut); PQ_WaitDone(POWERQUAD); for (i = 0; i < lenPerMatLoop / 4U; i++) { pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; } } /* ----------- Post-processing ---------- */ /* DCT-IV can be obtained from DCT-II by the equation, * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) * Hence, Y4(0) = Y2(0)/2 */ /* Getting only real part from the output and Converting to DCT-IV */ /* pbuff initialized to input buffer. */ pbuff = pInlineBuffer; /* pS1 initialized to pState */ pS1 = pState; /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/ in = (q31_t)(float)((float)*pS1 / 1.41421356237f); *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; i = S->N / 4U - 1U; while (i > 0U) { in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; i--; } } arm_status arm_dct4_init_q15(arm_dct4_instance_q15 *S, arm_rfft_instance_q15 *S_RFFT, arm_cfft_radix4_instance_q15 *S_CFFT, uint16_t N, uint16_t Nby2, q15_t normalize) { arm_status status = ARM_MATH_SUCCESS; /* Initialize the DCT4 length */ S->N = N; /* Initialize Real FFT Instance */ S->pRfft = S_RFFT; switch (N) { /* Initialize the table modifier values */ case 512U: S->pTwiddle = (void *)dct512_twiddle; S->pCosFactor = (void *)dct512_cosFactor; break; case 256U: S->pTwiddle = (void *)dct256_twiddle; S->pCosFactor = (void *)dct256_cosFactor; break; case 128U: S->pTwiddle = (void *)dct128_twiddle; S->pCosFactor = (void *)dct128_cosFactor; break; case 64U: S->pTwiddle = (void *)dct64_twiddle; S->pCosFactor = (void *)dct64_cosFactor; break; case 32U: S->pTwiddle = (void *)dct32_twiddle; S->pCosFactor = (void *)dct32_cosFactor; break; case 16U: S->pTwiddle = (void *)dct16_twiddle; S->pCosFactor = (void *)dct16_cosFactor; break; default: status = ARM_MATH_ARGUMENT_ERROR; break; } if (ARM_MATH_SUCCESS == status) { /* Initialize the RFFT/RIFFT Function */ status = arm_rfft_init_q15(S->pRfft, S->N, 0, 1); } /* return the status of DCT4 Init function */ return status; } void arm_dct4_q15(const arm_dct4_instance_q15 *S, q15_t *pState, q15_t *pInlineBuffer) { /* Calculate DCT-II for N-point input */ uint16_t i; /* Loop counter */ const q15_t *weights; /* Pointer to the Weights table */ q15_t *pOut; /* Temporary pointers for output buffer */ q15_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */ q15_t in; /* Temporary variable */ const q15_t *cosFact; uint32_t length; uint8_t matRow; uint8_t matCol; uint8_t matLoop; uint16_t lenPerMatLoop; /* * Pre-processing involves multiplying input with cos factor, * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N)) * where, * r(n) -- output of preprocessing * u(n) -- input to preprocessing(actual Source buffer) */ /* * Use matrix production function for preprocessing. Matrix production * supports 16x16 at the most, so the matrix row is set to 16. */ matRow = 16U; lenPerMatLoop = S->N >= 256U ? 256U : S->N; matCol = (uint8_t)(lenPerMatLoop / 16U); matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U); cosFact = S->pCosFactor; pbuff = pInlineBuffer; length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, 0); while ((matLoop--) != 0U) { PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG; /* cos factor is Q31, convert to float */ PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U); cosFact = &cosFact[2U * lenPerMatLoop]; PQ_WaitDone(POWERQUAD); /* Product. */ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; POWERQUAD->TMPBASE = 0xE0000000U; PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff); PQ_WaitDone(POWERQUAD); pbuff = &pbuff[lenPerMatLoop]; } PQ_SET_FFT_Q15_CONFIG; PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState); /* ARM calculation while PQ is running. */ /* * Use matrix production function for twiddle multiplication. * Matrix production supports 16x16 at the most. The total elements are 2*N; */ lenPerMatLoop = S->N >= 128U ? 128U : S->N; matCol = (uint8_t)(lenPerMatLoop / 8U); matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U); weights = S->pTwiddle; pOut = pState; length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol); PQ_WaitDone(POWERQUAD); while ((matLoop--) != 0U) { PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG; /* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */ PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U); weights = &weights[lenPerMatLoop * 2U]; PQ_WaitDone(POWERQUAD); /* Product. */ POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; POWERQUAD->TMPBASE = 0xE0000000U; PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut); PQ_WaitDone(POWERQUAD); for (i = 0; i < lenPerMatLoop / 4U; i++) { pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; pOut[0] -= pOut[1]; pOut = &pOut[2]; } } /* ----------- Post-processing ---------- */ /* DCT-IV can be obtained from DCT-II by the equation, * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) * Hence, Y4(0) = Y2(0)/2 */ /* Getting only real part from the output and Converting to DCT-IV */ /* pbuff initialized to input buffer. */ pbuff = pInlineBuffer; /* pS1 initialized to pState */ pS1 = pState; /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/ in = (q15_t)(float)((float)*pS1 / 1.41421356237f); *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; i = S->N / 4U - 1U; while (i > 0U) { in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; in = *pS1 - in; *pbuff++ = in; pS1 = &pS1[2]; i--; } } void arm_fir_init_f32( arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t *pCoeffs, float32_t *pState, uint32_t blockSize) { uint32_t i; /* * CMSIS DSP API filter coefficients stored in time reversed order, but PQ * uses the positive order. PQ does not use pState, so pState pState[1:numTaps] * is used here to save the coefficients in positive order. At the same time, * pState[0] is used to save the offset used for incremetal calculation. * Because the length of pState is (numTaps + blockSize -1), to ensure enough space, * the blockSize should be larger than 1. */ assert(blockSize > 1U); S->numTaps = numTaps; S->pCoeffs = pCoeffs; S->pState = pState; for (i = 0U; i < numTaps; i++) { pState[numTaps - i] = pCoeffs[i]; } *(uint32_t *)(void *)pState = 0U; } void arm_fir_init_q31( arm_fir_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs, q31_t *pState, uint32_t blockSize) { uint32_t i; /* * CMSIS DSP API filter coefficients stored in time reversed order, but PQ * uses the positive order. PQ does not use pState, so pState pState[1:numTaps] * is used here to save the coefficients in positive order. At the same time, * pState[0] is used to save the offset used for incremetal calculation. * Because the length of pState is (numTaps + blockSize -1), to ensure enough space, * the blockSize should be larger than 1. */ assert(blockSize > 1U); S->numTaps = numTaps; S->pCoeffs = pCoeffs; S->pState = pState; for (i = 0U; i < numTaps; i++) { pState[numTaps - i] = pCoeffs[i]; } pState[0] = 0; } arm_status arm_fir_init_q15( arm_fir_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs, q15_t *pState, uint32_t blockSize) { uint16_t i; /* * CMSIS DSP API filter coefficients stored in time reversed order, but PQ * uses the positive order. PQ does not use pState, so pState pState[2:numTaps] * is used here to save the coefficients in positive order. At the same time, * pState[0:1] is used to save the offset used for incremetal calculation. * Because the length of pState is (numTaps + blockSize -1), to ensure enough space, * the blockSize should be larger than 2. */ assert(blockSize > 2U); S->numTaps = numTaps; S->pCoeffs = pCoeffs; S->pState = pState; for (i = 0U; i < numTaps; i++) { pState[numTaps + 1U - i] = pCoeffs[i]; } *(uint32_t *)(void *)pState = 0U; return ARM_MATH_SUCCESS; } /** * brief Processing function for the floating-point FIR filter. * param[in] S points to an instance of the floating-point FIR structure. * param[in] pSrc points to the block of input data. * param[out] pDst points to the block of output data. * param[in] blockSize number of samples to process. * * Note: Powerquad has a hardware limitation, when using it for FIR increment calculation, the address of pSrc needs to * be a continuous address. */ void arm_fir_f32(const arm_fir_instance_f32 *S, const float32_t *pSrc, float32_t *pDst, uint32_t blockSize) { assert(NULL != S); assert(NULL != pSrc); assert(NULL != pDst); uint32_t curOffset; PQ_SET_F32_CONFIG; curOffset = *(uint32_t *)(void *)(S->pState); if (curOffset == 0U) { PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR); } else { _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc)); } *(uint32_t *)(void *)(S->pState) = curOffset + blockSize; PQ_WaitDone(POWERQUAD); } void arm_fir_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst, uint32_t blockSize) { assert(NULL != S); assert(NULL != pSrc); assert(NULL != pDst); uint32_t curOffset; PQ_SET_Q31_CONFIG; curOffset = *(uint32_t *)(void *)(S->pState); if (curOffset == 0U) { PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR); } else { _arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc)); } *(uint32_t *)(void *)(S->pState) = curOffset + blockSize; PQ_WaitDone(POWERQUAD); } void arm_fir_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst, uint32_t blockSize) { assert(NULL != S); assert(NULL != pSrc); assert(NULL != pDst); uint32_t curOffset; PQ_SET_Q15_CONFIG; curOffset = *(uint32_t *)(void *)(S->pState); if (curOffset == 0U) { PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[2]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR); } else { _arm_fir_increment(pSrc, blockSize, &S->pState[2], S->numTaps, pDst, curOffset, sizeof(*pSrc)); } *(uint32_t *)(void *)(S->pState) = curOffset + blockSize; PQ_WaitDone(POWERQUAD); } void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_F32_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION); PQ_WaitDone(POWERQUAD); } void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_Q31_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION); PQ_WaitDone(POWERQUAD); } void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_Q15_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION); PQ_WaitDone(POWERQUAD); } void arm_correlate_f32( const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_F32_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION); PQ_WaitDone(POWERQUAD); } void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_Q31_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION); PQ_WaitDone(POWERQUAD); } void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); PQ_SET_Q15_CONFIG; PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION); PQ_WaitDone(POWERQUAD); } void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns, float32_t *pData) { /* Assign Number of Rows */ S->numRows = nRows; /* Assign Number of Columns */ S->numCols = nColumns; /* Assign Data pointer */ S->pData = pData; } void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData) { /* Assign Number of Rows */ S->numRows = nRows; /* Assign Number of Columns */ S->numCols = nColumns; /* Assign Data pointer */ S->pData = pData; } void arm_mat_init_q15(arm_matrix_instance_q15 *S, uint16_t nRows, uint16_t nColumns, q15_t *pData) { /* Assign Number of Rows */ S->numRows = nRows; /* Assign Number of Columns */ S->numCols = nColumns; /* Assign Data pointer */ S->pData = pData; } arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA, const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_F32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA, const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA, const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX16_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA, const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_F32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA, const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA, const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX16_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA, const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_F32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA, const arm_matrix_instance_q31 *pSrcB, arm_matrix_instance_q31 *pDst) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; /* * The output prescale does not supprt negative value due to hardware issue, * workaround: * 1. Downscale the matrix B and save the float output value to private memory. * 2. Multiply the float matrix B in private memory with matrix A, output as Q31. * Note: Put matrix B in private memory is faster. */ #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U); PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG; /* Downscale. */ PQ_MatrixScale(POWERQUAD, length, 1.0f / 2147483648.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U); length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_WaitDone(POWERQUAD); PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG; PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_mult_q15(const arm_matrix_instance_q15 *pSrcA, const arm_matrix_instance_q15 *pSrcB, arm_matrix_instance_q15 *pDst, q15_t *pState) { assert(NULL != pSrcA); assert(NULL != pSrcB); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U); PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG; /* Downscale. */ PQ_MatrixScale(POWERQUAD, length, 1.0f / 32768.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U); length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols); PQ_WaitDone(POWERQUAD); PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG; PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_inverse_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst) { assert(NULL != src); assert(NULL != dst); arm_status status; uint32_t length; float tmp[1024]; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((src->numRows != src->numCols) || (dst->numRows != dst->numCols) || (src->numRows != dst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_F32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(src->numRows, src->numCols, src->numRows); PQ_MatrixInversion(POWERQUAD, length, src->pData, tmp, dst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_instance_f32 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); PQ_SetFormat(POWERQUAD, kPQ_CP_MTX, kPQ_Float); PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_trans_q31(const arm_matrix_instance_q31 *pSrc, arm_matrix_instance_q31 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_trans_q15(const arm_matrix_instance_q15 *pSrc, arm_matrix_instance_q15 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_FIX16_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scale, arm_matrix_instance_f32 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { PQ_SET_F32_CONFIG; length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); PQ_MatrixScale(POWERQUAD, length, scale, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_scale_q31(const arm_matrix_instance_q31 *pSrc, q31_t scaleFract, int32_t shift, arm_matrix_instance_q31 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; float scaleFloat; pq_config_t config = { kPQ_32Bit, /* inputAFormat */ 0, /* inputAPrescale */ kPQ_32Bit, /* inputBFormat */ 0, /* inputBPrescale */ kPQ_32Bit, /* outputFormat */ (int8_t)shift, /* outputPrescale */ kPQ_Float, /* tmpFormat */ 0, /* tmpPrescale */ kPQ_Float, /* machineFormat */ (uint32_t *)0xe0000000U, /* tmpBase */ }; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); scaleFloat = PQ_Q31_2_FLOAT(scaleFract); PQ_SetConfig(POWERQUAD, &config); PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; } arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc, q15_t scaleFract, int32_t shift, arm_matrix_instance_q15 *pDst) { assert(NULL != pSrc); assert(NULL != pDst); arm_status status; uint32_t length; float scaleFloat; pq_config_t config = { kPQ_16Bit, /* inputAFormat */ 0, /* inputAPrescale */ kPQ_16Bit, /* inputBFormat */ 0, /* inputBPrescale */ kPQ_16Bit, /* outputFormat */ (int8_t)shift, /* outputPrescale */ kPQ_Float, /* tmpFormat */ 0, /* tmpPrescale */ kPQ_Float, /* machineFormat */ (uint32_t *)0xe0000000U, /* tmpBase */ }; #ifdef ARM_MATH_MATRIX_CHECK /* Check for matrix mismatch condition */ if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows)) { /* Set status as ARM_MATH_SIZE_MISMATCH */ status = ARM_MATH_SIZE_MISMATCH; } else #endif { length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U); scaleFloat = PQ_Q15_2_FLOAT(scaleFract); PQ_SetConfig(POWERQUAD, &config); PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData); /* Wait for the completion */ PQ_WaitDone(POWERQUAD); status = ARM_MATH_SUCCESS; } /* Return to application */ return status; }