MCUXpresso_LPC55S69/devices/LPC55S69/drivers/fsl_powerquad_cmsis.c

1694 lines
51 KiB
C

/*
* Copyright 2018-2022 NXP
* All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include "fsl_powerquad.h"
#include "fsl_powerquad_data.h"
#include "arm_math.h"
/*******************************************************************************
* Definitions
******************************************************************************/
/* Component ID definition, used by tools. */
#ifndef FSL_COMPONENT_ID
#define FSL_COMPONENT_ID "platform.drivers.powerquad_cmsis"
#endif
#define PQ_SET_FIX32_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_FIX16_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_Q31_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(-31) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_Q15_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(-15) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_F32_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_FFT_Q31_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_FFT_Q15_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_32Bit; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_32Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
#define PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG \
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float; \
POWERQUAD->TMPBASE = 0xE0000000U
/*******************************************************************************
* Code
******************************************************************************/
static void _arm_fir_increment(const void *pSrc,
uint32_t srcLen,
const void *pTap,
uint16_t tapLen,
void *pDst,
uint32_t offset,
uint32_t elemSize)
{
POWERQUAD->INABASE = ((uint32_t)(const uint32_t *)pSrc) - (offset * elemSize);
POWERQUAD->INBBASE = (uint32_t)(const uint32_t *)pTap;
POWERQUAD->LENGTH = (((uint32_t)tapLen & 0xFFFFUL) << 16U) + (srcLen & 0xFFFFUL);
POWERQUAD->OUTBASE = ((uint32_t)(uint32_t *)pDst) - (offset * elemSize);
POWERQUAD->MISC = offset;
POWERQUAD->CONTROL = (CP_FIR << 4U) | PQ_FIR_INCREMENTAL;
}
float32_t arm_cos_f32(float32_t x)
{
float tmp;
PQ_CosF32(&x, &tmp);
return tmp;
}
q31_t arm_cos_q31(q31_t x)
{
/* For PQ: input -1 to 1 means -pi to pi
* For CMSIS DSP: input 0 to 1 means pi to 2*pi */
x *= 2;
return PQ_CosQ31(x);
}
q15_t arm_cos_q15(q15_t x)
{
/* For PQ: input -1 to 1 means -pi to pi
* For CMSIS DSP: input 0 to 1 means pi to 2*pi */
x *= 2;
return PQ_CosQ15(x);
}
float32_t arm_sin_f32(float32_t x)
{
float tmp;
PQ_SinF32(&x, &tmp);
return tmp;
}
q31_t arm_sin_q31(q31_t x)
{
/* For PQ: input -1 to 1 means -pi to pi
* For CMSIS DSP: input 0 to 1 means pi to 2*pi */
x *= 2;
return PQ_SinQ31(x);
}
q15_t arm_sin_q15(q15_t x)
{
/* For PQ: input -1 to 1 means -pi to pi
* For CMSIS DSP: input 0 to 1 means pi to 2*pi */
x *= 2;
return PQ_SinQ15(x);
}
arm_status arm_sqrt_q31(q31_t in, q31_t *pOut)
{
uint32_t cppre;
arm_status status;
/* If the input is a positive number then compute the signBits. */
if (in > 0)
{
cppre = POWERQUAD->CPPRE;
POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-31) | POWERQUAD_CPPRE_CPPRE_OUT(31);
*pOut = (q31_t)PQ_SqrtFixed((uint32_t)in);
POWERQUAD->CPPRE = cppre;
status = (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
status = (ARM_MATH_ARGUMENT_ERROR);
}
return status;
}
arm_status arm_sqrt_q15(q15_t in, q15_t *pOut)
{
uint32_t cppre;
arm_status status;
/* If the input is a positive number then compute the signBits. */
if (in > 0)
{
cppre = POWERQUAD->CPPRE;
POWERQUAD->CPPRE = POWERQUAD_CPPRE_CPPRE_IN(-15) | POWERQUAD_CPPRE_CPPRE_OUT(15);
*pOut = (q15_t)PQ_SqrtFixed((uint32_t)in);
POWERQUAD->CPPRE = cppre;
status = (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
status = (ARM_MATH_ARGUMENT_ERROR);
}
return status;
}
void arm_cfft_q31(const arm_cfft_instance_q31 *S, q31_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
{
assert(bitReverseFlag == 1U);
q31_t *pIn = p1;
q31_t *pOut = p1;
uint32_t length = S->fftLen;
PQ_SET_FFT_Q31_CONFIG;
if (ifftFlag == 1U)
{
PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
}
else
{
PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
}
PQ_WaitDone(POWERQUAD);
}
void arm_cfft_q15(const arm_cfft_instance_q15 *S, q15_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
{
assert(bitReverseFlag == 1U);
q15_t *pIn = p1;
q15_t *pOut = p1;
uint32_t length = S->fftLen;
PQ_SET_FFT_Q15_CONFIG;
if (ifftFlag == 1U)
{
PQ_TransformIFFT(POWERQUAD, length, pIn, pOut);
}
else
{
PQ_TransformCFFT(POWERQUAD, length, pIn, pOut);
}
PQ_WaitDone(POWERQUAD);
}
arm_status arm_rfft_init_q31(arm_rfft_instance_q31 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
{
/* Only supprt such mode. */
assert(ifftFlagR == 0U);
assert(bitReverseFlag == 1U);
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialize the Real FFT length */
S->fftLenReal = (uint16_t)fftLenReal;
/* Initialize the Flag for selection of RFFT or RIFFT */
S->ifftFlagR = (uint8_t)ifftFlagR;
/* Initialize the Flag for calculation Bit reversal or not */
S->bitReverseFlagR = (uint8_t)bitReverseFlag;
/* return the status of RFFT Init function */
return (status);
}
void arm_rfft_q31(const arm_rfft_instance_q31 *S, q31_t *pSrc, q31_t *pDst)
{
uint32_t length = S->fftLenReal;
PQ_SET_FFT_Q31_CONFIG;
/* Calculation of RFFT of input */
PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
PQ_WaitDone(POWERQUAD);
}
arm_status arm_rfft_init_q15(arm_rfft_instance_q15 *S, uint32_t fftLenReal, uint32_t ifftFlagR, uint32_t bitReverseFlag)
{
/* Only supprt such mode. */
assert(ifftFlagR == 0U);
assert(bitReverseFlag == 1U);
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialize the Real FFT length */
S->fftLenReal = (uint16_t)fftLenReal;
/* Initialize the Flag for selection of RFFT or RIFFT */
S->ifftFlagR = (uint8_t)ifftFlagR;
/* Initialize the Flag for calculation Bit reversal or not */
S->bitReverseFlagR = (uint8_t)bitReverseFlag;
/* return the status of RFFT Init function */
return (status);
}
void arm_rfft_q15(const arm_rfft_instance_q15 *S, q15_t *pSrc, q15_t *pDst)
{
uint32_t length = S->fftLenReal;
PQ_SET_FFT_Q15_CONFIG;
/* Calculation of RFFT of input */
PQ_TransformRFFT(POWERQUAD, length, pSrc, pDst);
PQ_WaitDone(POWERQUAD);
}
arm_status arm_dct4_init_q31(arm_dct4_instance_q31 *S,
arm_rfft_instance_q31 *S_RFFT,
arm_cfft_radix4_instance_q31 *S_CFFT,
uint16_t N,
uint16_t Nby2,
q31_t normalize)
{
arm_status status = ARM_MATH_SUCCESS;
/* Initialize the DCT4 length */
S->N = N;
/* Initialize Real FFT Instance */
S->pRfft = S_RFFT;
switch (N)
{
/* Initialize the table modifier values */
case 512U:
S->pTwiddle = dct512_twiddle;
S->pCosFactor = dct512_cosFactor;
break;
case 256U:
S->pTwiddle = dct256_twiddle;
S->pCosFactor = dct256_cosFactor;
break;
case 128U:
S->pTwiddle = dct128_twiddle;
S->pCosFactor = dct128_cosFactor;
break;
case 64U:
S->pTwiddle = dct64_twiddle;
S->pCosFactor = dct64_cosFactor;
break;
case 32U:
S->pTwiddle = dct32_twiddle;
S->pCosFactor = dct32_cosFactor;
break;
case 16U:
S->pTwiddle = dct16_twiddle;
S->pCosFactor = dct16_cosFactor;
break;
default:
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
if (ARM_MATH_SUCCESS == status)
{
/* Initialize the RFFT/RIFFT Function */
status = arm_rfft_init_q31(S->pRfft, S->N, 0, 1);
}
return status;
}
void arm_dct4_q31(const arm_dct4_instance_q31 *S, q31_t *pState, q31_t *pInlineBuffer)
{
/* Calculate DCT-II for N-point input */
uint16_t i; /* Loop counter */
const q31_t *weights; /* Pointer to the Weights table */
q31_t *pOut; /* Temporary pointers for output buffer */
q31_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q31_t in; /* Temporary variable */
const q31_t *cosFact;
uint32_t length;
uint8_t matRow;
uint8_t matCol;
uint8_t matLoop;
uint16_t lenPerMatLoop;
/*
* Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
*/
/*
* Use matrix production function for preprocessing. Matrix production
* supports 16x16 at the most, so the matrix row is set to 16.
*/
matRow = 16U;
lenPerMatLoop = S->N >= 256U ? 256U : S->N;
matCol = (uint8_t)(lenPerMatLoop / 16U);
matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
cosFact = S->pCosFactor;
pbuff = pInlineBuffer;
length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
while ((matLoop--) != 0U)
{
PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
/* cos factor is Q31, convert to float */
PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
cosFact = &cosFact[lenPerMatLoop];
PQ_WaitDone(POWERQUAD);
/* Product. */
PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
pbuff = &pbuff[lenPerMatLoop];
PQ_WaitDone(POWERQUAD);
}
PQ_SET_FFT_Q31_CONFIG;
PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
/* ARM calculation while PQ is running. */
/*
* Use matrix production function for twiddle multiplication.
* Matrix production supports 16x16 at the most. The total elements are 2*N;
*/
lenPerMatLoop = S->N >= 128U ? 128U : S->N;
matCol = (uint8_t)(lenPerMatLoop / 8U);
matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
weights = S->pTwiddle;
pOut = pState;
length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
PQ_WaitDone(POWERQUAD);
while ((matLoop--) != 0U)
{
PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
/* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
weights = &weights[lenPerMatLoop * 2U];
PQ_WaitDone(POWERQUAD);
/* Product. */
PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
PQ_WaitDone(POWERQUAD);
for (i = 0; i < lenPerMatLoop / 4U; i++)
{
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
}
}
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
in = (q31_t)(float)((float)*pS1 / 1.41421356237f);
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
i = S->N / 4U - 1U;
while (i > 0U)
{
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
i--;
}
}
arm_status arm_dct4_init_q15(arm_dct4_instance_q15 *S,
arm_rfft_instance_q15 *S_RFFT,
arm_cfft_radix4_instance_q15 *S_CFFT,
uint16_t N,
uint16_t Nby2,
q15_t normalize)
{
arm_status status = ARM_MATH_SUCCESS;
/* Initialize the DCT4 length */
S->N = N;
/* Initialize Real FFT Instance */
S->pRfft = S_RFFT;
switch (N)
{
/* Initialize the table modifier values */
case 512U:
S->pTwiddle = (void *)dct512_twiddle;
S->pCosFactor = (void *)dct512_cosFactor;
break;
case 256U:
S->pTwiddle = (void *)dct256_twiddle;
S->pCosFactor = (void *)dct256_cosFactor;
break;
case 128U:
S->pTwiddle = (void *)dct128_twiddle;
S->pCosFactor = (void *)dct128_cosFactor;
break;
case 64U:
S->pTwiddle = (void *)dct64_twiddle;
S->pCosFactor = (void *)dct64_cosFactor;
break;
case 32U:
S->pTwiddle = (void *)dct32_twiddle;
S->pCosFactor = (void *)dct32_cosFactor;
break;
case 16U:
S->pTwiddle = (void *)dct16_twiddle;
S->pCosFactor = (void *)dct16_cosFactor;
break;
default:
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
if (ARM_MATH_SUCCESS == status)
{
/* Initialize the RFFT/RIFFT Function */
status = arm_rfft_init_q15(S->pRfft, S->N, 0, 1);
}
/* return the status of DCT4 Init function */
return status;
}
void arm_dct4_q15(const arm_dct4_instance_q15 *S, q15_t *pState, q15_t *pInlineBuffer)
{
/* Calculate DCT-II for N-point input */
uint16_t i; /* Loop counter */
const q15_t *weights; /* Pointer to the Weights table */
q15_t *pOut; /* Temporary pointers for output buffer */
q15_t *pS1, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q15_t in; /* Temporary variable */
const q15_t *cosFact;
uint32_t length;
uint8_t matRow;
uint8_t matCol;
uint8_t matLoop;
uint16_t lenPerMatLoop;
/*
* Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
*/
/*
* Use matrix production function for preprocessing. Matrix production
* supports 16x16 at the most, so the matrix row is set to 16.
*/
matRow = 16U;
lenPerMatLoop = S->N >= 256U ? 256U : S->N;
matCol = (uint8_t)(lenPerMatLoop / 16U);
matLoop = (uint8_t)(((S->N - 1U) >> 8U) + 1U);
cosFact = S->pCosFactor;
pbuff = pInlineBuffer;
length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, 0);
while ((matLoop--) != 0U)
{
PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
/* cos factor is Q31, convert to float */
PQ_MatrixScale(POWERQUAD, length, 2.0f / 2147483648.0f, cosFact, (void *)(uint32_t *)0xE0000000U);
cosFact = &cosFact[2U * lenPerMatLoop];
PQ_WaitDone(POWERQUAD);
/* Product. */
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->TMPBASE = 0xE0000000U;
PQ_MatrixProduct(POWERQUAD, length, pbuff, (void *)(uint32_t *)0xE0000000U, pbuff);
PQ_WaitDone(POWERQUAD);
pbuff = &pbuff[lenPerMatLoop];
}
PQ_SET_FFT_Q15_CONFIG;
PQ_TransformRDCT(POWERQUAD, S->N, pInlineBuffer, pState);
/* ARM calculation while PQ is running. */
/*
* Use matrix production function for twiddle multiplication.
* Matrix production supports 16x16 at the most. The total elements are 2*N;
*/
lenPerMatLoop = S->N >= 128U ? 128U : S->N;
matCol = (uint8_t)(lenPerMatLoop / 8U);
matLoop = (uint8_t)(((S->N - 1U) >> 7U) + 1U);
weights = S->pTwiddle;
pOut = pState;
length = POWERQUAD_MAKE_MATRIX_LEN(matRow, matCol, matCol);
PQ_WaitDone(POWERQUAD);
while ((matLoop--) != 0U)
{
PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
/* Downscale by 1024 * 1024 * 16, because the twiddle are multiplied by 1024 * 1024 * 16. */
PQ_MatrixScale(POWERQUAD, length, 1.0f / 16777216.0f, weights, (void *)(uint32_t *)0xE0000000U);
weights = &weights[lenPerMatLoop * 2U];
PQ_WaitDone(POWERQUAD);
/* Product. */
POWERQUAD->OUTFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->INAFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_16Bit << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->INBFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->TMPFORMAT = ((uint32_t)(0) << 8U) | ((uint32_t)kPQ_Float << 4U) | (uint32_t)kPQ_Float;
POWERQUAD->TMPBASE = 0xE0000000U;
PQ_MatrixProduct(POWERQUAD, length, pOut, (void *)(uint32_t *)0xE0000000U, pOut);
PQ_WaitDone(POWERQUAD);
for (i = 0; i < lenPerMatLoop / 4U; i++)
{
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
pOut[0] -= pOut[1];
pOut = &pOut[2];
}
}
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2. Considering the DCT II normalize, here divided by sqrt(2).*/
in = (q15_t)(float)((float)*pS1 / 1.41421356237f);
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
i = S->N / 4U - 1U;
while (i > 0U)
{
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
in = *pS1 - in;
*pbuff++ = in;
pS1 = &pS1[2];
i--;
}
}
void arm_fir_init_f32(
arm_fir_instance_f32 *S, uint16_t numTaps, const float32_t *pCoeffs, float32_t *pState, uint32_t blockSize)
{
uint32_t i;
/*
* CMSIS DSP API filter coefficients stored in time reversed order, but PQ
* uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
* is used here to save the coefficients in positive order. At the same time,
* pState[0] is used to save the offset used for incremetal calculation.
* Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
* the blockSize should be larger than 1.
*/
assert(blockSize > 1U);
S->numTaps = numTaps;
S->pCoeffs = pCoeffs;
S->pState = pState;
for (i = 0U; i < numTaps; i++)
{
pState[numTaps - i] = pCoeffs[i];
}
*(uint32_t *)(void *)pState = 0U;
}
void arm_fir_init_q31(
arm_fir_instance_q31 *S, uint16_t numTaps, const q31_t *pCoeffs, q31_t *pState, uint32_t blockSize)
{
uint32_t i;
/*
* CMSIS DSP API filter coefficients stored in time reversed order, but PQ
* uses the positive order. PQ does not use pState, so pState pState[1:numTaps]
* is used here to save the coefficients in positive order. At the same time,
* pState[0] is used to save the offset used for incremetal calculation.
* Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
* the blockSize should be larger than 1.
*/
assert(blockSize > 1U);
S->numTaps = numTaps;
S->pCoeffs = pCoeffs;
S->pState = pState;
for (i = 0U; i < numTaps; i++)
{
pState[numTaps - i] = pCoeffs[i];
}
pState[0] = 0;
}
arm_status arm_fir_init_q15(
arm_fir_instance_q15 *S, uint16_t numTaps, const q15_t *pCoeffs, q15_t *pState, uint32_t blockSize)
{
uint16_t i;
/*
* CMSIS DSP API filter coefficients stored in time reversed order, but PQ
* uses the positive order. PQ does not use pState, so pState pState[2:numTaps]
* is used here to save the coefficients in positive order. At the same time,
* pState[0:1] is used to save the offset used for incremetal calculation.
* Because the length of pState is (numTaps + blockSize -1), to ensure enough space,
* the blockSize should be larger than 2.
*/
assert(blockSize > 2U);
S->numTaps = numTaps;
S->pCoeffs = pCoeffs;
S->pState = pState;
for (i = 0U; i < numTaps; i++)
{
pState[numTaps + 1U - i] = pCoeffs[i];
}
*(uint32_t *)(void *)pState = 0U;
return ARM_MATH_SUCCESS;
}
/**
* brief Processing function for the floating-point FIR filter.
* param[in] S points to an instance of the floating-point FIR structure.
* param[in] pSrc points to the block of input data.
* param[out] pDst points to the block of output data.
* param[in] blockSize number of samples to process.
*
* Note: Powerquad has a hardware limitation, when using it for FIR increment calculation, the address of pSrc needs to
* be a continuous address.
*/
void arm_fir_f32(const arm_fir_instance_f32 *S, const float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
{
assert(NULL != S);
assert(NULL != pSrc);
assert(NULL != pDst);
uint32_t curOffset;
PQ_SET_F32_CONFIG;
curOffset = *(uint32_t *)(void *)(S->pState);
if (curOffset == 0U)
{
PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
}
else
{
_arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
}
*(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
PQ_WaitDone(POWERQUAD);
}
void arm_fir_q31(const arm_fir_instance_q31 *S, const q31_t *pSrc, q31_t *pDst, uint32_t blockSize)
{
assert(NULL != S);
assert(NULL != pSrc);
assert(NULL != pDst);
uint32_t curOffset;
PQ_SET_Q31_CONFIG;
curOffset = *(uint32_t *)(void *)(S->pState);
if (curOffset == 0U)
{
PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[1]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
}
else
{
_arm_fir_increment(pSrc, blockSize, &S->pState[1], S->numTaps, pDst, curOffset, sizeof(*pSrc));
}
*(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
PQ_WaitDone(POWERQUAD);
}
void arm_fir_q15(const arm_fir_instance_q15 *S, const q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
{
assert(NULL != S);
assert(NULL != pSrc);
assert(NULL != pDst);
uint32_t curOffset;
PQ_SET_Q15_CONFIG;
curOffset = *(uint32_t *)(void *)(S->pState);
if (curOffset == 0U)
{
PQ_FIR(POWERQUAD, pSrc, (int32_t)blockSize, &(S->pState[2]), (int32_t)S->numTaps, pDst, PQ_FIR_FIR);
}
else
{
_arm_fir_increment(pSrc, blockSize, &S->pState[2], S->numTaps, pDst, curOffset, sizeof(*pSrc));
}
*(uint32_t *)(void *)(S->pState) = curOffset + blockSize;
PQ_WaitDone(POWERQUAD);
}
void arm_conv_f32(const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_F32_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
PQ_WaitDone(POWERQUAD);
}
void arm_conv_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_Q31_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
PQ_WaitDone(POWERQUAD);
}
void arm_conv_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_Q15_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CONVOLUTION);
PQ_WaitDone(POWERQUAD);
}
void arm_correlate_f32(
const float32_t *pSrcA, uint32_t srcALen, const float32_t *pSrcB, uint32_t srcBLen, float32_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_F32_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
PQ_WaitDone(POWERQUAD);
}
void arm_correlate_q31(const q31_t *pSrcA, uint32_t srcALen, const q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_Q31_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
PQ_WaitDone(POWERQUAD);
}
void arm_correlate_q15(const q15_t *pSrcA, uint32_t srcALen, const q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
PQ_SET_Q15_CONFIG;
PQ_FIR(POWERQUAD, pSrcA, (int32_t)srcALen, pSrcB, (int32_t)srcBLen, pDst, PQ_FIR_CORRELATION);
PQ_WaitDone(POWERQUAD);
}
void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns, float32_t *pData)
{
/* Assign Number of Rows */
S->numRows = nRows;
/* Assign Number of Columns */
S->numCols = nColumns;
/* Assign Data pointer */
S->pData = pData;
}
void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData)
{
/* Assign Number of Rows */
S->numRows = nRows;
/* Assign Number of Columns */
S->numCols = nColumns;
/* Assign Data pointer */
S->pData = pData;
}
void arm_mat_init_q15(arm_matrix_instance_q15 *S, uint16_t nRows, uint16_t nColumns, q15_t *pData)
{
/* Assign Number of Rows */
S->numRows = nRows;
/* Assign Number of Columns */
S->numCols = nColumns;
/* Assign Data pointer */
S->pData = pData;
}
arm_status arm_mat_add_f32(const arm_matrix_instance_f32 *pSrcA,
const arm_matrix_instance_f32 *pSrcB,
arm_matrix_instance_f32 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_F32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_add_q31(const arm_matrix_instance_q31 *pSrcA,
const arm_matrix_instance_q31 *pSrcB,
arm_matrix_instance_q31 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_add_q15(const arm_matrix_instance_q15 *pSrcA,
const arm_matrix_instance_q15 *pSrcB,
arm_matrix_instance_q15 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX16_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixAddition(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_sub_f32(const arm_matrix_instance_f32 *pSrcA,
const arm_matrix_instance_f32 *pSrcB,
arm_matrix_instance_f32 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_F32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_sub_q31(const arm_matrix_instance_q31 *pSrcA,
const arm_matrix_instance_q31 *pSrcB,
arm_matrix_instance_q31 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_sub_q15(const arm_matrix_instance_q15 *pSrcA,
const arm_matrix_instance_q15 *pSrcB,
arm_matrix_instance_q15 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX16_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixSubtraction(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_mult_f32(const arm_matrix_instance_f32 *pSrcA,
const arm_matrix_instance_f32 *pSrcB,
arm_matrix_instance_f32 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_F32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, pSrcB->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_mult_q31(const arm_matrix_instance_q31 *pSrcA,
const arm_matrix_instance_q31 *pSrcB,
arm_matrix_instance_q31 *pDst)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
/*
* The output prescale does not supprt negative value due to hardware issue,
* workaround:
* 1. Downscale the matrix B and save the float output value to private memory.
* 2. Multiply the float matrix B in private memory with matrix A, output as Q31.
* Note: Put matrix B in private memory is faster.
*/
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
PQ_SET_MAT_FIX32_WORKAROUND_SCALE_CONFIG;
/* Downscale. */
PQ_MatrixScale(POWERQUAD, length, 1.0f / 2147483648.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_WaitDone(POWERQUAD);
PQ_SET_MAT_FIX32_WORKAROUND_MULT_CONFIG;
PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_mult_q15(const arm_matrix_instance_q15 *pSrcA,
const arm_matrix_instance_q15 *pSrcB,
arm_matrix_instance_q15 *pDst,
q15_t *pState)
{
assert(NULL != pSrcA);
assert(NULL != pSrcB);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrcA->numRows != pSrcB->numRows) || (pSrcA->numCols != pSrcB->numCols) || (pSrcA->numRows != pDst->numRows) ||
(pSrcA->numCols != pDst->numCols))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcB->numRows, pSrcB->numCols, 0U);
PQ_SET_MAT_FIX16_WORKAROUND_SCALE_CONFIG;
/* Downscale. */
PQ_MatrixScale(POWERQUAD, length, 1.0f / 32768.0f, pSrcB->pData, (void *)(uint32_t *)0xE0000000U);
length = POWERQUAD_MAKE_MATRIX_LEN(pSrcA->numRows, pSrcA->numCols, pSrcB->numCols);
PQ_WaitDone(POWERQUAD);
PQ_SET_MAT_FIX16_WORKAROUND_MULT_CONFIG;
PQ_MatrixMultiplication(POWERQUAD, length, pSrcA->pData, (void *)(uint32_t *)0xE0000000U, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_inverse_f32(const arm_matrix_instance_f32 *src, arm_matrix_instance_f32 *dst)
{
assert(NULL != src);
assert(NULL != dst);
arm_status status;
uint32_t length;
float tmp[1024];
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((src->numRows != src->numCols) || (dst->numRows != dst->numCols) || (src->numRows != dst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_F32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(src->numRows, src->numCols, src->numRows);
PQ_MatrixInversion(POWERQUAD, length, src->pData, tmp, dst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_trans_f32(const arm_matrix_instance_f32 *pSrc, arm_matrix_instance_f32 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
PQ_SetFormat(POWERQUAD, kPQ_CP_MTX, kPQ_Float);
PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_trans_q31(const arm_matrix_instance_q31 *pSrc, arm_matrix_instance_q31 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_trans_q15(const arm_matrix_instance_q15 *pSrc, arm_matrix_instance_q15 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_FIX16_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
PQ_MatrixTranspose(POWERQUAD, length, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_scale_f32(const arm_matrix_instance_f32 *pSrc, float32_t scale, arm_matrix_instance_f32 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
PQ_SET_F32_CONFIG;
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
PQ_MatrixScale(POWERQUAD, length, scale, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_scale_q31(const arm_matrix_instance_q31 *pSrc,
q31_t scaleFract,
int32_t shift,
arm_matrix_instance_q31 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
float scaleFloat;
pq_config_t config = {
kPQ_32Bit, /* inputAFormat */
0, /* inputAPrescale */
kPQ_32Bit, /* inputBFormat */
0, /* inputBPrescale */
kPQ_32Bit, /* outputFormat */
(int8_t)shift, /* outputPrescale */
kPQ_Float, /* tmpFormat */
0, /* tmpPrescale */
kPQ_Float, /* machineFormat */
(uint32_t *)0xe0000000U, /* tmpBase */
};
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
scaleFloat = PQ_Q31_2_FLOAT(scaleFract);
PQ_SetConfig(POWERQUAD, &config);
PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}
arm_status arm_mat_scale_q15(const arm_matrix_instance_q15 *pSrc,
q15_t scaleFract,
int32_t shift,
arm_matrix_instance_q15 *pDst)
{
assert(NULL != pSrc);
assert(NULL != pDst);
arm_status status;
uint32_t length;
float scaleFloat;
pq_config_t config = {
kPQ_16Bit, /* inputAFormat */
0, /* inputAPrescale */
kPQ_16Bit, /* inputBFormat */
0, /* inputBPrescale */
kPQ_16Bit, /* outputFormat */
(int8_t)shift, /* outputPrescale */
kPQ_Float, /* tmpFormat */
0, /* tmpPrescale */
kPQ_Float, /* machineFormat */
(uint32_t *)0xe0000000U, /* tmpBase */
};
#ifdef ARM_MATH_MATRIX_CHECK
/* Check for matrix mismatch condition */
if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols) || (pSrc->numRows != pDst->numRows))
{
/* Set status as ARM_MATH_SIZE_MISMATCH */
status = ARM_MATH_SIZE_MISMATCH;
}
else
#endif
{
length = POWERQUAD_MAKE_MATRIX_LEN(pSrc->numRows, pSrc->numCols, 0U);
scaleFloat = PQ_Q15_2_FLOAT(scaleFract);
PQ_SetConfig(POWERQUAD, &config);
PQ_MatrixScale(POWERQUAD, length, scaleFloat, pSrc->pData, pDst->pData);
/* Wait for the completion */
PQ_WaitDone(POWERQUAD);
status = ARM_MATH_SUCCESS;
}
/* Return to application */
return status;
}