File ns_mfcc.c
File List > neuralSPOT > neuralspot > ns-audio > src > ns_mfcc.c
Go to the documentation of this file
/*
* MFCC utilities - based on the original C++ version by Arm
* this code has been refactored into C with no malloc()
*
* Copyright (C) 2018 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Description: MFCC feature extraction to match with TensorFlow MFCC Op
*/
#include "am_bsp.h"
#include "am_mcu_apollo.h"
#include "am_util.h"
#include "float.h"
#include "ns_audio_features_common.h"
#include "ns_audio_mfcc.h"
#include "ns_core.h"
// API descriptors for the MFCC module. Each one pairs NS_MFCC_API_ID with a
// single version constant.
const ns_core_api_t ns_mfcc_V0_0_1 = {.apiId = NS_MFCC_API_ID, .version = NS_MFCC_V0_0_1};
const ns_core_api_t ns_mfcc_V1_0_0 = {.apiId = NS_MFCC_API_ID, .version = NS_MFCC_V1_0_0};
// Lower and upper bounds that ns_mfcc_init() hands to ns_core_check_api() when
// it validates the api field of the caller's configuration.
const ns_core_api_t ns_mfcc_oldest_supported_version = {
.apiId = NS_MFCC_API_ID, .version = NS_MFCC_V0_0_1};
const ns_core_api_t ns_mfcc_current_version = {.apiId = NS_MFCC_API_ID, .version = NS_MFCC_V1_0_0};
// float g_mfccFrame[MFCC_FRAME_LEN_POW2];
// float g_mfccBuffer[MFCC_FRAME_LEN_POW2];
// float g_mfccEnergies[MFCC_NUM_FBANK_BINS];
// float g_mfccWindowFunction[MFCC_FRAME_LEN];
// float g_mfccDCTMatrix[MFCC_NUM_FBANK_BINS * MFCC_NUM_MFCC_FEATURES];
/*
 * Carve cfg->arena into the float work buffers used by the MFCC code.
 * The buffers are laid out back-to-back, in this order:
 *
 *   mfccFrame           frame_len_pow2 floats
 *   mfccDCTMatrix       num_fbank_bins * num_coeffs floats
 *   mfccBuffer          frame_len_pow2 floats
 *   mfccEnergies        num_fbank_bins floats
 *   mfccWindowFunction  starts immediately after mfccEnergies
 *
 * Sizes and order must stay in sync with how the rest of this file indexes
 * the buffers.
 *
 * Offsets are expressed in elements, not bytes: adding N to a float * already
 * advances the address by N * sizeof(float) bytes. Multiplying by
 * sizeof(float) as well would space the buffers sizeof(float) times further
 * apart than intended and could run past the end of an arena that was sized
 * for the buffers themselves.
 *
 * cfg - configuration whose arena, frame_len_pow2, num_fbank_bins and
 *       num_coeffs fields are already set; its buffer pointers are written.
 */
static void ns_mfcc_map_arena(ns_mfcc_cfg_t *cfg) {
    float *cursor = (float *)cfg->arena;

    cfg->mfccFrame = cursor;
    cursor += cfg->frame_len_pow2;

    cfg->mfccDCTMatrix = cursor;
    cursor += cfg->num_fbank_bins * cfg->num_coeffs;

    cfg->mfccBuffer = cursor;
    cursor += cfg->frame_len_pow2;

    cfg->mfccEnergies = cursor;
    cursor += cfg->num_fbank_bins;

    cfg->mfccWindowFunction = cursor;
}
// --------------------
// Debug-only globals, present only when MFCC_DEBUG is defined. The functions
// shown in this file do not reference them; presumably they hold running
// statistics of the audio for external inspection - TODO confirm.
#ifdef MFCC_DEBUG
float g_audioMax;
float g_audioMin;
float g_audioSum;
float g_audioMaxInt;
float g_audioMinInt;
float g_audioSumInt;
#endif
// Real-FFT instance shared by every MFCC configuration: ns_mfcc_init()
// initialises it for the configuration's frame_len_pow2 and ns_mfcc_compute()
// passes it to arm_rfft_fast_f32().
// NOTE(review): because this is a single global, the most recent
// ns_mfcc_init() call decides the FFT length; configurations with different
// frame_len_pow2 values would appear to conflict - confirm with callers.
arm_rfft_fast_instance_f32 g_mfccRfft;
/*
 * Fill cfg->mfccDCTMatrix with a table of DCT basis values.
 *
 * The table has coefficient_count rows of input_length entries each, stored
 * row-major. The entry at row k, column n is
 *
 *     sqrt(2 / input_length) * cos(pi / input_length * (n + 0.5) * k)
 *
 * cfg               - configuration whose mfccDCTMatrix buffer is written
 * input_length      - entries per row
 * coefficient_count - number of rows
 */
static void create_dct_matrix(ns_mfcc_cfg_t *cfg, int32_t input_length, int32_t coefficient_count) {
    float scale;
    arm_sqrt_f32(2.0 / (float)input_length, &scale);

    for (int32_t row = 0; row < coefficient_count; row++) {
        float *out = &cfg->mfccDCTMatrix[row * input_length];

        for (int32_t col = 0; col < input_length; col++) {
            // Evaluated in double precision, then narrowed on store.
            const double angle = ((double)M_PI) / input_length * (col + 0.5) * row;
            out[col] = scale * cos(angle);
        }
    }
}
/*
 * Prepare an MFCC configuration for use by ns_mfcc_compute().
 *
 * Steps, in order:
 *   1. Unless NS_DISABLE_API_VALIDATION is defined, reject a NULL handle and
 *      an API version outside the supported range.
 *   2. Map the work buffers onto c->arena.
 *   3. Configure and initialise the embedded filterbank config (c->fbc); its
 *      arena starts right after the frame_len-float window buffer.
 *   4. Fill the window table with 0.5 - 0.5 * cos(2*pi*i / frame_len).
 *   5. Build the DCT matrix and initialise the shared RFFT instance for
 *      frame_len_pow2 points.
 *
 * c - configuration to initialise
 *
 * Returns NS_STATUS_INVALID_HANDLE or NS_STATUS_INVALID_VERSION when
 * validation fails, NS_STATUS_SUCCESS otherwise. The results of
 * ns_fbanks_init() and arm_rfft_fast_init_f32() are not inspected.
 */
uint32_t ns_mfcc_init(ns_mfcc_cfg_t *c) {
#ifndef NS_DISABLE_API_VALIDATION
    if (c == NULL) {
        return NS_STATUS_INVALID_HANDLE;
    }
    if (ns_core_check_api(c->api, &ns_mfcc_oldest_supported_version, &ns_mfcc_current_version)) {
        return NS_STATUS_INVALID_VERSION;
    }
#endif

    ns_mfcc_map_arena(c);

    // Hand the filterbank code the same signal parameters, plus the part of
    // the arena that follows the window table.
    c->fbc.sample_frequency = c->sample_frequency;
    c->fbc.low_freq = c->low_freq;
    c->fbc.high_freq = c->high_freq;
    c->fbc.num_fbank_bins = c->num_fbank_bins;
    c->fbc.frame_len_pow2 = c->frame_len_pow2;
    c->fbc.arena_fbanks = (uint8_t *)(c->mfccWindowFunction + c->frame_len);
    ns_fbanks_init(&(c->fbc));

    // Precompute the window applied to every incoming frame.
    float *window = c->mfccWindowFunction;
    for (int idx = 0; idx < c->frame_len; idx++) {
        const double phase = M_2PI * ((float)idx) / (c->frame_len);
        window[idx] = 0.5 - 0.5 * cos(phase);
    }

    create_dct_matrix(c, c->num_fbank_bins, c->num_coeffs);
    arm_rfft_fast_init_f32(&g_mfccRfft, c->frame_len_pow2);

    return NS_STATUS_SUCCESS;
}
/*
 * Compute the MFCC features of one frame of 16-bit audio.
 *
 * Pipeline:
 *   1. Scale each sample by 1/2^15, apply the precomputed window, and
 *      zero-pad the frame to frame_len_pow2 samples.
 *   2. Run the real FFT into cfg->mfccBuffer and turn it, in place, into a
 *      power spectrum occupying indices 0..frame_len_pow2/2.
 *   3. For every mel bin, sum sqrt(power) weighted by that bin's filter
 *      coefficients; a sum of exactly zero is replaced by FLT_MIN so the
 *      logarithm stays finite.
 *   4. Take the natural log of each bin energy.
 *   5. Multiply by the DCT matrix, scale by 2^num_dec_bits and round.
 *
 * cfg        - configuration previously set up by ns_mfcc_init()
 * audio_data - at least cfg->frame_len samples
 * mfcc_out   - receives cfg->num_coeffs values
 *
 * Always returns NS_STATUS_SUCCESS; no argument checking is performed.
 */
uint32_t ns_mfcc_compute(ns_mfcc_cfg_t *cfg, const int16_t *audio_data, float *mfcc_out) {
    float *frame = cfg->mfccFrame;
    float *spectrum = cfg->mfccBuffer;
    float *energies = cfg->mfccEnergies;
    int32_t idx;

    // TensorFlow-style normalisation of int16_t samples to (-1, 1), windowed.
    for (idx = 0; idx < cfg->frame_len; idx++) {
        frame[idx] = ((float)audio_data[idx] / (1 << 15)) * cfg->mfccWindowFunction[idx];
    }

    // Zero-pad the tail of the frame up to the FFT length.
    memset(frame + cfg->frame_len, 0, sizeof(float) * (cfg->frame_len_pow2 - cfg->frame_len));

    arm_rfft_fast_f32(&g_mfccRfft, frame, spectrum, 0);

    // FFT output layout: element 0 is the (real) first bin and element 1 is the
    // (real) last bin, which belongs at index half_dim; (re, im) pairs for bins
    // 1..half_dim-1 follow. Both special values are captured before the loop
    // because the loop's first iteration overwrites element 1.
    int32_t half_dim = cfg->frame_len_pow2 / 2;
    const float first_bin_power = spectrum[0] * spectrum[0];
    const float last_bin_power = spectrum[1] * spectrum[1];
    for (idx = 1; idx < half_dim; idx++) {
        const float re = spectrum[idx * 2];
        const float im = spectrum[idx * 2 + 1];
        spectrum[idx] = re * re + im * im;
    }
    spectrum[0] = first_bin_power;
    spectrum[half_dim] = last_bin_power;

    // Mel filterbank: each bin covers spectrum indices first..last inclusive,
    // and its weights are indexed from 0 within that range.
    for (int32_t bin = 0; bin < cfg->num_fbank_bins; bin++) {
        const int32_t first = cfg->fbc.mfccFbankFirst[bin];
        const int32_t last = cfg->fbc.mfccFbankLast[bin];
        float accum = 0;

        for (int32_t k = first; k <= last; k++) {
            float magnitude;
            arm_sqrt_f32(spectrum[k], &magnitude);
            accum += magnitude * (*(cfg->fbc.melFBank))[bin][k - first];
        }

        // Avoid taking the log of zero below.
        energies[bin] = (accum == 0.0) ? FLT_MIN : accum;
    }

    for (int32_t bin = 0; bin < cfg->num_fbank_bins; bin++) {
        energies[bin] = logf(energies[bin]);
    }

    // DCT as a matrix-vector product, one matrix row per output coefficient.
    for (int32_t coeff = 0; coeff < cfg->num_coeffs; coeff++) {
        const float *dct_row = &cfg->mfccDCTMatrix[coeff * cfg->num_fbank_bins];
        float acc = 0.0;

        for (int32_t bin = 0; bin < cfg->num_fbank_bins; bin++) {
            acc += dct_row[bin] * energies[bin];
        }
        acc *= (0x1 << cfg->num_dec_bits);

        // The result is deliberately not saturated to [-128, 127] here; that is
        // usually done after dequantization, so accuracy is preserved.
        mfcc_out[coeff] = round(acc);
    }

    return NS_STATUS_SUCCESS;
}