// File ns_perf_profile.c
// File List > neuralSPOT > neuralspot > ns-utils > src > ns_perf_profile.c
// Go to the documentation of this file
// #if defined(AM_PART_APOLLO3) || defined(AM_PART_APOLLO3P)
// // AP3TODO
// #else
#include "ns_perf_profile.h"
#include "ns_ambiqsuite_harness.h"
#include "ns_core.h"
/**
 * @brief Turn the cache monitor on or off according to cfg->enable.
 *
 * On builds for AM_PART_APOLLO5A / AM_PART_APOLLO5B no cache-controller call
 * is compiled in and the function only reports success.
 *
 * @param cfg Profiler configuration; only its `enable` member is read here.
 * @return AM_HAL_STATUS_SUCCESS when the HAL call succeeded (or was compiled
 *         out); otherwise the status returned by am_hal_cachectrl_control(),
 *         saturated to 0xFF when it does not fit in a uint8_t.
 */
uint8_t ns_cache_profiler_init(ns_cache_config_t *cfg) {
    uint8_t status = AM_HAL_STATUS_SUCCESS;

    // Original note: this part family doesn't have cachectrl yet, hence the guard.
#if !defined(AM_PART_APOLLO5A) && !defined(AM_PART_APOLLO5B)
    char dummy = 0; // placeholder argument handed to the HAL control call
    uint32_t hal_status;

    if (cfg->enable) {
        hal_status =
            am_hal_cachectrl_control(AM_HAL_CACHECTRL_CONTROL_MONITOR_ENABLE, (void *)&dummy);
    } else {
        hal_status =
            am_hal_cachectrl_control(AM_HAL_CACHECTRL_CONTROL_MONITOR_DISABLE, (void *)&dummy);
    }

    // Report HAL failures instead of unconditionally returning success.
    // Saturate rather than truncate so that a large status code can never
    // collapse to 0 when narrowed to the uint8_t return type.
    if (hal_status != AM_HAL_STATUS_SUCCESS) {
        status = (hal_status > 0xFFu) ? (uint8_t)0xFFu : (uint8_t)hal_status;
    }
#endif

    return status;
}
// Pick the register block that exposes the DMONx/IMONx cache monitor counters
// for the part being built. Left undefined on parts with neither block.
#if defined(AM_PART_APOLLO3) || defined(AM_PART_APOLLO3P)
    #define NS_CACHE_MON_REGS CACHECTRL
#elif defined(AM_PART_APOLLO4B) || defined(AM_PART_APOLLO4P) || defined(AM_PART_APOLLO4L)
    #define NS_CACHE_MON_REGS CPU
#endif

/**
 * @brief Snapshot the data- and instruction-cache monitor registers.
 *
 * Copies DMON0..DMON3 and IMON0..IMON3 into the matching members of *dump.
 * On parts other than Apollo3/3P and Apollo4B/4P/4L nothing is compiled in
 * and *dump is left untouched.
 *
 * @param dump Destination for the eight counter values.
 */
void ns_capture_cache_stats(ns_cache_dump_t *dump) {
#ifdef NS_CACHE_MON_REGS
    // Data-cache monitors.
    dump->daccess = NS_CACHE_MON_REGS->DMON0;
    dump->dtaglookup = NS_CACHE_MON_REGS->DMON1;
    dump->dhitslookup = NS_CACHE_MON_REGS->DMON2;
    dump->dhitsline = NS_CACHE_MON_REGS->DMON3;

    // Instruction-cache monitors.
    dump->iaccess = NS_CACHE_MON_REGS->IMON0;
    dump->itaglookup = NS_CACHE_MON_REGS->IMON1;
    dump->ihitslookup = NS_CACHE_MON_REGS->IMON2;
    dump->ihitsline = NS_CACHE_MON_REGS->IMON3;
#endif
}
#undef NS_CACHE_MON_REGS
/**
 * @brief Compute the per-counter difference between two cache snapshots.
 *
 * Each member of *d is set to the corresponding member of *e minus that of *s.
 *
 * @param s Snapshot taken at the start of the measured region.
 * @param e Snapshot taken at the end of the measured region.
 * @param d Receives e - s for every counter.
 */
void ns_delta_cache(ns_cache_dump_t *s, ns_cache_dump_t *e, ns_cache_dump_t *d) {
    // Data-cache counters.
    d->daccess = e->daccess - s->daccess;
    d->dtaglookup = e->dtaglookup - s->dtaglookup;
    d->dhitslookup = e->dhitslookup - s->dhitslookup;
    d->dhitsline = e->dhitsline - s->dhitsline;

    // Instruction-cache counters.
    d->iaccess = e->iaccess - s->iaccess;
    d->itaglookup = e->itaglookup - s->itaglookup;
    d->ihitslookup = e->ihitslookup - s->ihitslookup;
    d->ihitsline = e->ihitsline - s->ihitsline;
}
/**
 * @brief Print every counter of a cache snapshot, one per line.
 *
 * @param dump Snapshot to print via ns_lp_printf().
 */
void ns_print_cache_stats(ns_cache_dump_t *dump) {
    // Data cache.
    ns_lp_printf("****** Dcache Accesses : %d\r\n", dump->daccess);
    ns_lp_printf("****** Dcache Tag Lookups : %d\r\n", dump->dtaglookup);
    ns_lp_printf("****** Dcache hits for lookups : %d\r\n", dump->dhitslookup);
    ns_lp_printf("****** Dcache hits for lines : %d\r\n", dump->dhitsline);

    // Instruction cache.
    ns_lp_printf("****** Icache Accesses : %d\r\n", dump->iaccess);
    ns_lp_printf("****** Icache Tag Lookups : %d\r\n", dump->itaglookup);
    ns_lp_printf("****** Icache hits for lookups : %d\r\n", dump->ihitslookup);
    ns_lp_printf("****** Icache hits for lines : %d\r\n", dump->ihitsline);
}
/**
 * @brief Print the difference (end - start) of every cache counter.
 *
 * @param start Snapshot taken before the measured region.
 * @param end   Snapshot taken after the measured region.
 */
void ns_print_cache_stats_delta(ns_cache_dump_t *start, ns_cache_dump_t *end) {
    // Data cache.
    ns_lp_printf("****** Delta Dcache Accesses : %d\r\n",
                 end->daccess - start->daccess);
    ns_lp_printf("****** Delta Dcache Tag Lookups : %d\r\n",
                 end->dtaglookup - start->dtaglookup);
    ns_lp_printf("****** Delta Dcache hits for lookups : %d\r\n",
                 end->dhitslookup - start->dhitslookup);
    ns_lp_printf("****** Delta Dcache hits for lines : %d\r\n",
                 end->dhitsline - start->dhitsline);

    // Instruction cache.
    ns_lp_printf("****** Delta Icache Accesses : %d\r\n",
                 end->iaccess - start->iaccess);
    ns_lp_printf("****** Delta Icache Tag Lookups : %d\r\n",
                 end->itaglookup - start->itaglookup);
    ns_lp_printf("****** Delta Icache hits for lookups : %d\r\n",
                 end->ihitslookup - start->ihitslookup);
    ns_lp_printf("****** Delta Icache hits for lines : %d\r\n",
                 end->ihitsline - start->ihitsline);
}
/**
 * @brief Zero the six DWT counters used by the profiler.
 *
 * Writes 0 to CYCCNT, CPICNT, EXCCNT, SLEEPCNT, LSUCNT and FOLDCNT.
 */
void ns_reset_perf_counters(void) {
    DWT->CYCCNT = 0;   // cycle counter
    DWT->CPICNT = 0;   // CPI counter
    DWT->EXCCNT = 0;   // exception counter
    DWT->SLEEPCNT = 0; // sleep counter
    DWT->LSUCNT = 0;   // load/store unit counter
    DWT->FOLDCNT = 0;  // folded-instruction counter
}
/**
 * @brief Put the DWT profiler into a known idle state.
 *
 * Clears DWT->CTRL and then zeroes all profiler counters.
 */
void ns_init_perf_profiler(void) {
    DWT->CTRL = 0;

    ns_reset_perf_counters();
}
/**
 * @brief Enable the ITM and start all DWT profiling counters.
 *
 * Calls am_hal_itm_enable() and then writes DWT->CTRL once with the cycle
 * counter enable plus the CPI, exception, sleep, LSU, fold and cycle event
 * enables set. Every other DWT->CTRL bit is written as 0.
 */
void ns_start_perf_profiler(void) {
    am_hal_itm_enable();

    // Assemble the control word first so DWT->CTRL is written exactly once.
    uint32_t ctrl = _VAL2FLD(DWT_CTRL_CYCCNTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_CPIEVTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_EXCEVTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_SLEEPEVTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_LSUEVTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_FOLDEVTENA, 1);
    ctrl |= _VAL2FLD(DWT_CTRL_CYCEVTENA, 1);

    DWT->CTRL = ctrl;
}
/**
 * @brief Stop profiling by clearing every bit in DWT->CTRL.
 */
void ns_stop_perf_profiler(void) {
    DWT->CTRL = 0;
}
/**
 * @brief Snapshot the six DWT profiling counters.
 *
 * @param c Receives the current CYCCNT, CPICNT, EXCCNT, SLEEPCNT, LSUCNT and
 *          FOLDCNT register values.
 */
void ns_capture_perf_profiler(ns_perf_counters_t *c) {
    c->cyccnt = DWT->CYCCNT;     // cycles
    c->cpicnt = DWT->CPICNT;     // CPI counter
    c->exccnt = DWT->EXCCNT;     // exception counter
    c->sleepcnt = DWT->SLEEPCNT; // sleep counter
    c->lsucnt = DWT->LSUCNT;     // load/store unit counter
    c->foldcnt = DWT->FOLDCNT;   // folded-instruction counter
}
/**
 * @brief Difference between two readings of a counter that wraps at 256.
 *
 * @param e Later ("end") reading.
 * @param s Earlier ("start") reading.
 * @return e - s when e >= s; otherwise e + 256 - s, i.e. the distance assuming
 *         the counter wrapped exactly once between the two readings.
 */
static uint32_t ns_delta_byte_counter_wrap(uint32_t e, uint32_t s) {
    // No wrap observed: plain difference.
    if (e >= s) {
        return e - s;
    }

    // End is below start: account for one wrap of the byte-wide counter.
    return (e + 256) - s;
}
/**
 * @brief Compute the difference between two DWT counter snapshots.
 *
 * The cycle count is a plain subtraction; the remaining counters go through
 * ns_delta_byte_counter_wrap(), which compensates for a single wrap at 256.
 *
 * @param s Snapshot taken at the start of the measured region.
 * @param e Snapshot taken at the end of the measured region.
 * @param d Receives the per-counter deltas.
 */
void ns_delta_perf(ns_perf_counters_t *s, ns_perf_counters_t *e, ns_perf_counters_t *d) {
    // 32-bit counter, probably won't wrap during a measurement.
    d->cyccnt = e->cyccnt - s->cyccnt;

    // Byte-wide counters: use the wrap-aware helper.
    d->cpicnt = ns_delta_byte_counter_wrap(e->cpicnt, s->cpicnt);
    d->exccnt = ns_delta_byte_counter_wrap(e->exccnt, s->exccnt);
    d->sleepcnt = ns_delta_byte_counter_wrap(e->sleepcnt, s->sleepcnt);
    d->lsucnt = ns_delta_byte_counter_wrap(e->lsucnt, s->lsucnt);
    d->foldcnt = ns_delta_byte_counter_wrap(e->foldcnt, s->foldcnt);
}
/**
 * @brief Print a summary and per-counter breakdown of a DWT snapshot.
 *
 * The instruction estimate printed in the summary is
 * cyccnt - cpicnt - exccnt - sleepcnt - lsucnt + foldcnt.
 *
 * @param c Counter values (typically a delta) to print via ns_lp_printf().
 */
void ns_print_perf_profile(ns_perf_counters_t *c) {
    const uint32_t instruction_count =
        c->cyccnt - c->cpicnt - c->exccnt - c->sleepcnt - c->lsucnt + c->foldcnt;

    ns_lp_printf("Summary: %d cycles, %d instructions\n", c->cyccnt, instruction_count);

    ns_lp_printf("Details\n");
    ns_lp_printf("Cycle Count: %d\n", c->cyccnt);
    ns_lp_printf("CPI Count: %d\n", c->cpicnt);
    ns_lp_printf("Exception Entry/Exits Count: %d\n", c->exccnt);
    ns_lp_printf("Sleep Cycles Count: %d\n", c->sleepcnt);
    ns_lp_printf("Load/Store Wait Count: %d\n", c->lsucnt);
    ns_lp_printf("Folded (cycles saved by zero-cycle instructions) Count: %d\n", c->foldcnt);
}
// #endif
#ifdef AM_PART_APOLLO5B
// Set of DCU enable bits that ns_itm_pcsamp_enable() checks for and, when any
// is missing, requests through am_hal_dcu_update().
// NOTE(review): presumably these are the debug/trace permissions needed for
// DWT output over SWO - confirm against the HAL DCU documentation.
#define NS_DCU_SWO ( \
AM_HAL_DCU_CPUTRC_DWT_SWO | AM_HAL_DCU_CPUDBG_NON_INVASIVE | \
AM_HAL_DCU_CPUDBG_S_NON_INVASIVE | AM_HAL_DCU_CPUTRC_PERFCNT | \
AM_HAL_DCU_SWD | AM_HAL_DCU_TRACE )
/**
 * @brief Reconfigure the ITM and DWT for PC sampling.
 *
 * Assumes ITM printing is already enabled, so all this does is modify the ITM
 * and DWT configuration: flush the trace pipeline, disable the ITM, wait for
 * it to go idle, then program DWT->CTRL for PC sampling and re-enable the ITM
 * with SWO and DWT packet forwarding turned on.
 *
 * @return Status of the "wait for ITM disabled" poll as returned by
 *         am_hal_delay_us_status_change(). The registers are reprogrammed
 *         regardless of that status.
 */
uint32_t ns_perf_enable_pcsamp(void) {
    uint32_t ui32Status = AM_HAL_STATUS_SUCCESS;

    // Disable the ITM and wait for it to be disabled.
    am_hal_itm_tpiu_pipeline_flush();
    ITM->TCR &= ~ITM_TCR_SWOENA_Msk;
    ITM->TCR &= ~ITM_TCR_ITMENA_Msk;

    // Poll (for up to 1000 us) until BOTH the ITMENA and BUSY bits read 0.
    // The two masks must be OR-ed together: AND-ing two different bit masks
    // gives 0, which makes the poll pass immediately without checking the
    // register at all.
    ui32Status = am_hal_delay_us_status_change(
        1000,
        (uint32_t)&ITM->TCR,
        (ITM_TCR_ITMENA_Msk | ITM_TCR_BUSY_Msk),
        0);

    // OK, safe to modify ITM and DWT registers.
    DWT->CTRL =
        _VAL2FLD(DWT_CTRL_PCSAMPLENA, 1) |
        _VAL2FLD(DWT_CTRL_CYCTAP, 1) |
        _VAL2FLD(DWT_CTRL_CYCCNTENA, 1) |
        _VAL2FLD(DWT_CTRL_SYNCTAP, 1) |
        _VAL2FLD(DWT_CTRL_POSTINIT, 1) |
        _VAL2FLD(DWT_CTRL_POSTPRESET, 3);

    ITM->TCR =
        _VAL2FLD(ITM_TCR_TRACEBUSID, 0) | // don't change
        _VAL2FLD(ITM_TCR_GTSFREQ, 3) |    // Doesn't seem to matter
        _VAL2FLD(ITM_TCR_TSPRESCALE, 3) | // Doesn't seem to matter
        _VAL2FLD(ITM_TCR_STALLENA, 0) |
        _VAL2FLD(ITM_TCR_SWOENA, 1) |
        _VAL2FLD(ITM_TCR_DWTENA, 1) |     // Bit 3, which is TXENA in Arm documents
        _VAL2FLD(ITM_TCR_SYNCENA, 1) |
        _VAL2FLD(ITM_TCR_TSENA, 0) |
        _VAL2FLD(ITM_TCR_ITMENA, 1);

    return ui32Status;
}
/**
 * @brief Enable debug, configure the TPIU for UART-mode SWO and turn on the
 *        ITM with DWT packet forwarding.
 *
 * @return Always AM_HAL_STATUS_SUCCESS; no call made here has its result
 *         checked.
 */
static uint32_t ns_dwt_itm_enable(void)
{
    am_hal_debug_enable();

    // SWO scaler for the TPIU: half the maximum clock divided by the default
    // baud rate, minus one.
    const uint32_t swo_scaler =
        ((AM_HAL_CLKGEN_FREQ_MAX_HZ / 2) / AM_HAL_TPIU_BAUD_DEFAULT) - 1;

    am_hal_tpiu_config(
        MCUCTRL_DBGCTRL_DBGTPIUCLKSEL_HFRC_48MHz,
        0,                     // FFCR: disable continuous formatting (EnFCont)
        TPI_CSPSR_CWIDTH_1BIT, // CSPSR
        TPI_SPPR_TXMODE_UART,  // pin protocol (value 1, Manchester, doesn't seem to work)
        swo_scaler);

    // Set the enable bits in the ITM Trace Privilege Register and the ITM
    // Trace Enable Register to enable trace data output.
    ITM->TPR = 0xFFFFFFFF; // Doesn't seem to make a difference (vs. 0).
    ITM->TER = 0xFFFFFFFF;

    // Disable the ITM before configuring it, then give it time to settle.
    ITM->TCR = 0;
    ns_delay_us(100000);

    // Compose the ITM Trace Control Register value and write it in one go.
    const uint32_t tcr =
        _VAL2FLD(ITM_TCR_TRACEBUSID, 0) | // don't change
        _VAL2FLD(ITM_TCR_GTSFREQ, 3) |    // Doesn't seem to matter
        _VAL2FLD(ITM_TCR_TSPRESCALE, 3) | // Doesn't seem to matter
        _VAL2FLD(ITM_TCR_STALLENA, 0) |
        _VAL2FLD(ITM_TCR_SWOENA, 1) |
        _VAL2FLD(ITM_TCR_DWTENA, 1) |     // Bit 3, which is TXENA in Arm documents
        _VAL2FLD(ITM_TCR_SYNCENA, 1) |
        _VAL2FLD(ITM_TCR_TSENA, 0) |
        _VAL2FLD(ITM_TCR_ITMENA, 1);
    ITM->TCR = tcr;

    return AM_HAL_STATUS_SUCCESS;
} // ns_dwt_itm_enable()
// Bring up everything needed for PC sampling over SWO:
//  1. Inside a critical section, make sure every DCU bit in NS_DCU_SWO is
//     enabled, temporarily powering OTP and CRYPTO if they were off.
//  2. Enable the TPIU, configure the ITM/DWT via ns_dwt_itm_enable(), and
//     configure the SWO pin.
//
// Returns 0 on success, or -1 when CRYPTO is not powered and idle or when
// am_hal_dcu_update() fails. A failure in any step of (2) does NOT return:
// the function spins forever in while(1).
int32_t ns_itm_pcsamp_enable(void)
{
uint32_t ui32dcuVal;
int32_t i32RetValue = 0;
// Remember which peripherals this function powered on so that only those are
// powered back off afterwards.
bool bOffCryptoOnExit = false;
bool bOffOtpOnExit = false;
AM_CRITICAL_BEGIN;
{
// Power up OTP if its status bit reads 0.
// NOTE(review): presumably required for the DCU access below - confirm.
if (PWRCTRL->DEVPWRSTATUS_b.PWRSTOTP == 0)
{
bOffOtpOnExit = true;
am_hal_pwrctrl_periph_enable(AM_HAL_PWRCTRL_PERIPH_OTP);
}
// Power up CRYPTO if its status bit reads 0.
if (PWRCTRL->DEVPWRSTATUS_b.PWRSTCRYPTO == 0)
{
bOffCryptoOnExit = true;
am_hal_pwrctrl_periph_enable(AM_HAL_PWRCTRL_PERIPH_CRYPTO);
}
// Only touch the DCU when CRYPTO reports powered and idle.
if ((PWRCTRL->DEVPWRSTATUS_b.PWRSTCRYPTO == 1) && (CRYPTO->HOSTCCISIDLE_b.HOSTCCISIDLE == 1))
{
am_hal_dcu_get(&ui32dcuVal);
// Request the NS_DCU_SWO bits only if at least one is not already set;
// a failed update is reported as -1.
if ( ((ui32dcuVal & NS_DCU_SWO) != NS_DCU_SWO) &&
(am_hal_dcu_update(true, NS_DCU_SWO) != AM_HAL_STATUS_SUCCESS) )
{
i32RetValue = -1;
}
}
else
{
// CRYPTO not powered or not idle: the DCU cannot be checked/updated.
i32RetValue = -1;
}
}
// Restore the original power state of anything switched on above.
if (bOffCryptoOnExit == true)
{
am_hal_pwrctrl_periph_disable(AM_HAL_PWRCTRL_PERIPH_CRYPTO);
}
if (bOffOtpOnExit == true)
{
am_hal_pwrctrl_periph_disable(AM_HAL_PWRCTRL_PERIPH_OTP);
}
AM_CRITICAL_END;
// DCU step failed: bail out before touching the trace hardware.
if (i32RetValue != 0)
{
return i32RetValue;
}
// From here on any failure halts in an infinite loop instead of returning.
if ( am_hal_tpiu_enable(AM_HAL_TPIU_BAUD_1M) != AM_HAL_STATUS_SUCCESS )
{
while(1);
}
if ( ns_dwt_itm_enable() != AM_HAL_STATUS_SUCCESS )
{
while(1);
}
// Route the SWO signal to its pin; a non-zero result is treated as failure.
if ( am_hal_gpio_pinconfig(AM_BSP_GPIO_ITM_SWO, g_AM_BSP_GPIO_ITM_SWO) )
{
while (1);
}
return i32RetValue;
} // ns_itm_pcsamp_enable()
#endif // AM_PART_APOLLO5B