File ns_pmu_accumulator.c
File List > apollo330 > ns_pmu_accumulator.c
Go to the documentation of this file
#include "ns_pmu_utils.h" /* ns_pmu_event_create, _init, … */
#include "ns_pmu_map.h"
#include <string.h>
#include "ns_pmu_accumulator.h"
#ifndef EVENTS_PER_RUN
#define EVENTS_PER_RUN 4 /* Apollo5B can sample four 32‑bit counters */
#endif
typedef struct {
const char *tag;
uint16_t row;
} tag_map_t;
/* One control block per active matrix ----------------------------------- */
typedef struct {
bool in_use;
bool complete;
uint16_t ops, events;
uint16_t next_event; /* next PMU event index to sample */
uint16_t next_row; /* next unused row id */
uint32_t *sum; /* caller‑supplied [ops*events] */
tag_map_t tag_map[NS_PMU_MAX_OPS];
ns_pmu_config_t cfg; /* scratch per‑run PMU setup */
} acc_t;
static acc_t g_acc[NS_PMU_MAX_ACTIVE_MATRICES] = {0};
/* ---------- internal helpers ------------------------------------------- */
static acc_t *handle_to_acc(ns_pmu_accm_t h)
{
return (h.id < NS_PMU_MAX_ACTIVE_MATRICES && g_acc[h.id].in_use)
? &g_acc[h.id] : NULL;
}
static void cfg_select_events(acc_t *a)
{
a->cfg.api = &ns_pmu_V1_0_0;
ns_pmu_reset_config(&a->cfg);
for (uint8_t i = 0; i < EVENTS_PER_RUN; ++i) {
uint16_t ev = a->next_event + i;
if (ev >= a->events) break;
ns_pmu_event_create(&a->cfg.events[i],
ns_pmu_map[ev].eventId,
NS_PMU_EVENT_COUNTER_SIZE_32);
}
ns_pmu_init(&a->cfg);
}
/* ---------- API implementation ----------------------------------------- */
ns_pmu_accm_t ns_pmu_accm_create(uint16_t ops,
uint16_t events,
void *backing_buf)
{
ns_pmu_accm_t h = { .id = 0xFF };
if (!backing_buf || ops == 0 || ops > NS_PMU_MAX_OPS ||
events == 0 || events > NS_PMU_MAP_SIZE)
return h;
/* Find free slot */
uint8_t idx;
for (idx = 0; idx < NS_PMU_MAX_ACTIVE_MATRICES; ++idx)
if (!g_acc[idx].in_use) break;
if (idx == NS_PMU_MAX_ACTIVE_MATRICES) return h;
acc_t *a = &g_acc[idx];
memset(a, 0, sizeof(*a));
a->in_use = true;
a->ops = ops;
a->events = events;
a->sum = (uint32_t *)backing_buf;
a->next_event= 0;
memset(a->sum, 0, NS_PMU_MATRIX_BYTES(ops,events));
for (uint16_t i = 0; i < NS_PMU_MAX_OPS; ++i) a->tag_map[i].tag = NULL;
cfg_select_events(a); /* arm PMU for first run */
h.id = idx;
return h;
}
void ns_pmu_accm_destroy(ns_pmu_accm_t h)
{
acc_t *a = handle_to_acc(h);
if (a) memset(a, 0, sizeof(*a));
}
void ns_pmu_accm_inference_begin(ns_pmu_accm_t h)
{
/* Nothing to do – cfg is already set by create() or previous end() */
(void)h;
}
void ns_pmu_accm_inference_end(ns_pmu_accm_t h)
{
acc_t *a = handle_to_acc(h);
if (!a || a->complete) return;
a->next_event += EVENTS_PER_RUN;
if (a->next_event >= a->events) {
a->complete = true;
return;
}
cfg_select_events(a); /* prepare for next run */
}
void ns_pmu_accm_op_begin(ns_pmu_accm_t h, uint16_t op_idx)
{
acc_t *a = handle_to_acc(h);
if (!a || op_idx >= a->ops) return;
ns_pmu_reset_counters();
(void)a;
}
void ns_pmu_accm_op_end(ns_pmu_accm_t h, uint16_t op_idx)
{
acc_t *a = handle_to_acc(h);
if (!a || op_idx >= a->ops) return;
ns_pmu_get_counters(&a->cfg);
for (uint8_t i = 0; i < EVENTS_PER_RUN; ++i) {
uint16_t ev = a->next_event + i;
if (ev >= a->events) break;
uint32_t *cell = &a->sum[op_idx * a->events + ev];
*cell += a->cfg.counter[i].counterValue;
}
}
/* ------ Tag resolution -------------------------------------------------- */
static uint16_t tag_lookup(acc_t *a, const char *tag, bool create_if_missing)
{
/* pointer equality first (fast path) */
for (uint16_t i = 0; i < a->next_row; ++i)
if (a->tag_map[i].tag == tag) return a->tag_map[i].row;
/* fall back to strcmp in case compiler duplicated string literals */
for (uint16_t i = 0; i < a->next_row; ++i)
if (strcmp(a->tag_map[i].tag, tag) == 0) return a->tag_map[i].row;
if (!create_if_missing || a->next_row >= a->ops) return UINT16_MAX;
/* new tag */
uint16_t row = a->next_row++;
a->tag_map[row].tag = tag;
a->tag_map[row].row = row;
return row;
}
uint16_t ns_pmu_accm_resolve_tag(ns_pmu_accm_t h, const char *tag)
{
acc_t *a = handle_to_acc(h);
if (!a || !tag) return UINT16_MAX;
return tag_lookup(a, tag, true);
}
uint16_t ns_pmu_accm_find_tag(ns_pmu_accm_t h, const char *tag)
{
acc_t *a = handle_to_acc(h);
if (!a || !tag) return UINT16_MAX;
return tag_lookup(a, tag, false);
}
/* ------ Query ----------------------------------------------------------- */
bool ns_pmu_accm_complete(ns_pmu_accm_t h)
{
acc_t *a = handle_to_acc(h);
return a ? a->complete : false;
}
void ns_pmu_accm_get(ns_pmu_accm_t h, uint32_t **matrix)
{
acc_t *a = handle_to_acc(h);
if (a && matrix) *matrix = a->sum;
}
void ns_pmu_accmprint_matrix(ns_pmu_accm_t h, uint32_t *matrix, uint16_t ops, uint16_t events)
{
acc_t *a = handle_to_acc(h);
// Print the header with the event names
ns_lp_printf("Op: ");
for (uint16_t j = 0; j < events; ++j) {
ns_lp_printf("%s ", ns_pmu_map[j].regname);
}
ns_lp_printf("\n");
if (a && matrix) {
for (uint16_t i = 0; i < ops; ++i) {
ns_lp_printf("%d, ", i);
for (uint16_t j = 0; j < events; ++j) {
ns_lp_printf("%d, ", matrix[i * events + j]);
}
ns_lp_printf("\n");
}
}
}
void ns_pmu_accm_get_layer(ns_pmu_accm_t h,
uint16_t layer,
uint32_t* out,
uint16_t caller_capacity)
{
acc_t *a = handle_to_acc(h);
if (!a || !out || layer >= a->ops) return;
uint16_t n = (caller_capacity < a->events) ? caller_capacity : a->events;
memcpy(out, &a->sum[layer * a->events], n * sizeof(uint32_t));
}