diff --git a/device/device.c b/device/device.c index 0eda15e..89924e8 100644 --- a/device/device.c +++ b/device/device.c @@ -151,10 +151,11 @@ void device_destroy(struct cen64_device *device, const char *cart_path) { uint32_t i; for (i = 0; i < 8 * 1024 * 1024; i++) { - uint64_t sample = get_profile_sample(device->vr4300, i); - if (sample < 10) + const uint64_t sample = get_profile_sample(device->vr4300, i); + const uint64_t l1d_sample = get_profile_sample(device->vr4300, i + (8 * 1024 * 1024)); + if (sample < 10 && l1d_sample < 10) continue; - fprintf(f, "%x %lu\n", i + 0x80000000, sample); + fprintf(f, "%x %lu %lu\n", i + 0x80000000, sample, l1d_sample); } fclose(f); diff --git a/util/cen64-profile2callgrind.cpp b/util/cen64-profile2callgrind.cpp index 601d78f..d078b73 100644 --- a/util/cen64-profile2callgrind.cpp +++ b/util/cen64-profile2callgrind.cpp @@ -130,11 +130,11 @@ int main(int argc, char **argv) { fprintf(out, "# callgrind format\n"); fprintf(out, "cmd: %s\n", argv[2]); - fprintf(out, "events: instructions\n\n"); + fprintf(out, "events: instructions l1d-misses\n\n"); fprintf(out, "ob=%s\n\n", argv[2]); - uint64_t summary = 0; + uint64_t summary = 0, l1d_summary = 0; // We have a list of functions, now turn the samples into C line info f = fopen(argv[1], "r"); @@ -142,11 +142,12 @@ int main(int argc, char **argv) { while (fgets(buf, PATH_MAX, f)) { uint32_t addr; - uint64_t num; - if (sscanf(buf, "%x %lu", &addr, &num) != 2) + uint64_t num, l1d_num; + if (sscanf(buf, "%x %lu %lu", &addr, &num, &l1d_num) != 3) die("Malformed profile file\n"); summary += num; + l1d_summary += l1d_num; found = false; bfd_map_over_sections(bin, findsym, &addr); @@ -161,20 +162,20 @@ int main(int argc, char **argv) { } fprintf(out, "fn=%s\n", funcname); - fprintf(out, "%u %lu\n\n", lineno, num); + fprintf(out, "%u %lu %lu\n\n", lineno, num, l1d_num); } else { fprintf(out, "fl=??\n"); map::const_iterator it = funcs.lower_bound(addr); it--; fprintf(out, "fn=%s\n", it->second.c_str()); - fprintf(out, "0 %lu\n\n", num); + fprintf(out, "0 %lu %lu\n\n", num, l1d_num); } } bfd_close(bin); - fprintf(out, "totals: %lu\n", summary); + fprintf(out, "totals: %lu %lu\n", summary, l1d_summary); fclose(f); fclose(out); diff --git a/vr4300/cpu.c b/vr4300/cpu.c index 019fb59..81a269a 100644 --- a/vr4300/cpu.c +++ b/vr4300/cpu.c @@ -66,7 +66,7 @@ int vr4300_init(struct vr4300 *vr4300, struct bus_controller *bus, bool profilin vr4300->mi_regs[MI_INIT_MODE_REG] = 0x80; if (profiling) - vr4300->profile_samples = calloc(8 * 1024 * 1024, sizeof(uint64_t)); + vr4300->profile_samples = calloc(2 * 8 * 1024 * 1024, sizeof(uint64_t)); else vr4300->profile_samples = NULL; diff --git a/vr4300/pipeline.c b/vr4300/pipeline.c index 368f9e1..e085485 100644 --- a/vr4300/pipeline.c +++ b/vr4300/pipeline.c @@ -298,6 +298,12 @@ static int vr4300_dc_stage(struct vr4300 *vr4300) { request->paddr = paddr; exdc_latch->cached = cached; + if (vr4300->profile_samples) { + uint32_t idx = exdc_latch->common.pc - 0x80000000; + idx &= (8 * 1024 * 1024) - 1; + vr4300->profile_samples[idx + (8 * 1024 * 1024)]++; + } + // Miss: stall for one cycle, then move to the DCM phase. vr4300->pipeline.cycles_to_stall = 0; vr4300->regs[PIPELINE_CYCLE_TYPE] = 6; @@ -316,6 +322,13 @@ static int vr4300_dc_stage(struct vr4300 *vr4300) { request->paddr = paddr; exdc_latch->cached = cached; + + if (vr4300->profile_samples) { + uint32_t idx = exdc_latch->common.pc - 0x80000000; + idx &= (8 * 1024 * 1024) - 1; + vr4300->profile_samples[idx + (8 * 1024 * 1024)]++; + } + VR4300_DCM(vr4300); return 1; }