Mirror of https://github.com/fail0verflow/switch-linux.git (synced 2025-05-04 02:34:21 -04:00)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Bigger kernel side changes:

   - Add backwards writing capability to the perf ring-buffer code,
     which is preparation for future advanced features like robust
     'overwrite support' and snapshot mode. (Wang Nan)

   - Add pause and resume ioctls for the perf ringbuffer (Wang Nan)

   - x86 Intel cstate code cleanups and reorganization (Thomas Gleixner)

   - x86 Intel uncore and CPU PMU driver updates (Kan Liang, Peter Zijlstra)

   - x86 AUX (Intel PT) related enhancements and updates (Alexander Shishkin)

   - x86 MSR PMU driver enhancements and updates (Huang Rui)

   - ... and lots of other changes spread out over 40+ commits.

  Biggest tooling side changes:

   - 'perf trace' features and enhancements (Arnaldo Carvalho de Melo)

   - BPF tooling updates (Wang Nan)

   - 'perf sched' updates (Jiri Olsa)

   - 'perf probe' updates (Masami Hiramatsu)

   - ... plus 200+ other enhancements, fixes and cleanups to tools/

  The merge commits, the shortlog and the changelogs contain a lot more
  details."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (249 commits)
  perf/core: Disable the event on a truncated AUX record
  perf/x86/intel/pt: Generate PMI in the STOP region as well
  perf buildid-cache: Use lsdir() for looking up buildid caches
  perf symbols: Use lsdir() for the search in kcore cache directory
  perf tools: Use SBUILD_ID_SIZE where applicable
  perf tools: Fix lsdir to set errno correctly
  perf trace: Move seccomp args beautifiers to tools/perf/trace/beauty/
  perf trace: Move flock op beautifier to tools/perf/trace/beauty/
  perf build: Add build-test for debug-frame on arm/arm64
  perf build: Add build-test for libunwind cross-platforms support
  perf script: Fix export of callchains with recursion in db-export
  perf script: Fix callchain addresses in db-export
  perf script: Fix symbol insertion behavior in db-export
  perf symbols: Add dso__insert_symbol function
  perf scripting python: Use Py_FatalError instead of die()
  perf tools: Remove xrealloc and ALLOC_GROW
  perf help: Do not use ALLOC_GROW in add_cmd_list
  perf pmu: Make pmu_formats_string to check return value of strbuf
  perf header: Make topology checkers to check return value of strbuf
  perf tools: Make alias handler to check return value of strbuf
  ...

commit 36db171cc7
217 changed files with 8344 additions and 2747 deletions
Documentation/sysctl/kernel.txt
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
 - panic_on_warn
 - perf_cpu_time_max_percent
 - perf_event_paranoid
+- perf_event_max_stack
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 2.
 
 ==============================================================
 
+perf_event_max_stack:
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
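
For illustration, a minimal user-space sketch (not part of this commit) that reads the new knob at the path documented above:

	/* Sketch only: read /proc/sys/kernel/perf_event_max_stack. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
		int max_stack;

		if (!f)
			return 1;
		if (fscanf(f, "%d", &max_stack) != 1) {
			fclose(f);
			return 1;
		}
		/* Defaults to 127 (PERF_MAX_STACK_DEPTH) per the text above */
		printf("perf_event_max_stack = %d\n", max_stack);
		fclose(f);
		return 0;
	}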
arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	info->address &= ~alignment_mask;
 	info->ctrl.len <<= offset;
 
-	if (!bp->overflow_handler) {
+	if (is_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 
 unlock:
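
These hunks swap open-coded !bp->overflow_handler checks for is_default_overflow_handler(). A hedged sketch of that helper, added elsewhere in this series (the exact body is not shown on this page, so this is reconstructed):

	/* Sketch of the helper assumed by the hunks above: true when the
	 * event still uses the default perf output handler, i.e. nobody
	 * (e.g. ptrace or kgdb) installed a custom overflow callback. */
	static inline bool is_default_overflow_handler(struct perf_event *event)
	{
		return likely(event->overflow_handler == perf_event_output);
	}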
arch/arm/kernel/perf_callchain.c
@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	while ((entry->nr < sysctl_perf_event_max_stack) &&
 	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!bp->overflow_handler)
+		if (is_default_overflow_handler(bp))
 			step = 1;
 unlock:
 	rcu_read_unlock();
@@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 		perf_bp_event(wp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			step = 1;
 
 unlock:
arch/arm64/kernel/perf_callchain.c
@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
 		tail = (struct frame_tail __user *)regs->regs[29];
 
-		while (entry->nr < PERF_MAX_STACK_DEPTH &&
+		while (entry->nr < sysctl_perf_event_max_stack &&
 		       tail && !((unsigned long)tail & 0xf))
 			tail = user_backtrace(tail, entry);
 	} else {
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
 		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
 
-		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+		while ((entry->nr < sysctl_perf_event_max_stack) &&
 		       tail && !((unsigned long)tail & 0x3))
 			tail = compat_user_backtrace(tail, entry);
 #endif
arch/metag/kernel/perf_callchain.c
@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	--frame;
 
-	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+	while ((entry->nr < sysctl_perf_event_max_stack) && frame)
 		frame = user_backtrace(frame, entry);
 }
arch/mips/kernel/perf_event.c
@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
 		addr = *sp++;
 		if (__kernel_text_address(addr)) {
 			perf_callchain_store(entry, addr);
-			if (entry->nr >= PERF_MAX_STACK_DEPTH)
+			if (entry->nr >= sysctl_perf_event_max_stack)
 				break;
 		}
 	}
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	}
 	do {
 		perf_callchain_store(entry, pc);
-		if (entry->nr >= PERF_MAX_STACK_DEPTH)
+		if (entry->nr >= sysctl_perf_event_max_stack)
 			break;
 		pc = unwind_stack(current, &sp, pc, &ra);
 	} while (pc);
|
@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
|
||||||
sp = regs->gpr[1];
|
sp = regs->gpr[1];
|
||||||
perf_callchain_store(entry, next_ip);
|
perf_callchain_store(entry, next_ip);
|
||||||
|
|
||||||
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
while (entry->nr < sysctl_perf_event_max_stack) {
|
||||||
fp = (unsigned long __user *) sp;
|
fp = (unsigned long __user *) sp;
|
||||||
if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
|
if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
|
||||||
return;
|
return;
|
||||||
|
@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
|
||||||
sp = regs->gpr[1];
|
sp = regs->gpr[1];
|
||||||
perf_callchain_store(entry, next_ip);
|
perf_callchain_store(entry, next_ip);
|
||||||
|
|
||||||
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
while (entry->nr < sysctl_perf_event_max_stack) {
|
||||||
fp = (unsigned int __user *) (unsigned long) sp;
|
fp = (unsigned int __user *) (unsigned long) sp;
|
||||||
if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
|
if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
|
||||||
return;
|
return;
|
||||||
|
|
|
arch/sparc/kernel/perf_event.c
@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			}
 		}
 #endif
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static inline int
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 		pc = sf.callers_pc;
 		ufp = (unsigned long)sf.fp + STACK_BIAS;
 		perf_callchain_store(entry, pc);
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 			ufp = (unsigned long)sf.fp;
 		}
 		perf_callchain_store(entry, pc);
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 void
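
All of these arch hunks bound the callchain walk by sysctl_perf_event_max_stack instead of the compile-time PERF_MAX_STACK_DEPTH. The variable and the -EBUSY guard documented in the kernel.txt hunk live in kernel/events/callchain.c, which this page does not show; a hedged reconstruction (names and locking are assumptions, not taken from this page):

	/* Sketch: the sysctl handler only commits a new limit when no
	 * callchain-enabled events exist, matching the documented -EBUSY. */
	int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;

	int perf_event_max_stack_handler(struct ctl_table *table, int write,
					 void __user *buffer, size_t *lenp, loff_t *ppos)
	{
		int new_value = sysctl_perf_event_max_stack, ret;
		struct ctl_table new_table = *table;

		new_table.data = &new_value;
		ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
		if (ret || !write)
			return ret;

		mutex_lock(&callchain_mutex);
		if (atomic_read(&nr_callchain_events))
			ret = -EBUSY;	/* callchains in use, see kernel.txt hunk */
		else
			sysctl_perf_event_max_stack = new_value;
		mutex_unlock(&callchain_mutex);

		return ret;
	}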
arch/x86/Kconfig
@@ -164,10 +164,6 @@ config INSTRUCTION_DECODER
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
 
-config PERF_EVENTS_INTEL_UNCORE
-	def_bool y
-	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
 config OUTPUT_FORMAT
 	string
 	default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR
 	def_bool y
 	depends on X86_MCE_INTEL
 
+source "arch/x86/events/Kconfig"
+
 config X86_LEGACY_VM86
 	bool "Legacy VM86 support"
 	default n
@@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE
 
-config PERF_EVENTS_AMD_POWER
-	depends on PERF_EVENTS && CPU_SUP_AMD
-	tristate "AMD Processor Power Reporting Mechanism"
-	---help---
-	  Provide power reporting mechanism support for AMD processors.
-	  Currently, it leverages X86_FEATURE_ACC_POWER
-	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
-	  average power consumption on Family 15h processors.
-
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
arch/x86/events/Kconfig (new file, 36 lines)
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+	tristate "Intel uncore performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel uncore performance events. These are
+	  available on NehalemEX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+	tristate "Intel rapl performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel rapl performance events for power
+	  monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+	tristate "Intel cstate performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel cstate performance events for power
+	  monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+	depends on PERF_EVENTS && CPU_SUP_AMD
+	tristate "AMD Processor Power Reporting Mechanism"
+	---help---
+	  Provide power reporting mechanism support for AMD processors.
+	  Currently, it leverages X86_FEATURE_ACC_POWER
+	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+	  average power consumption on Family 15h processors.
+
+endmenu
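
Because the uncore, RAPL and cstate drivers above are now tristate, they can be built as modules rather than only built in. An illustrative .config fragment (my example, not part of the commit):

	CONFIG_PERF_EVENTS_INTEL_UNCORE=m
	CONFIG_PERF_EVENTS_INTEL_RAPL=m
	CONFIG_PERF_EVENTS_INTEL_CSTATE=m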
arch/x86/events/Makefile
@@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd/iommu.o
 endif
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/cstate.o intel/ds.o intel/knc.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore_snb.o intel/uncore_snbep.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= msr.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/
arch/x86/events/amd/uncore.c
@@ -263,6 +263,7 @@ static const struct attribute_group *amd_uncore_attr_groups[] = {
 };
 
 static struct pmu amd_nb_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
 	.attr_groups	= amd_uncore_attr_groups,
 	.name		= "amd_nb",
 	.event_init	= amd_uncore_event_init,
@@ -274,6 +275,7 @@ static struct pmu amd_nb_pmu = {
 };
 
 static struct pmu amd_l2_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
 	.attr_groups	= amd_uncore_attr_groups,
 	.name		= "amd_l2",
 	.event_init	= amd_uncore_event_init,
arch/x86/events/core.c
@@ -360,6 +360,9 @@ int x86_add_exclusive(unsigned int what)
 {
 	int i;
 
+	if (x86_pmu.lbr_pt_coexist)
+		return 0;
+
 	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
 		mutex_lock(&pmc_reserve_mutex);
 		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@ fail_unlock:
 
 void x86_del_exclusive(unsigned int what)
 {
+	if (x86_pmu.lbr_pt_coexist)
+		return;
+
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
 	atomic_dec(&active_events);
 }
@@ -2277,7 +2283,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
 	fp = compat_ptr(ss_base + regs->bp);
 	pagefault_disable();
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		unsigned long bytes;
 		frame.next_frame     = 0;
 		frame.return_address = 0;
@@ -2337,7 +2343,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		return;
 
 	pagefault_disable();
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		unsigned long bytes;
 		frame.next_frame     = NULL;
 		frame.return_address = 0;
arch/x86/events/intel/Makefile (new file, 9 lines)
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl.o
+intel-rapl-objs				:= rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
+intel-uncore-objs			:= uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE)	+= intel-cstate.o
+intel-cstate-objs			:= cstate.o
arch/x86/events/intel/bts.c
@@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
 	memset(page_address(phys->page) + index, 0, phys->size - index);
 }
 
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-	if (buf->snapshot)
-		return false;
-
-	if (local_read(&buf->data_size) >= bts->handle.size ||
-	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-		return true;
-
-	return false;
-}
-
 static void bts_update(struct bts_ctx *bts)
 {
 	int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts)
 	}
 }
 
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct bts_buffer *buf = perf_get_aux(&bts->handle);
 	u64 config = 0;
 
-	if (!buf || bts_buffer_is_full(buf, bts))
-		return;
-
-	event->hw.itrace_started = 1;
-	event->hw.state = 0;
-
 	if (!buf->snapshot)
 		config |= ARCH_PERFMON_EVENTSEL_INT;
 	if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event)
 	wmb();
 
 	intel_pmu_enable_bts(config);
 }
 
 static void bts_event_start(struct perf_event *event, int flags)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf;
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		goto fail_stop;
+
+	if (bts_buffer_reset(buf, &bts->handle))
+		goto fail_end_stop;
+
+	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+	event->hw.itrace_started = 1;
+	event->hw.state = 0;
+
 	__bts_event_start(event);
 
 	/* PMI handler: this counter is running and likely generating PMIs */
 	ACCESS_ONCE(bts->started) = 1;
+
+	return;
+
+fail_end_stop:
+	perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+	event->hw.state = PERF_HES_STOPPED;
 }
 
 static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event)
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
 
 	/* PMI handler: don't restart this counter */
 	ACCESS_ONCE(bts->started) = 0;
 
 	__bts_event_stop(event);
 
-	if (flags & PERF_EF_UPDATE)
+	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
+
+		if (buf) {
+			if (buf->snapshot)
+				bts->handle.head =
+					local_xchg(&buf->data_size,
+						   buf->nr_pages << PAGE_SHIFT);
+			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+					    !!local_xchg(&buf->lost, 0));
+		}
+
+		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+		cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+		cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+		cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+	}
 }
 
 void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@ int intel_bts_interrupt(void)
 
 static void bts_event_del(struct perf_event *event, int mode)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
 	bts_event_stop(event, PERF_EF_UPDATE);
-
-	if (buf) {
-		if (buf->snapshot)
-			bts->handle.head =
-				local_xchg(&buf->data_size,
-					   buf->nr_pages << PAGE_SHIFT);
-		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-				    !!local_xchg(&buf->lost, 0));
-	}
-
-	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
 }
 
 static int bts_event_add(struct perf_event *event, int mode)
 {
-	struct bts_buffer *buf;
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int ret = -EBUSY;
 
 	event->hw.state = PERF_HES_STOPPED;
 
@@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode)
 	if (bts->handle.event)
 		return -EBUSY;
 
-	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
-		return -EINVAL;
-
-	ret = bts_buffer_reset(buf, &bts->handle);
-	if (ret) {
-		perf_aux_output_end(&bts->handle, 0, false);
-		return ret;
-	}
-
-	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
 	if (mode & PERF_EF_START) {
 		bts_event_start(event, 0);
-		if (hwc->state & PERF_HES_STOPPED) {
-			bts_event_del(event, 0);
-			return -EBUSY;
-		}
+		if (hwc->state & PERF_HES_STOPPED)
+			return -EINVAL;
 	}
 
 	return 0;
arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
 	},
 };
 
+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+	EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
+#define GLM_DEMAND_RFO			BIT_ULL(1)
+#define GLM_ANY_RESPONSE		BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
+#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
+#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_PREFETCH|
						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_PREFETCH|
+						  GLM_LLC_MISS,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3447,7 +3581,7 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_atom();
+		intel_pmu_lbr_init_slm();
 
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@ __init int intel_pmu_init(void)
 		pr_cont("Silvermont events, ");
 		break;
 
+	case 92: /* 14nm Atom "Goldmont" */
+	case 95: /* 14nm Atom "Goldmont Denverton" */
+		memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 * :pp is identical to :ppp
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		pr_cont("Goldmont events, ");
+		break;
+
 	case 37: /* 32nm Westmere    */
 	case 44: /* 32nm Westmere-EP */
 	case 47: /* 32nm Westmere-EX */
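
For context, a hedged user-space sketch (not part of the commit) of how a table like glm_hw_cache_event_ids above is reached: a PERF_TYPE_HW_CACHE event encodes cache id, op and result into attr.config (id | op << 8 | result << 16, per the perf_event_open ABI), and the kernel translates that through the per-model table.

	/* Sketch: open an L1D read-access counter for the current thread. */
	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <string.h>

	static int open_l1d_read_access(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HW_CACHE;
		attr.config = PERF_COUNT_HW_CACHE_L1D |
			      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			      (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);

		/* pid = 0, cpu = -1: count this thread on any CPU */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}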
arch/x86/events/intel/cstate.c
@@ -91,6 +91,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
 				struct kobj_attribute *attr,	\
@@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 				       struct device_attribute *attr,
 				       char *buf);
 
+/* Model -> events mapping */
+struct cstate_model {
+	unsigned long		core_events;
+	unsigned long		pkg_events;
+	unsigned long		quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR		(1UL << 0)
+
 struct perf_cstate_msr {
 	u64	msr;
 	struct	perf_pmu_events_attr *attr;
-	bool	(*test)(int idx);
 };
 
 /* cstate_core PMU */
+
 static struct pmu cstate_core_pmu;
 static bool has_cstate_core;
 
-enum perf_cstate_core_id {
-	/*
-	 * cstate_core events
-	 */
+enum perf_cstate_core_events {
 	PERF_CSTATE_CORE_C1_RES = 0,
 	PERF_CSTATE_CORE_C3_RES,
 	PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@ enum perf_cstate_core_events {
 	PERF_CSTATE_CORE_EVENT_MAX,
 };
 
-bool test_core(int idx)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	    boot_cpu_data.x86 != 6)
-		return false;
-
-	switch (boot_cpu_data.x86_model) {
-	case 30: /* 45nm Nehalem    */
-	case 26: /* 45nm Nehalem-EP */
-	case 46: /* 45nm Nehalem-EX */
-
-	case 37: /* 32nm Westmere    */
-	case 44: /* 32nm Westmere-EP */
-	case 47: /* 32nm Westmere-EX */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	case 42: /* 32nm SandyBridge         */
-	case 45: /* 32nm SandyBridge-E/EN/EP */
-
-	case 58: /* 22nm IvyBridge       */
-	case 62: /* 22nm IvyBridge-EP/EX */
-
-	case 60: /* 22nm Haswell Core */
-	case 63: /* 22nm Haswell Server */
-	case 69: /* 22nm Haswell ULT */
-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-	case 61: /* 14nm Broadwell Core-M */
-	case 86: /* 14nm Broadwell Xeon D */
-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-	case 79: /* 14nm Broadwell Server */
-
-	case 78: /* 14nm Skylake Mobile */
-	case 94: /* 14nm Skylake Desktop */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES ||
-		    idx == PERF_CSTATE_CORE_C7_RES)
-			return true;
-		break;
-	case 55: /* 22nm Atom "Silvermont" */
-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-	case 76: /* 14nm Atom "Airmont" */
-		if (idx == PERF_CSTATE_CORE_C1_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	}
-
-	return false;
-}
-
 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
 
 static struct perf_cstate_msr core_msr[] = {
-	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1,	test_core, },
-	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3,	test_core, },
-	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6,	test_core, },
-	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7,	test_core, },
+	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1 },
+	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3 },
+	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6 },
+	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7 },
 };
 
 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = {
 	NULL,
 };
 
-/* cstate_core PMU end */
-
-
 /* cstate_pkg PMU */
+
 static struct pmu cstate_pkg_pmu;
 static bool has_cstate_pkg;
 
-enum perf_cstate_pkg_id {
-	/*
-	 * cstate_pkg events
-	 */
+enum perf_cstate_pkg_events {
 	PERF_CSTATE_PKG_C2_RES = 0,
 	PERF_CSTATE_PKG_C3_RES,
 	PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@ enum perf_cstate_pkg_events {
 	PERF_CSTATE_PKG_EVENT_MAX,
 };
 
-bool test_pkg(int idx)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	    boot_cpu_data.x86 != 6)
-		return false;
-
-	switch (boot_cpu_data.x86_model) {
-	case 30: /* 45nm Nehalem    */
-	case 26: /* 45nm Nehalem-EP */
-	case 46: /* 45nm Nehalem-EX */
-
-	case 37: /* 32nm Westmere    */
-	case 44: /* 32nm Westmere-EP */
-	case 47: /* 32nm Westmere-EX */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES ||
-		    idx == PERF_CSTATE_CORE_C7_RES)
-			return true;
-		break;
-	case 42: /* 32nm SandyBridge         */
-	case 45: /* 32nm SandyBridge-E/EN/EP */
-
-	case 58: /* 22nm IvyBridge       */
-	case 62: /* 22nm IvyBridge-EP/EX */
-
-	case 60: /* 22nm Haswell Core */
-	case 63: /* 22nm Haswell Server */
-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-	case 61: /* 14nm Broadwell Core-M */
-	case 86: /* 14nm Broadwell Xeon D */
-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-	case 79: /* 14nm Broadwell Server */
-
-	case 78: /* 14nm Skylake Mobile */
-	case 94: /* 14nm Skylake Desktop */
-		if (idx == PERF_CSTATE_PKG_C2_RES ||
-		    idx == PERF_CSTATE_PKG_C3_RES ||
-		    idx == PERF_CSTATE_PKG_C6_RES ||
-		    idx == PERF_CSTATE_PKG_C7_RES)
-			return true;
-		break;
-	case 55: /* 22nm Atom "Silvermont" */
-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-	case 76: /* 14nm Atom "Airmont" */
-		if (idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	case 69: /* 22nm Haswell ULT */
-		if (idx == PERF_CSTATE_PKG_C2_RES ||
-		    idx == PERF_CSTATE_PKG_C3_RES ||
-		    idx == PERF_CSTATE_PKG_C6_RES ||
-		    idx == PERF_CSTATE_PKG_C7_RES ||
-		    idx == PERF_CSTATE_PKG_C8_RES ||
-		    idx == PERF_CSTATE_PKG_C9_RES ||
-		    idx == PERF_CSTATE_PKG_C10_RES)
-			return true;
-		break;
-	}
-
-	return false;
-}
-
 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05");
 PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
 
 static struct perf_cstate_msr pkg_msr[] = {
-	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2,	test_pkg, },
-	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3,	test_pkg, },
-	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6,	test_pkg, },
-	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7,	test_pkg, },
-	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8,	test_pkg, },
-	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9,	test_pkg, },
-	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10,	test_pkg, },
+	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2 },
+	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3 },
+	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6 },
+	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7 },
+	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8 },
+	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9 },
+	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10 },
 };
 
 static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = {
 	NULL,
 };
 
-/* cstate_pkg PMU end*/
-
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
 				       struct device_attribute *attr,
 				       char *buf)
@@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
 static int cstate_pmu_event_init(struct perf_event *event)
 {
 	u64 cfg = event->attr.config;
-	int ret = 0;
+	int cpu;
 
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
@@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event)
 	    event->attr.sample_period) /* no sampling */
 		return -EINVAL;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
 	if (event->pmu == &cstate_core_pmu) {
 		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
 			return -EINVAL;
 		if (!core_msr[cfg].attr)
 			return -EINVAL;
 		event->hw.event_base = core_msr[cfg].msr;
+		cpu = cpumask_any_and(&cstate_core_cpu_mask,
+				      topology_sibling_cpumask(event->cpu));
 	} else if (event->pmu == &cstate_pkg_pmu) {
 		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
 			return -EINVAL;
 		if (!pkg_msr[cfg].attr)
 			return -EINVAL;
 		event->hw.event_base = pkg_msr[cfg].msr;
-	} else
+		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+				      topology_core_cpumask(event->cpu));
+	} else {
 		return -ENOENT;
+	}
+
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
 
-	/* must be done before validate_group */
+	event->cpu = cpu;
 	event->hw.config = cfg;
 	event->hw.idx = -1;
-
-	return ret;
+	return 0;
 }
 
 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode)
 	return 0;
 }
 
+/*
+ * Check if exiting cpu is the designated reader. If so migrate the
+ * events when there is a valid target available
+ */
 static void cstate_cpu_exit(int cpu)
 {
-	int i, id, target;
+	unsigned int target;
 
-	/* cpu exit for cstate core */
-	if (has_cstate_core) {
-		id = topology_core_id(cpu);
-		target = -1;
-
-		for_each_online_cpu(i) {
-			if (i == cpu)
-				continue;
-			if (id == topology_core_id(i)) {
-				target = i;
-				break;
-			}
-		}
-		if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
-			cpumask_set_cpu(target, &cstate_core_cpu_mask);
-		WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-		if (target >= 0)
-			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+	if (has_cstate_core &&
+	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
+
+		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
+			cpumask_set_cpu(target, &cstate_core_cpu_mask);
+			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+		}
 	}
 
-	/* cpu exit for cstate pkg */
-	if (has_cstate_pkg) {
-		id = topology_physical_package_id(cpu);
-		target = -1;
-
-		for_each_online_cpu(i) {
-			if (i == cpu)
-				continue;
-			if (id == topology_physical_package_id(i)) {
-				target = i;
-				break;
-			}
-		}
-		if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
-			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-		WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-		if (target >= 0)
-			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+	if (has_cstate_pkg &&
+	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
+
+		target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
+			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
+			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+		}
 	}
 }
 
 static void cstate_cpu_init(int cpu)
 {
-	int i, id;
+	unsigned int target;
 
-	/* cpu init for cstate core */
-	if (has_cstate_core) {
-		id = topology_core_id(cpu);
-		for_each_cpu(i, &cstate_core_cpu_mask) {
-			if (id == topology_core_id(i))
-				break;
-		}
-		if (i >= nr_cpu_ids)
-			cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-	}
+	/*
+	 * If this is the first online thread of that core, set it in
+	 * the core cpu mask as the designated reader.
+	 */
+	target = cpumask_any_and(&cstate_core_cpu_mask,
+				 topology_sibling_cpumask(cpu));
 
-	/* cpu init for cstate pkg */
-	if (has_cstate_pkg) {
-		id = topology_physical_package_id(cpu);
-		for_each_cpu(i, &cstate_pkg_cpu_mask) {
-			if (id == topology_physical_package_id(i))
-				break;
-		}
-		if (i >= nr_cpu_ids)
-			cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-	}
+	if (has_cstate_core && target >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+	/*
+	 * If this is the first online thread of that package, set it
+	 * in the package cpu mask as the designated reader.
+	 */
+	target = cpumask_any_and(&cstate_pkg_cpu_mask,
+				 topology_core_cpumask(cpu));
+	if (has_cstate_pkg && target >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }
 
 static int cstate_cpu_notifier(struct notifier_block *self,
			       unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		break;
 	case CPU_STARTING:
 		cstate_cpu_init(cpu);
 		break;
-	case CPU_UP_CANCELED:
-	case CPU_DYING:
-		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		break;
 	case CPU_DOWN_PREPARE:
 		cstate_cpu_exit(cpu);
 		break;
 	default:
 		break;
 	}
 
 	return NOTIFY_OK;
 }
 
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-			     struct attribute **events_attrs,
-			     int max_event_nr)
-{
-	int i, j = 0;
-	u64 val;
-
-	/* Probe the cstate events. */
-	for (i = 0; i < max_event_nr; i++) {
-		if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-			msr[i].attr = NULL;
-	}
-
-	/* List remaining events in the sysfs attrs. */
-	for (i = 0; i < max_event_nr; i++) {
-		if (msr[i].attr)
-			events_attrs[j++] = &msr[i].attr->attr.attr;
-	}
-	events_attrs[j] = NULL;
-
-	return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-	/* SLM has different MSR for PKG C6 */
-	switch (boot_cpu_data.x86_model) {
-	case 55:
-	case 76:
-	case 77:
-		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-	}
-
-	if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-		has_cstate_core = true;
-
-	if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-		has_cstate_pkg = true;
-
-	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-	int cpu;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		cstate_cpu_init(cpu);
-
-	__perf_cpu_notifier(cstate_cpu_notifier);
-
-	cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+	.notifier_call	= cstate_cpu_notifier,
+	.priority       = CPU_PRI_PERF + 1,
+};
 
 static struct pmu cstate_core_pmu = {
 	.attr_groups	= core_attr_groups,
 	.name		= "cstate_core",
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= cstate_pmu_event_init,
-	.add		= cstate_pmu_event_add, /* must have */
-	.del		= cstate_pmu_event_del, /* must have */
+	.add		= cstate_pmu_event_add,
+	.del		= cstate_pmu_event_del,
 	.start		= cstate_pmu_event_start,
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
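
A hedged sketch (not from the commit) of the "designated reader" idiom the rewritten hotplug callbacks above implement: one CPU per core or package owns the MSR reads, and on hotplug the ownership migrates to a sibling. Names mirror the kernel APIs used in the diff; the helper name is my own.

	/* Sketch: release ownership on the exiting CPU and hand it to any
	 * other online peer in the same topology domain, if one exists. */
	static void designated_reader_exit(int cpu, struct cpumask *owners,
					   const struct cpumask *peers)
	{
		if (cpumask_test_and_clear_cpu(cpu, owners)) {
			unsigned int target = cpumask_any_but(peers, cpu);

			if (target < nr_cpu_ids)	/* valid new reader found */
				cpumask_set_cpu(target, owners);
		}
	}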
@@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = {
 	.name		= "cstate_pkg",
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= cstate_pmu_event_init,
-	.add		= cstate_pmu_event_add, /* must have */
-	.del		= cstate_pmu_event_del, /* must have */
+	.add		= cstate_pmu_event_add,
+	.del		= cstate_pmu_event_del,
 	.start		= cstate_pmu_event_start,
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES) |
+				  BIT(PERF_CSTATE_CORE_C7_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES) |
+				  BIT(PERF_CSTATE_CORE_C7_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES) |
+				  BIT(PERF_CSTATE_PKG_C8_RES) |
+				  BIT(PERF_CSTATE_PKG_C9_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
+	.quirks			= SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states)				\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+	X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
+	X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
+	X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
+
+	X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
+	X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
+	X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
+
+	X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
+	X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
+
+	X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
+	X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
+
+	X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
+	X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
+	X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
+
+	X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
+
+	X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
+	X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
+	X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
+
+	X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
+	X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
+	X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
+	X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
+
+	X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
+	X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
|
||||||
|
{ },
|
||||||
|
};
|
||||||
|
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Probe the cstate events and insert the available one into sysfs attrs
|
||||||
|
* Return false if there are no available events.
|
||||||
|
*/
|
||||||
|
static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
|
||||||
|
struct perf_cstate_msr *msr,
|
||||||
|
struct attribute **attrs)
|
||||||
{
|
{
|
||||||
int err;
|
bool found = false;
|
||||||
|
unsigned int bit;
|
||||||
|
u64 val;
|
||||||
|
|
||||||
|
for (bit = 0; bit < max; bit++) {
|
||||||
|
if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
|
||||||
|
*attrs++ = &msr[bit].attr->attr.attr;
|
||||||
|
found = true;
|
||||||
|
} else {
|
||||||
|
msr[bit].attr = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*attrs = NULL;
|
||||||
|
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init cstate_probe(const struct cstate_model *cm)
|
||||||
|
{
|
||||||
|
/* SLM has different MSR for PKG C6 */
|
||||||
|
if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
|
||||||
|
pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
|
||||||
|
|
||||||
|
has_cstate_core = cstate_probe_msr(cm->core_events,
|
||||||
|
PERF_CSTATE_CORE_EVENT_MAX,
|
||||||
|
core_msr, core_events_attrs);
|
||||||
|
|
||||||
|
has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
|
||||||
|
PERF_CSTATE_PKG_EVENT_MAX,
|
||||||
|
pkg_msr, pkg_events_attrs);
|
||||||
|
|
||||||
|
return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void cstate_cleanup(void)
|
||||||
|
{
|
||||||
|
if (has_cstate_core)
|
||||||
|
perf_pmu_unregister(&cstate_core_pmu);
|
||||||
|
|
||||||
|
if (has_cstate_pkg)
|
||||||
|
perf_pmu_unregister(&cstate_pkg_pmu);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __init cstate_init(void)
|
||||||
|
{
|
||||||
|
int cpu, err;
|
||||||
|
|
||||||
|
cpu_notifier_register_begin();
|
||||||
|
for_each_online_cpu(cpu)
|
||||||
|
cstate_cpu_init(cpu);
|
||||||
|
|
||||||
if (has_cstate_core) {
|
if (has_cstate_core) {
|
||||||
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
|
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
|
||||||
if (WARN_ON(err))
|
if (err) {
|
||||||
pr_info("Failed to register PMU %s error %d\n",
|
has_cstate_core = false;
|
||||||
cstate_core_pmu.name, err);
|
pr_info("Failed to register cstate core pmu\n");
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_cstate_pkg) {
|
if (has_cstate_pkg) {
|
||||||
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
|
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
|
||||||
if (WARN_ON(err))
|
if (err) {
|
||||||
pr_info("Failed to register PMU %s error %d\n",
|
has_cstate_pkg = false;
|
||||||
cstate_pkg_pmu.name, err);
|
pr_info("Failed to register cstate pkg pmu\n");
|
||||||
|
cstate_cleanup();
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
__register_cpu_notifier(&cstate_cpu_nb);
|
||||||
|
out:
|
||||||
|
cpu_notifier_register_done();
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __init cstate_pmu_init(void)
|
static int __init cstate_pmu_init(void)
|
||||||
{
|
{
|
||||||
|
const struct x86_cpu_id *id;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (cpu_has_hypervisor)
|
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
err = cstate_init();
|
id = x86_match_cpu(intel_cstates_match);
|
||||||
|
if (!id)
|
||||||
|
return -ENODEV;
|
||||||
|
|
||||||
|
err = cstate_probe((const struct cstate_model *) id->driver_data);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
cstate_cpumask_init();
|
return cstate_init();
|
||||||
|
|
||||||
cstate_pmus_register();
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
module_init(cstate_pmu_init);
|
||||||
|
|
||||||
device_initcall(cstate_pmu_init);
|
static void __exit cstate_pmu_exit(void)
|
||||||
|
{
|
||||||
|
cpu_notifier_register_begin();
|
||||||
|
__unregister_cpu_notifier(&cstate_cpu_nb);
|
||||||
|
cstate_cleanup();
|
||||||
|
cpu_notifier_register_done();
|
||||||
|
}
|
||||||
|
module_exit(cstate_pmu_exit);
|
||||||
|
|
|
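Note (reviewer sketch, not part of the patch): the cstate rework above is the pattern repeated across this pull, a switch on boot_cpu_data.x86_model replaced by an x86_cpu_id table whose driver_data points at per-model configuration, plus MODULE_DEVICE_TABLE() so the driver can be built as an autoloading module. A minimal sketch of that lookup under assumed names (my_model_data, my_match, my_init are hypothetical, not from this commit):

#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/cpu_device_id.h>

/* hypothetical per-model data, standing in for struct cstate_model */
struct my_model_data {
	unsigned long core_events;
};

static const struct my_model_data snb_data = { .core_events = 0x7 };

static const struct x86_cpu_id my_match[] = {
	/* vendor, family, model, feature, driver_data */
	{ X86_VENDOR_INTEL, 6, 42, X86_FEATURE_ANY, (unsigned long)&snb_data },
	{},
};
MODULE_DEVICE_TABLE(x86cpu, my_match);

static int __init my_init(void)
{
	const struct x86_cpu_id *id = x86_match_cpu(my_match);

	if (!id)
		return -ENODEV;	/* unknown CPU: refuse to load */

	/* driver_data carries the per-model configuration */
	pr_info("core_events=%#lx\n",
		((const struct my_model_data *)id->driver_data)->core_events);
	return 0;
}
module_init(my_init);

MODULE_LICENSE("GPL");

The same table doubles as the modalias list, which is what makes the device_initcall() to module_init() conversions in this series safe.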
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glm_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	/* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),	/* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@ enum {
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
 	LBR_FORMAT_EIP_FLAGS2	= 0x04,
 	LBR_FORMAT_INFO		= 0x05,
-	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_INFO,
+	LBR_FORMAT_TIME		= 0x06,
+	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_TIME,
 };
 
 static enum {
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 			abort = !!(info & LBR_INFO_ABORT);
 			cycles = (info & LBR_INFO_CYCLES);
 		}
+
+		if (lbr_format == LBR_FORMAT_TIME) {
+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
+			pred = !mis;
+			skip = 1;
+			cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+			to = (u64)((((s64)to) << 16) >> 16);
+		}
+
 		if (lbr_flags & LBR_EIP_FLAGS) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
@@ -1049,6 +1060,24 @@ void __init intel_pmu_lbr_init_atom(void)
 	pr_cont("8-deep LBR, ");
 }
 
+/* slm */
+void __init intel_pmu_lbr_init_slm(void)
+{
+	x86_pmu.lbr_nr	   = 8;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
+}
+
 /* Knights Landing */
 void intel_pmu_lbr_init_knl(void)
 {
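Note (reviewer sketch, not part of the patch): in the LBR_FORMAT_TIME branch added above, the TO MSR overloads its upper 16 bits with a cycle count, so the branch target has to be recovered by sign-extending the remaining 48 bits. A standalone sketch of that unpacking, with a made-up raw value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t to = 0x1234ffff81000000ULL;	/* hypothetical raw TO value */

	/* top 16 bits: cycle count (the LBR_INFO_CYCLES mask is 0xffff) */
	uint16_t cycles = (to >> 48) & 0xffff;

	/* low 48 bits: canonical address, sign-extend from bit 47 upward */
	uint64_t addr = (uint64_t)(((int64_t)to << 16) >> 16);

	/* prints: cycles=4660 to=0xffffffff81000000 */
	printf("cycles=%u to=%#llx\n", cycles, (unsigned long long)addr);
	return 0;
}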
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
@@ -67,11 +67,13 @@ static struct pt_cap_desc {
 	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
 	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
 	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
+	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
 	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
 	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
 	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
 	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
 	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
+	PT_CAP(num_address_ranges,	1, CR_EAX, 0x3),
 	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
 	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
 	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@ static struct attribute_group pt_format_group = {
 	.attrs	= pt_formats_attr,
 };
 
+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+		    char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	switch (pmu_attr->id) {
+	case 0:
+		return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+	case 1:
+		return sprintf(page, "%u:%u\n",
+			       pt_pmu.tsc_art_num,
+			       pt_pmu.tsc_art_den);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+	       pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+	       pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+	&timing_attr_max_nonturbo_ratio.attr.attr,
+	&timing_attr_tsc_art_ratio.attr.attr,
+	NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+	.attrs	= pt_timing_attr,
+};
+
 static const struct attribute_group *pt_attr_groups[] = {
 	&pt_cap_group,
 	&pt_format_group,
+	&pt_timing_group,
 	NULL,
 };
 
@@ -140,6 +179,23 @@ static int __init pt_pmu_hw_init(void)
 	int ret;
 	long i;
 
+	rdmsrl(MSR_PLATFORM_INFO, reg);
+	pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+	/*
+	 * if available, read in TSC to core crystal clock ratio,
+	 * otherwise, zero for numerator stands for "not enumerated"
+	 * as per SDM
+	 */
+	if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+		pt_pmu.tsc_art_num = ebx;
+		pt_pmu.tsc_art_den = eax;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_VMX)) {
 		/*
 		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,6 +319,75 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned int	reg_off;
+} pt_address_ranges[] = {
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
+		.reg_off = RTIT_CTL_ADDR0_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
+		.reg_off = RTIT_CTL_ADDR1_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
+		.reg_off = RTIT_CTL_ADDR2_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
+		.reg_off = RTIT_CTL_ADDR3_OFFSET,
+	}
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	unsigned int range = 0;
+	u64 rtit_ctl = 0;
+
+	if (!filters)
+		return 0;
+
+	perf_event_addr_filters_sync(event);
+
+	for (range = 0; range < filters->nr_filters; range++) {
+		struct pt_filter *filter = &filters->filter[range];
+
+		/*
+		 * Note, if the range has zero start/end addresses due
+		 * to its dynamic object not being loaded yet, we just
+		 * go ahead and program zeroed range, which will simply
+		 * produce no data. Note^2: if executable code at 0x0
+		 * is a concern, we can set up an "invalid" configuration
+		 * such as msr_b < msr_a.
+		 */
+
+		/* avoid redundant msr writes */
+		if (pt->filters.filter[range].msr_a != filter->msr_a) {
+			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+			pt->filters.filter[range].msr_a = filter->msr_a;
+		}
+
+		if (pt->filters.filter[range].msr_b != filter->msr_b) {
+			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+			pt->filters.filter[range].msr_b = filter->msr_b;
+		}
+
+		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+	}
+
+	return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -272,7 +397,8 @@ static void pt_config(struct perf_event *event)
 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	}
 
-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg = pt_config_filters(event);
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -921,24 +1047,80 @@ static void pt_buffer_free_aux(void *data)
 	kfree(buf);
 }
 
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf:	PT buffer.
- * @pt:		Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+static int pt_addr_filters_init(struct perf_event *event)
 {
-	if (buf->snapshot)
-		return false;
-
-	if (local_read(&buf->data_size) >= pt->handle.size)
-		return true;
-
-	return false;
+	struct pt_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	if (!pt_cap_get(PT_CAP_num_address_ranges))
+		return 0;
+
+	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	list_for_each_entry(filter, filters, entry) {
+		/* PT doesn't support single address triggers */
+		if (!filter->range)
+			return -EOPNOTSUPP;
+
+		if (!filter->inode && !kernel_ip(filter->offset))
+			return -EINVAL;
+
+		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	if (!filters)
+		return;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		if (filter->inode && !offs[range]) {
+			msr_a = msr_b = 0;
+		} else {
+			/* apply the offset */
+			msr_a = filter->offset + offs[range];
+			msr_b = filter->size + msr_a;
+		}
+
+		filters->filter[range].msr_a  = msr_a;
+		filters->filter[range].msr_b  = msr_b;
+		filters->filter[range].config = filter->filter ? 1 : 2;
+		range++;
+	}
+
+	filters->nr_filters = range;
 }
 
 /**
@@ -955,7 +1137,7 @@ void intel_pt_interrupt(void)
 	 * after PT has been disabled by pt_event_stop(). Make sure we don't
 	 * do anything (particularly, re-enable) for this event here.
 	 */
-	if (!ACCESS_ONCE(pt->handle_nmi))
+	if (!READ_ONCE(pt->handle_nmi))
 		return;
 
 	/*
@@ -1040,23 +1222,36 @@ EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
 
 static void pt_event_start(struct perf_event *event, int mode)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
-	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	struct pt_buffer *buf;
 
 	if (READ_ONCE(pt->vmx_on))
 		return;
 
-	if (!buf || pt_buffer_is_full(buf, pt)) {
-		event->hw.state = PERF_HES_STOPPED;
-		return;
+	buf = perf_aux_output_begin(&pt->handle, event);
+	if (!buf)
+		goto fail_stop;
+
+	pt_buffer_reset_offsets(buf, pt->handle.head);
+	if (!buf->snapshot) {
+		if (pt_buffer_reset_markers(buf, &pt->handle))
+			goto fail_end_stop;
 	}
 
-	ACCESS_ONCE(pt->handle_nmi) = 1;
-	event->hw.state = 0;
+	WRITE_ONCE(pt->handle_nmi, 1);
+	hwc->state = 0;
 
 	pt_config_buffer(buf->cur->table, buf->cur_idx,
 			 buf->output_off);
 	pt_config(event);
+
+	return;
+
+fail_end_stop:
+	perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+	hwc->state = PERF_HES_STOPPED;
 }
 
 static void pt_event_stop(struct perf_event *event, int mode)
@@ -1067,7 +1262,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * Protect against the PMI racing with disabling wrmsr,
 	 * see comment in intel_pt_interrupt().
 	 */
-	ACCESS_ONCE(pt->handle_nmi) = 0;
+	WRITE_ONCE(pt->handle_nmi, 0);
 
 	pt_config_stop(event);
 
@@ -1090,19 +1285,7 @@ static void pt_event_stop(struct perf_event *event, int mode)
 		pt_handle_status(pt);
 
 		pt_update_head(pt);
-	}
-}
-
-static void pt_event_del(struct perf_event *event, int mode)
-{
-	struct pt *pt = this_cpu_ptr(&pt_ctx);
-	struct pt_buffer *buf;
-
-	pt_event_stop(event, PERF_EF_UPDATE);
-
-	buf = perf_get_aux(&pt->handle);
-
-	if (buf) {
+
 		if (buf->snapshot)
 			pt->handle.head =
 				local_xchg(&buf->data_size,
@@ -1112,9 +1295,13 @@ static void pt_event_del(struct perf_event *event, int mode)
 	}
 }
 
+static void pt_event_del(struct perf_event *event, int mode)
+{
+	pt_event_stop(event, PERF_EF_UPDATE);
+}
+
 static int pt_event_add(struct perf_event *event, int mode)
 {
-	struct pt_buffer *buf;
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = -EBUSY;
@@ -1122,34 +1309,18 @@ static int pt_event_add(struct perf_event *event, int mode)
 	if (pt->handle.event)
 		goto fail;
 
-	buf = perf_aux_output_begin(&pt->handle, event);
-	ret = -EINVAL;
-	if (!buf)
-		goto fail_stop;
-
-	pt_buffer_reset_offsets(buf, pt->handle.head);
-	if (!buf->snapshot) {
-		ret = pt_buffer_reset_markers(buf, &pt->handle);
-		if (ret)
-			goto fail_end_stop;
-	}
-
 	if (mode & PERF_EF_START) {
 		pt_event_start(event, 0);
-		ret = -EBUSY;
+		ret = -EINVAL;
 		if (hwc->state == PERF_HES_STOPPED)
-			goto fail_end_stop;
+			goto fail;
 	} else {
 		hwc->state = PERF_HES_STOPPED;
 	}
 
-	return 0;
+	ret = 0;
 
-fail_end_stop:
-	perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
-	hwc->state = PERF_HES_STOPPED;
 fail:
+
 	return ret;
 }
 
@@ -1159,6 +1330,7 @@ static void pt_event_read(struct perf_event *event)
 
 static void pt_event_destroy(struct perf_event *event)
 {
+	pt_addr_filters_fini(event);
 	x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1173,6 +1345,11 @@ static int pt_event_init(struct perf_event *event)
 	if (x86_add_exclusive(x86_lbr_exclusive_pt))
 		return -EBUSY;
 
+	if (pt_addr_filters_init(event)) {
+		x86_del_exclusive(x86_lbr_exclusive_pt);
+		return -ENOMEM;
+	}
+
 	event->destroy = pt_event_destroy;
 
 	return 0;
@@ -1192,7 +1369,7 @@ static __init int pt_init(void)
 
 	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
 
-	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
 		return -ENODEV;
 
 	get_online_cpus();
@@ -1226,16 +1403,21 @@ static __init int pt_init(void)
 		PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
 	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
 	pt_pmu.pmu.attr_groups	= pt_attr_groups;
 	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
 	pt_pmu.pmu.event_init	= pt_event_init;
 	pt_pmu.pmu.add		= pt_event_add;
 	pt_pmu.pmu.del		= pt_event_del;
 	pt_pmu.pmu.start	= pt_event_start;
 	pt_pmu.pmu.stop		= pt_event_stop;
 	pt_pmu.pmu.read		= pt_event_read;
 	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
 	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	pt_pmu.pmu.addr_filters_sync	 = pt_event_addr_filters_sync;
+	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+	pt_pmu.pmu.nr_addr_filters	 =
+		pt_cap_get(PT_CAP_num_address_ranges);
+
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
 	return ret;
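Note (reviewer sketch, not part of the patch): pt_config_filters() above packs one 4-bit config field per address range into IA32_RTIT_CTL, starting at bit 32 (the RTIT_CTL_ADDRn_OFFSET values defined in the pt.h hunks below). The sync code programs 1 for a "trace inside this range" filter and, assuming the SDM's ADDRn_CFG encoding, 2 for a TraceStop range (the filter->filter ? 1 : 2 line). A quick sketch of the bit packing, values hypothetical:

#include <stdint.h>
#include <stdio.h>

#define RTIT_CTL_ADDR0_OFFSET	32
#define RTIT_CTL_ADDR1_OFFSET	36

int main(void)
{
	uint64_t rtit_ctl = 0;

	rtit_ctl |= 1ULL << RTIT_CTL_ADDR0_OFFSET;	/* range 0: filter */
	rtit_ctl |= 2ULL << RTIT_CTL_ADDR1_OFFSET;	/* range 1: trace-stop */

	/* prints: 0x2100000000 */
	printf("%#llx\n", (unsigned long long)rtit_ctl);
	return 0;
}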
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
@@ -19,6 +19,40 @@
 #ifndef __INTEL_PT_H__
 #define __INTEL_PT_H__
 
+/*
+ * PT MSR bit definitions
+ */
+#define RTIT_CTL_TRACEEN		BIT(0)
+#define RTIT_CTL_CYCLEACC		BIT(1)
+#define RTIT_CTL_OS			BIT(2)
+#define RTIT_CTL_USR			BIT(3)
+#define RTIT_CTL_CR3EN			BIT(7)
+#define RTIT_CTL_TOPA			BIT(8)
+#define RTIT_CTL_MTC_EN			BIT(9)
+#define RTIT_CTL_TSC_EN			BIT(10)
+#define RTIT_CTL_DISRETC		BIT(11)
+#define RTIT_CTL_BRANCH_EN		BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET	14
+#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET	19
+#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET	24
+#define RTIT_CTL_PSB_FREQ		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
+#define RTIT_CTL_ADDR0_OFFSET		32
+#define RTIT_CTL_ADDR0			(0x0full << RTIT_CTL_ADDR0_OFFSET)
+#define RTIT_CTL_ADDR1_OFFSET		36
+#define RTIT_CTL_ADDR1			(0x0full << RTIT_CTL_ADDR1_OFFSET)
+#define RTIT_CTL_ADDR2_OFFSET		40
+#define RTIT_CTL_ADDR2			(0x0full << RTIT_CTL_ADDR2_OFFSET)
+#define RTIT_CTL_ADDR3_OFFSET		44
+#define RTIT_CTL_ADDR3			(0x0full << RTIT_CTL_ADDR3_OFFSET)
+#define RTIT_STATUS_FILTEREN		BIT(0)
+#define RTIT_STATUS_CONTEXTEN		BIT(1)
+#define RTIT_STATUS_TRIGGEREN		BIT(2)
+#define RTIT_STATUS_BUFFOVF		BIT(3)
+#define RTIT_STATUS_ERROR		BIT(4)
+#define RTIT_STATUS_STOPPED		BIT(5)
+
 /*
  * Single-entry ToPA: when this close to region boundary, switch
  * buffers to avoid losing data.
@@ -48,15 +82,20 @@ struct topa_entry {
 #define PT_CPUID_LEAVES		2
 #define PT_CPUID_REGS_NUM	4 /* number of regsters (eax, ebx, ecx, edx) */
 
+/* TSC to Core Crystal Clock Ratio */
+#define CPUID_TSC_LEAF		0x15
+
 enum pt_capabilities {
 	PT_CAP_max_subleaf = 0,
 	PT_CAP_cr3_filtering,
 	PT_CAP_psb_cyc,
+	PT_CAP_ip_filtering,
 	PT_CAP_mtc,
 	PT_CAP_topa_output,
 	PT_CAP_topa_multiple_entries,
 	PT_CAP_single_range_output,
 	PT_CAP_payloads_lip,
+	PT_CAP_num_address_ranges,
 	PT_CAP_mtc_periods,
 	PT_CAP_cycle_thresholds,
 	PT_CAP_psb_periods,
@@ -66,6 +105,9 @@ struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 	bool			vmx;
+	unsigned long		max_nonturbo_ratio;
+	unsigned int		tsc_art_num;
+	unsigned int		tsc_art_den;
 };
 
 /**
@@ -104,14 +146,40 @@ struct pt_buffer {
 	struct topa_entry	*topa_index[0];
 };
 
+#define PT_FILTERS_NUM	4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a:	range start, goes to RTIT_ADDRn_A
+ * @msr_b:	range end, goes to RTIT_ADDRn_B
+ * @config:	4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned long	config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter:	filters defined for this context
+ * @nr_filters:	number of defined filters in the @filter array
+ */
+struct pt_filters {
+	struct pt_filter	filter[PT_FILTERS_NUM];
+	unsigned int		nr_filters;
+};
+
 /**
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
+ * @filters:		last configured filters
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
  * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
+	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
 };
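Note (reviewer sketch, not part of the patch): the CPUID_TSC_LEAF and tsc_art_num/tsc_art_den additions above enumerate the TSC to core crystal clock ratio from CPUID leaf 0x15, which pt_pmu_hw_init() reads in an earlier hunk and exposes as the tsc_art_ratio sysfs attribute. A userspace sketch of the same leaf, assuming GCC/Clang's <cpuid.h> on x86:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x15, &eax, &ebx, &ecx, &edx) || !ebx)
		return 1;	/* zero numerator: ratio not enumerated, per SDM */

	/* TSC frequency = crystal clock (ecx, may read 0) * ebx / eax */
	printf("tsc:crystal ratio %u:%u, crystal %u Hz\n", ebx, eax, ecx);
	return 0;
}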
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
@@ -27,10 +27,14 @@
  *	  event: rapl_energy_dram
  *    perf code: 0x3
  *
- * dram counter: consumption of the builtin-gpu domain (client only)
+ * gpu counter: consumption of the builtin-gpu domain (client only)
  *	  event: rapl_energy_gpu
  *    perf code: 0x4
  *
+ * psys counter: consumption of the builtin-psys domain (client only)
+ *	  event: rapl_energy_psys
+ *    perf code: 0x5
+ *
  * We manage those counters as free running (read-only). They may be
  * use simultaneously by other tools, such as turbostat.
  *
@@ -53,6 +57,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 /*
  * RAPL energy status counters
  */
@@ -64,13 +70,16 @@
 #define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
+#define RAPL_IDX_PSYS_NRG_STAT	4	/* psys */
+#define INTEL_RAPL_PSYS		0x5	/* pseudo-encoding */
 
-#define NR_RAPL_DOMAINS         0x4
+#define NR_RAPL_DOMAINS         0x5
 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 	"pp0-core",
 	"package",
 	"dram",
 	"pp1-gpu",
+	"psys",
 };
 
 /* Clients have PP0, PKG */
@@ -89,6 +98,13 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 			 1<<RAPL_IDX_RAM_NRG_STAT|\
 			 1<<RAPL_IDX_PP1_NRG_STAT)
 
+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+			  1<<RAPL_IDX_PKG_NRG_STAT|\
+			  1<<RAPL_IDX_RAM_NRG_STAT|\
+			  1<<RAPL_IDX_PP1_NRG_STAT|\
+			  1<<RAPL_IDX_PSYS_NRG_STAT)
+
 /* Knights Landing has PKG, RAM */
 #define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
 			 1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@ static int rapl_pmu_event_init(struct perf_event *event)
 		bit = RAPL_IDX_PP1_NRG_STAT;
 		msr = MSR_PP1_ENERGY_STATUS;
 		break;
+	case INTEL_RAPL_PSYS:
+		bit = RAPL_IDX_PSYS_NRG_STAT;
+		msr = MSR_PLATFORM_ENERGY_STATUS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -414,11 +434,13 @@ RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
 RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
 RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
 RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
 
 RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
 
 /*
  * we compute in 0.23 nJ increments regardless of MSR
@@ -427,6 +449,7 @@ RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
 
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@ static struct attribute *rapl_events_hsw_attr[] = {
 	NULL,
 };
 
+static struct attribute *rapl_events_skl_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+	EVENT_PTR(rapl_gpu),
+	EVENT_PTR(rapl_ram),
+	EVENT_PTR(rapl_psys),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+	EVENT_PTR(rapl_gpu_unit),
+	EVENT_PTR(rapl_ram_unit),
+	EVENT_PTR(rapl_psys_unit),
+
+	EVENT_PTR(rapl_cores_scale),
+	EVENT_PTR(rapl_pkg_scale),
+	EVENT_PTR(rapl_gpu_scale),
+	EVENT_PTR(rapl_ram_scale),
+	EVENT_PTR(rapl_psys_scale),
+	NULL,
+};
+
 static struct attribute *rapl_events_knl_attr[] = {
 	EVENT_PTR(rapl_pkg),
 	EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@ static int rapl_cpu_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+static struct notifier_block rapl_cpu_nb = {
+	.notifier_call	= rapl_cpu_notifier,
+	.priority       = CPU_PRI_PERF + 1,
+};
+
 static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@ static int __init rapl_prepare_cpus(void)
 	return 0;
 }
 
-static void __init cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(void)
 {
 	int i;
 
@@ -691,52 +740,92 @@ static int __init init_rapl_pmus(void)
 	return 0;
 }
 
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
-	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
-	[1] = {},
+#define X86_RAPL_MODEL_MATCH(model, init)	\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_rapl_init_fun {
+	bool apply_quirk;
+	int cntr_mask;
+	struct attribute **attrs;
 };
 
+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_CLN,
+	.attrs = rapl_events_cln_attr,
+};
+
+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
+	.apply_quirk = true,
+	.cntr_mask = RAPL_IDX_SRV,
+	.attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_HSW,
+	.attrs = rapl_events_hsw_attr,
+};
+
+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_SRV,
+	.attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
+	.apply_quirk = true,
+	.cntr_mask = RAPL_IDX_KNL,
+	.attrs = rapl_events_knl_attr,
+};
+
+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_SKL_CLN,
+	.attrs = rapl_events_skl_attr,
+};
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+	X86_RAPL_MODEL_MATCH(42, snb_rapl_init),	/* Sandy Bridge */
+	X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),	/* Sandy Bridge-EP */
+
+	X86_RAPL_MODEL_MATCH(58, snb_rapl_init),	/* Ivy Bridge */
+	X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),	/* IvyTown */
+
+	X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),	/* Haswell */
+	X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),	/* Haswell-Server */
+	X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),	/* Haswell-Celeron */
+	X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),	/* Haswell GT3e */
+
+	X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),	/* Broadwell */
+	X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),	/* Broadwell-H */
+	X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),	/* Broadwell-Server */
+	X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),	/* Broadwell Xeon D */
+
+	X86_RAPL_MODEL_MATCH(87, knl_rapl_init),	/* Knights Landing */
+
+	X86_RAPL_MODEL_MATCH(78, skl_rapl_init),	/* Skylake */
+	X86_RAPL_MODEL_MATCH(94, skl_rapl_init),	/* Skylake H/S */
+	{},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+
 static int __init rapl_pmu_init(void)
 {
-	bool apply_quirk = false;
+	const struct x86_cpu_id *id;
+	struct intel_rapl_init_fun *rapl_init;
+	bool apply_quirk;
 	int ret;
 
-	if (!x86_match_cpu(rapl_cpu_match))
+	id = x86_match_cpu(rapl_cpu_match);
+	if (!id)
 		return -ENODEV;
 
-	switch (boot_cpu_data.x86_model) {
-	case 42: /* Sandy Bridge */
-	case 58: /* Ivy Bridge */
-		rapl_cntr_mask = RAPL_IDX_CLN;
-		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
-		break;
-	case 63: /* Haswell-Server */
-	case 79: /* Broadwell-Server */
-		apply_quirk = true;
-		rapl_cntr_mask = RAPL_IDX_SRV;
-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-		break;
-	case 60: /* Haswell */
-	case 69: /* Haswell-Celeron */
-	case 70: /* Haswell GT3e */
-	case 61: /* Broadwell */
-	case 71: /* Broadwell-H */
-		rapl_cntr_mask = RAPL_IDX_HSW;
-		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
-		break;
-	case 45: /* Sandy Bridge-EP */
-	case 62: /* IvyTown */
-		rapl_cntr_mask = RAPL_IDX_SRV;
-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-		break;
-	case 87: /* Knights Landing */
-		apply_quirk = true;
-		rapl_cntr_mask = RAPL_IDX_KNL;
-		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-		break;
-	default:
-		return -ENODEV;
-	}
+	rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
+	apply_quirk = rapl_init->apply_quirk;
+	rapl_cntr_mask = rapl_init->cntr_mask;
+	rapl_pmu_events_group.attrs = rapl_init->attrs;
 
 	ret = rapl_check_hw_unit(apply_quirk);
 	if (ret)
@@ -756,7 +845,7 @@ static int __init rapl_pmu_init(void)
 	if (ret)
 		goto out;
 
-	__perf_cpu_notifier(rapl_cpu_notifier);
+	__register_cpu_notifier(&rapl_cpu_nb);
 	cpu_notifier_register_done();
 	rapl_advertise();
 	return 0;
@@ -767,4 +856,14 @@ out:
 	cpu_notifier_register_done();
 	return ret;
 }
-device_initcall(rapl_pmu_init);
+module_init(rapl_pmu_init);
+
+static void __exit intel_rapl_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&rapl_cpu_nb);
+	perf_pmu_unregister(&rapl_pmus->pmu);
+	cleanup_rapl_pmus();
+	cpu_notifier_register_done();
+}
+module_exit(intel_rapl_exit);
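Note (reviewer sketch, not part of the patch): the scale string "2.3283064365386962890625e-10" repeated above is exactly 2^-32, i.e. perf exposes every RAPL domain, including the new psys one, so that counter-delta * 2^-32 yields Joules independent of the raw unit in MSR_RAPL_POWER_UNIT (this is what the file's own "0.23 nJ increments" comment refers to). A quick arithmetic check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t delta = 1ULL << 32;	/* hypothetical counter delta of 2^32 */
	double joules = (double)delta * 2.3283064365386962890625e-10;

	printf("%.6f J\n", joules);	/* prints 1.000000 */
	return 0;
}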
@ -1,3 +1,4 @@
|
||||||
|
#include <asm/cpu_device_id.h>
|
||||||
#include "uncore.h"
|
#include "uncore.h"
|
||||||
|
|
||||||
static struct intel_uncore_type *empty_uncore[] = { NULL, };
|
static struct intel_uncore_type *empty_uncore[] = { NULL, };
|
||||||
|
@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed =
|
||||||
struct event_constraint uncore_constraint_empty =
|
struct event_constraint uncore_constraint_empty =
|
||||||
EVENT_CONSTRAINT(0, 0, 0);
|
EVENT_CONSTRAINT(0, 0, 0);
|
||||||
|
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
|
||||||
static int uncore_pcibus_to_physid(struct pci_bus *bus)
|
static int uncore_pcibus_to_physid(struct pci_bus *bus)
|
||||||
{
|
{
|
||||||
struct pci2phy_map *map;
|
struct pci2phy_map *map;
|
||||||
|
@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
|
||||||
pmu->registered = false;
|
pmu->registered = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
|
static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
|
||||||
{
|
{
|
||||||
struct intel_uncore_pmu *pmu = type->pmus;
|
struct intel_uncore_pmu *pmu = type->pmus;
|
||||||
struct intel_uncore_box *box;
|
struct intel_uncore_box *box;
|
||||||
|
@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init uncore_exit_boxes(void *dummy)
|
static void uncore_exit_boxes(void *dummy)
|
||||||
{
|
{
|
||||||
struct intel_uncore_type **types;
|
struct intel_uncore_type **types;
|
||||||
|
|
||||||
|
@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
|
||||||
kfree(pmu->boxes);
|
kfree(pmu->boxes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init uncore_type_exit(struct intel_uncore_type *type)
|
static void uncore_type_exit(struct intel_uncore_type *type)
|
||||||
{
|
{
|
||||||
struct intel_uncore_pmu *pmu = type->pmus;
|
struct intel_uncore_pmu *pmu = type->pmus;
|
||||||
int i;
|
int i;
|
||||||
|
@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
|
||||||
type->events_group = NULL;
|
type->events_group = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init uncore_types_exit(struct intel_uncore_type **types)
|
static void uncore_types_exit(struct intel_uncore_type **types)
|
||||||
{
|
{
|
||||||
for (; *types; types++)
|
for (; *types; types++)
|
||||||
uncore_type_exit(*types);
|
uncore_type_exit(*types);
|
||||||
|
@ -989,46 +992,6 @@ static int __init uncore_pci_init(void)
|
||||||
size_t size;
|
size_t size;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
switch (boot_cpu_data.x86_model) {
|
|
||||||
case 45: /* Sandy Bridge-EP */
|
|
||||||
ret = snbep_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 62: /* Ivy Bridge-EP */
|
|
||||||
ret = ivbep_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 63: /* Haswell-EP */
|
|
||||||
ret = hswep_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 79: /* BDX-EP */
|
|
||||||
case 86: /* BDX-DE */
|
|
||||||
ret = bdx_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 42: /* Sandy Bridge */
|
|
||||||
ret = snb_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 58: /* Ivy Bridge */
|
|
||||||
ret = ivb_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 60: /* Haswell */
|
|
||||||
case 69: /* Haswell Celeron */
|
|
||||||
ret = hsw_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 61: /* Broadwell */
|
|
||||||
ret = bdw_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 87: /* Knights Landing */
|
|
||||||
ret = knl_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
case 94: /* SkyLake */
|
|
||||||
ret = skl_uncore_pci_init();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return -ENODEV;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
size = max_packages * sizeof(struct pci_extra_dev);
|
size = max_packages * sizeof(struct pci_extra_dev);
|
||||||
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
|
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
|
||||||
if (!uncore_extra_pci_dev) {
|
if (!uncore_extra_pci_dev) {
|
||||||
|
@ -1060,7 +1023,7 @@ err:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init uncore_pci_exit(void)
|
static void uncore_pci_exit(void)
|
||||||
{
|
{
|
||||||
if (pcidrv_registered) {
|
if (pcidrv_registered) {
|
||||||
pcidrv_registered = false;
|
pcidrv_registered = false;
|
||||||
|
@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
switch (boot_cpu_data.x86_model) {
|
|
||||||
case 26: /* Nehalem */
|
|
||||||
case 30:
|
|
||||||
case 37: /* Westmere */
|
|
||||||
case 44:
|
|
||||||
nhm_uncore_cpu_init();
|
|
||||||
break;
|
|
||||||
case 42: /* Sandy Bridge */
|
|
||||||
case 58: /* Ivy Bridge */
|
|
||||||
case 60: /* Haswell */
|
|
||||||
case 69: /* Haswell */
|
|
||||||
case 70: /* Haswell */
|
|
||||||
case 61: /* Broadwell */
|
|
||||||
case 71: /* Broadwell */
-		snb_uncore_cpu_init();
-		break;
-	case 45: /* Sandy Bridge-EP */
-		snbep_uncore_cpu_init();
-		break;
-	case 46: /* Nehalem-EX */
-	case 47: /* Westmere-EX aka. Xeon E7 */
-		nhmex_uncore_cpu_init();
-		break;
-	case 62: /* Ivy Bridge-EP */
-		ivbep_uncore_cpu_init();
-		break;
-	case 63: /* Haswell-EP */
-		hswep_uncore_cpu_init();
-		break;
-	case 79: /* BDX-EP */
-	case 86: /* BDX-DE */
-		bdx_uncore_cpu_init();
-		break;
-	case 87: /* Knights Landing */
-		knl_uncore_cpu_init();
-		break;
-	default:
-		return -ENODEV;
-	}
-
 	ret = uncore_types_init(uncore_msr_uncores, true);
 	if (ret)
 		goto err;
@@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr)
 	return 0;
 }

+#define X86_UNCORE_MODEL_MATCH(model, init)	\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_uncore_init_fun {
+	void	(*cpu_init)(void);
+	int	(*pci_init)(void);
+};
+
+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
+	.cpu_init = nhm_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = snb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = ivb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = hsw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = bdw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
+	.cpu_init = snbep_uncore_cpu_init,
+	.pci_init = snbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
+	.cpu_init = nhmex_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
+	.cpu_init = ivbep_uncore_cpu_init,
+	.pci_init = ivbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
+	.cpu_init = hswep_uncore_cpu_init,
+	.pci_init = hswep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
+	.cpu_init = bdx_uncore_cpu_init,
+	.pci_init = bdx_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
+	.cpu_init = knl_uncore_cpu_init,
+	.pci_init = knl_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+	.pci_init = skl_uncore_pci_init,
+};
+
+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+	X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),	/* Nehalem */
+	X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
+	X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),	/* Westmere */
+	X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
+	X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),	/* Sandy Bridge */
+	X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),	/* Ivy Bridge */
+	X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),	/* Haswell */
+	X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),	/* Haswell Celeron */
+	X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),	/* Haswell */
+	X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),	/* Broadwell */
+	X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),	/* Broadwell */
+	X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),	/* Sandy Bridge-EP */
+	X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),	/* Nehalem-EX */
+	X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),	/* Westmere-EX aka. Xeon E7 */
+	X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),	/* Ivy Bridge-EP */
+	X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),	/* Haswell-EP */
+	X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),	/* BDX-EP */
+	X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),	/* BDX-DE */
+	X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),	/* Knights Landing */
+	X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),	/* SkyLake */
+	{},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+
 static int __init intel_uncore_init(void)
 {
-	int pret, cret, ret;
+	const struct x86_cpu_id *id;
+	struct intel_uncore_init_fun *uncore_init;
+	int pret = 0, cret = 0, ret;

-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+	id = x86_match_cpu(intel_uncore_match);
+	if (!id)
 		return -ENODEV;

 	if (cpu_has_hypervisor)
@@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void)

 	max_packages = topology_max_packages();

-	pret = uncore_pci_init();
-	cret = uncore_cpu_init();
+	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+	if (uncore_init->pci_init) {
+		pret = uncore_init->pci_init();
+		if (!pret)
+			pret = uncore_pci_init();
+	}
+
+	if (uncore_init->cpu_init) {
+		uncore_init->cpu_init();
+		cret = uncore_cpu_init();
+	}

 	if (cret && pret)
 		return -ENODEV;
@@ -1409,4 +1435,14 @@ err:
 	cpu_notifier_register_done();
 	return ret;
 }
-device_initcall(intel_uncore_init);
+module_init(intel_uncore_init);
+
+static void __exit intel_uncore_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&uncore_cpu_nb);
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_pci_exit();
+	cpu_notifier_register_done();
+}
+module_exit(intel_uncore_exit);
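The net effect of the hunks above is that the per-model switch is replaced by a driver_data dispatch through x86_match_cpu(). Below is a minimal, self-contained sketch of that pattern, not the code above; the my_* names are hypothetical stand-ins, and model 42 is the Sandy Bridge client model as in the table.

/* Sketch of x86_cpu_id driver_data dispatch; my_* names are hypothetical. */
#include <linux/module.h>
#include <asm/cpu_device_id.h>

struct my_init_fun {
	void (*cpu_init)(void);
};

static void my_cpu_init(void)
{
	/* per-model setup would go here */
}

static const struct my_init_fun my_init_fun __initconst = {
	.cpu_init = my_cpu_init,
};

static const struct x86_cpu_id my_match[] __initconst = {
	/* vendor, family, model, feature, driver_data */
	{ X86_VENDOR_INTEL, 6, 42, X86_FEATURE_ANY, (unsigned long)&my_init_fun },
	{},
};

static int __init my_init(void)
{
	const struct x86_cpu_id *id = x86_match_cpu(my_match);
	struct my_init_fun *fun;

	if (!id)
		return -ENODEV;		/* CPU model not in the table */

	fun = (struct my_init_fun *)id->driver_data;
	if (fun->cpu_init)
		fun->cpu_init();
	return 0;
}
module_init(my_init);

MODULE_LICENSE("GPL");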
@@ -6,6 +6,8 @@ enum perf_msr_id {
 	PERF_MSR_MPERF = 2,
 	PERF_MSR_PPERF = 3,
 	PERF_MSR_SMI = 4,
+	PERF_MSR_PTSC = 5,
+	PERF_MSR_IRPERF = 6,

 	PERF_MSR_EVENT_MAX,
 };
@@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx)
 	return boot_cpu_has(X86_FEATURE_APERFMPERF);
 }

+static bool test_ptsc(int idx)
+{
+	return boot_cpu_has(X86_FEATURE_PTSC);
+}
+
+static bool test_irperf(int idx)
+{
+	return boot_cpu_has(X86_FEATURE_IRPERF);
+}
+
 static bool test_intel(int idx)
 {
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@ struct perf_msr {
 	bool (*test)(int idx);
 };

 PMU_EVENT_ATTR_STRING(tsc,    evattr_tsc,    "event=0x00");
 PMU_EVENT_ATTR_STRING(aperf,  evattr_aperf,  "event=0x01");
 PMU_EVENT_ATTR_STRING(mperf,  evattr_mperf,  "event=0x02");
 PMU_EVENT_ATTR_STRING(pperf,  evattr_pperf,  "event=0x03");
 PMU_EVENT_ATTR_STRING(smi,    evattr_smi,    "event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc,   evattr_ptsc,   "event=0x05");
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");

 static struct perf_msr msr[] = {
 	[PERF_MSR_TSC]    = { 0, &evattr_tsc, NULL, },
 	[PERF_MSR_APERF]  = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
 	[PERF_MSR_MPERF]  = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
 	[PERF_MSR_PPERF]  = { MSR_PPERF, &evattr_pperf, test_intel, },
 	[PERF_MSR_SMI]    = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+	[PERF_MSR_PTSC]   = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
+	[PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
 };

 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
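With ptsc and irperf exposed through the msr PMU's event strings, they can be counted like any other event, e.g. perf stat -e msr/irperf/. A hypothetical raw perf_event_open() sketch follows, assuming an AMD CPU that sets X86_FEATURE_IRPERF and that the msr PMU is registered; the sysfs path is the standard dynamic-PMU location.

/* Hypothetical userspace sketch: count retired instructions via msr/irperf/. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	FILE *f = fopen("/sys/bus/event_source/devices/msr/type", "r");
	int type, fd;

	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;	/* dynamic PMU type read from sysfs */
	attr.config = 0x06;	/* irperf, per the event=0x06 string above */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;
	/* ... run the workload to be measured ... */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("irperf: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}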
@@ -601,6 +601,7 @@ struct x86_pmu {
 	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
 	const int	*lbr_sel_map;		   /* lbr_select mappings */
 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
+	bool		lbr_pt_coexist;		   /* LBR may coexist with PT */

 	/*
 	 * Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];

 extern struct event_constraint intel_slm_pebs_event_constraints[];

+extern struct event_constraint intel_glm_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];

 extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@ void intel_pmu_lbr_init_nhm(void);

 void intel_pmu_lbr_init_atom(void);

+void intel_pmu_lbr_init_slm(void);
+
 void intel_pmu_lbr_init_snb(void);

 void intel_pmu_lbr_init_hsw(void);
@@ -177,6 +177,7 @@
 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB	( 6*32+24) /* NB performance counter extensions */
 #define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
 #define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
 #define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */

@@ -250,6 +251,7 @@

 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */

 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
@@ -89,27 +89,16 @@
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6

 #define MSR_IA32_RTIT_CTL		0x00000570
-#define RTIT_CTL_TRACEEN		BIT(0)
-#define RTIT_CTL_CYCLEACC		BIT(1)
-#define RTIT_CTL_OS			BIT(2)
-#define RTIT_CTL_USR			BIT(3)
-#define RTIT_CTL_CR3EN			BIT(7)
-#define RTIT_CTL_TOPA			BIT(8)
-#define RTIT_CTL_MTC_EN			BIT(9)
-#define RTIT_CTL_TSC_EN			BIT(10)
-#define RTIT_CTL_DISRETC		BIT(11)
-#define RTIT_CTL_BRANCH_EN		BIT(13)
-#define RTIT_CTL_MTC_RANGE_OFFSET	14
-#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
-#define RTIT_CTL_CYC_THRESH_OFFSET	19
-#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
-#define RTIT_CTL_PSB_FREQ_OFFSET	24
-#define RTIT_CTL_PSB_FREQ		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
 #define MSR_IA32_RTIT_STATUS		0x00000571
-#define RTIT_STATUS_CONTEXTEN		BIT(1)
-#define RTIT_STATUS_TRIGGEREN		BIT(2)
-#define RTIT_STATUS_ERROR		BIT(4)
-#define RTIT_STATUS_STOPPED		BIT(5)
+#define MSR_IA32_RTIT_ADDR0_A		0x00000580
+#define MSR_IA32_RTIT_ADDR0_B		0x00000581
+#define MSR_IA32_RTIT_ADDR1_A		0x00000582
+#define MSR_IA32_RTIT_ADDR1_B		0x00000583
+#define MSR_IA32_RTIT_ADDR2_A		0x00000584
+#define MSR_IA32_RTIT_ADDR2_B		0x00000585
+#define MSR_IA32_RTIT_ADDR3_A		0x00000586
+#define MSR_IA32_RTIT_ADDR3_B		0x00000587
 #define MSR_IA32_RTIT_CR3_MATCH		0x00000572
 #define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
 #define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
@@ -205,6 +194,8 @@
 #define MSR_CONFIG_TDP_CONTROL		0x0000064B
 #define MSR_TURBO_ACTIVATION_RATIO	0x0000064C

+#define MSR_PLATFORM_ENERGY_STATUS	0x0000064D
+
 #define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
 #define MSR_PKG_ANY_CORE_C0_RES		0x00000659
 #define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
@@ -315,6 +306,9 @@
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */

+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF			0xc00000e9
+
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
@@ -328,6 +322,7 @@
 #define MSR_F15H_PERF_CTR		0xc0010201
 #define MSR_F15H_NB_PERF_CTL		0xc0010240
 #define MSR_F15H_NB_PERF_CTR		0xc0010241
+#define MSR_F15H_PTSC			0xc0010280
 #define MSR_F15H_IC_CFG			0xc0011021

 /* Fam 10h MSRs */
@@ -578,7 +578,7 @@ static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	riprel_post_xol(auprobe, regs);
 }

-static struct uprobe_xol_ops default_xol_ops = {
+static const struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
 	.abort	  = default_abort_op,
@@ -695,7 +695,7 @@ static void branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
 		0, insn->immediate.nbytes);
 }

-static struct uprobe_xol_ops branch_xol_ops = {
+static const struct uprobe_xol_ops branch_xol_ops = {
 	.emulate  = branch_emulate_op,
 	.post_xol = branch_post_xol_op,
 };
@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
 {
-	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+	xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
 				callchain_trace, NULL, entry);
 }

 void perf_callchain_user(struct perf_callchain_entry *entry,
 			 struct pt_regs *regs)
 {
-	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+	xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
 			      callchain_trace, entry);
 }
@@ -847,6 +847,14 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	if (!platform_get_irq(cpu_pmu->plat_device, 0))
 		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

+	/*
+	 * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
+	 * big.LITTLE). This is not an uncore PMU, and we have taken ctx
+	 * sharing into account (e.g. with our pmu::filter_match callback and
+	 * pmu::event_init group validation).
+	 */
+	cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
+
 	return 0;

 out_unregister:
@@ -34,6 +34,9 @@
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>

+/* Local defines */
+#define MSR_PLATFORM_POWER_LIMIT	0x0000065C
+
 /* bitmasks for RAPL MSRs, used by primitive access functions */
 #define ENERGY_STATUS_MASK      0xffffffff
@@ -86,6 +89,7 @@ enum rapl_domain_type {
 	RAPL_DOMAIN_PP0, /* core power plane */
 	RAPL_DOMAIN_PP1, /* graphics uncore */
 	RAPL_DOMAIN_DRAM,/* DRAM control_type */
+	RAPL_DOMAIN_PLATFORM, /* PSys control_type */
 	RAPL_DOMAIN_MAX,
 };
@@ -251,9 +255,11 @@ static const char * const rapl_domain_names[] = {
 	"core",
 	"uncore",
 	"dram",
+	"psys",
 };

 static struct powercap_control_type *control_type; /* PowerCap Controller */
+static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */

 /* caller to ensure CPU hotplug lock is held */
 static struct rapl_package *find_package_by_id(int id)
@@ -409,6 +415,14 @@ static const struct powercap_zone_ops zone_ops[] = {
 		.set_enable = set_domain_enable,
 		.get_enable = get_domain_enable,
 	},
+	/* RAPL_DOMAIN_PLATFORM */
+	{
+		.get_energy_uj = get_energy_counter,
+		.get_max_energy_range_uj = get_max_energy_counter,
+		.release = release_zone,
+		.set_enable = set_domain_enable,
+		.get_enable = get_domain_enable,
+	},
 };

 static int set_power_limit(struct powercap_zone *power_zone, int id,
@@ -1160,6 +1174,13 @@ static int rapl_unregister_powercap(void)
 		powercap_unregister_zone(control_type,
 				&rd_package->power_zone);
 	}

+	if (platform_rapl_domain) {
+		powercap_unregister_zone(control_type,
+					 &platform_rapl_domain->power_zone);
+		kfree(platform_rapl_domain);
+	}
+
 	powercap_unregister_control_type(control_type);

 	return 0;
@@ -1239,6 +1260,47 @@ err_cleanup:
 	return ret;
 }

+static int rapl_register_psys(void)
+{
+	struct rapl_domain *rd;
+	struct powercap_zone *power_zone;
+	u64 val;
+
+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
+		return -ENODEV;
+
+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
+		return -ENODEV;
+
+	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
+	rd->id = RAPL_DOMAIN_PLATFORM;
+	rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
+	rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
+	rd->rpl[0].prim_id = PL1_ENABLE;
+	rd->rpl[0].name = pl1_name;
+	rd->rpl[1].prim_id = PL2_ENABLE;
+	rd->rpl[1].name = pl2_name;
+	rd->rp = find_package_by_id(0);
+
+	power_zone = powercap_register_zone(&rd->power_zone, control_type,
+					    "psys", NULL,
+					    &zone_ops[RAPL_DOMAIN_PLATFORM],
+					    2, &constraint_ops);
+
+	if (IS_ERR(power_zone)) {
+		kfree(rd);
+		return PTR_ERR(power_zone);
+	}
+
+	platform_rapl_domain = rd;
+
+	return 0;
+}
+
 static int rapl_register_powercap(void)
 {
 	struct rapl_domain *rd;
@@ -1255,6 +1317,10 @@ static int rapl_register_powercap(void)
 	list_for_each_entry(rp, &rapl_packages, plist)
 		if (rapl_package_register_powercap(rp))
 			goto err_cleanup_package;
+
+	/* Don't bail out if PSys is not supported */
+	rapl_register_psys();
+
 	return ret;

 err_cleanup_package:
@@ -1289,6 +1355,9 @@ static int rapl_check_domain(int cpu, int domain)
 	case RAPL_DOMAIN_DRAM:
 		msr = MSR_DRAM_ENERGY_STATUS;
 		break;
+	case RAPL_DOMAIN_PLATFORM:
+		/* PSYS(PLATFORM) is not a CPU domain, so avoid printng error */
+		return -EINVAL;
 	default:
 		pr_err("invalid domain id %d\n", domain);
 		return -EINVAL;
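Once registered, the platform (PSys) domain shows up as one more powercap zone whose name attribute reads "psys". A hypothetical userspace sketch that locates it by scanning the generic powercap sysfs tree and reads its energy counter; the directory layout is the standard powercap class layout and is an assumption here, not part of the patch.

/* Hypothetical sketch: find the "psys" powercap zone and read energy_uj. */
#include <dirent.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	char path[512], name[64];
	DIR *d = opendir("/sys/class/powercap");
	struct dirent *e;

	if (!d)
		return 1;
	while ((e = readdir(d)) != NULL) {
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/powercap/%s/name", e->d_name);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (fscanf(f, "%63s", name) == 1 && !strcmp(name, "psys")) {
			unsigned long long uj;

			fclose(f);
			snprintf(path, sizeof(path),
				 "/sys/class/powercap/%s/energy_uj", e->d_name);
			f = fopen(path, "r");
			if (f && fscanf(f, "%llu", &uj) == 1)
				printf("psys energy: %llu uJ\n", uj);
			if (f)
				fclose(f);
			break;
		}
		fclose(f);
	}
	closedir(d);
	return 0;
}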
@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {

 struct perf_callchain_entry {
 	__u64				nr;
-	__u64				ip[PERF_MAX_STACK_DEPTH];
+	__u64				ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
 };

 struct perf_raw_record {
@@ -151,6 +151,15 @@ struct hw_perf_event {
 	 */
 	struct task_struct		*target;

+	/*
+	 * PMU would store hardware filter configuration
+	 * here.
+	 */
+	void				*addr_filters;
+
+	/* Last sync'ed generation of filters */
+	unsigned long			addr_filters_gen;
+
 	/*
 	 * hw_perf_event::state flags; used to track the PERF_EF_* state.
 	 */
@@ -216,6 +225,7 @@ struct perf_event;
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF	0x08
 #define PERF_PMU_CAP_EXCLUSIVE		0x10
 #define PERF_PMU_CAP_ITRACE		0x20
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS	0x40

 /**
  * struct pmu - generic performance monitoring unit
@@ -240,6 +250,9 @@ struct pmu {
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;

+	/* number of address filters this PMU can do */
+	unsigned int			nr_addr_filters;
+
 	/*
 	 * Fully disable/enable this PMU, can be used to protect from the PMI
 	 * as well as for lazy/batch writing of the MSRs.
@@ -392,12 +405,71 @@ struct pmu {
 	 */
 	void (*free_aux)		(void *aux); /* optional */

+	/*
+	 * Validate address range filters: make sure the HW supports the
+	 * requested configuration and number of filters; return 0 if the
+	 * supplied filters are valid, -errno otherwise.
+	 *
+	 * Runs in the context of the ioctl()ing process and is not serialized
+	 * with the rest of the PMU callbacks.
+	 */
+	int (*addr_filters_validate)	(struct list_head *filters);
+					/* optional */
+
+	/*
+	 * Synchronize address range filter configuration:
+	 * translate hw-agnostic filters into hardware configuration in
+	 * event::hw::addr_filters.
+	 *
+	 * Runs as a part of filter sync sequence that is done in ->start()
+	 * callback by calling perf_event_addr_filters_sync().
+	 *
+	 * May (and should) traverse event::addr_filters::list, for which its
+	 * caller provides necessary serialization.
+	 */
+	void (*addr_filters_sync)	(struct perf_event *event);
+					/* optional */
+
 	/*
 	 * Filter events for PMU-specific reasons.
 	 */
 	int (*filter_match)		(struct perf_event *event); /* optional */
 };

+/**
+ * struct perf_addr_filter - address range filter definition
+ * @entry:	event's filter list linkage
+ * @inode:	object file's inode for file-based filters
+ * @offset:	filter range offset
+ * @size:	filter range size
+ * @range:	1: range, 0: address
+ * @filter:	1: filter/start, 0: stop
+ *
+ * This is a hardware-agnostic filter configuration as specified by the user.
+ */
+struct perf_addr_filter {
+	struct list_head	entry;
+	struct inode		*inode;
+	unsigned long		offset;
+	unsigned long		size;
+	unsigned int		range	: 1,
+				filter	: 1;
+};
+
+/**
+ * struct perf_addr_filters_head - container for address range filters
+ * @list:	list of filters for this event
+ * @lock:	spinlock that serializes accesses to the @list and event's
+ *		(and its children's) filter generations.
+ *
+ * A child event will use parent's @list (and therefore @lock), so they are
+ * bundled together; see perf_event_addr_filters().
+ */
+struct perf_addr_filters_head {
+	struct list_head	list;
+	raw_spinlock_t		lock;
+};
+
 /**
  * enum perf_event_active_state - the states of a event
  */
@@ -566,6 +638,12 @@ struct perf_event {

 	atomic_t			event_limit;

+	/* address range filters */
+	struct perf_addr_filters_head	addr_filters;
+	/* vma address array for file-based filders */
+	unsigned long			*addr_filters_offs;
+	unsigned long			addr_filters_gen;
+
 	void (*destroy)(struct perf_event *);
 	struct rcu_head			rcu_head;
@@ -834,9 +912,25 @@ extern int perf_event_overflow(struct perf_event *event,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);

+extern void perf_event_output_forward(struct perf_event *event,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs);
+extern void perf_event_output_backward(struct perf_event *event,
+				       struct perf_sample_data *data,
+				       struct pt_regs *regs);
 extern void perf_event_output(struct perf_event *event,
 			      struct perf_sample_data *data,
 			      struct pt_regs *regs);

+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+	if (likely(event->overflow_handler == perf_event_output_forward))
+		return true;
+	if (unlikely(event->overflow_handler == perf_event_output_backward))
+		return true;
+	return false;
+}
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
@@ -977,9 +1071,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);

+extern int sysctl_perf_event_max_stack;
+
 static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < PERF_MAX_STACK_DEPTH) {
+	if (entry->nr < sysctl_perf_event_max_stack) {
 		entry->ip[entry->nr++] = ip;
 		return 0;
 	} else {
@@ -1001,6 +1097,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);

+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp, loff_t *ppos);
+
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
@@ -1045,8 +1143,41 @@ static inline bool has_aux(struct perf_event *event)
 	return event->pmu->setup_aux;
 }

+static inline bool is_write_backward(struct perf_event *event)
+{
+	return !!event->attr.write_backward;
+}
+
+static inline bool has_addr_filter(struct perf_event *event)
+{
+	return event->pmu->nr_addr_filters;
+}
+
+/*
+ * An inherited event uses parent's filters
+ */
+static inline struct perf_addr_filters_head *
+perf_event_addr_filters(struct perf_event *event)
+{
+	struct perf_addr_filters_head *ifh = &event->addr_filters;
+
+	if (event->parent)
+		ifh = &event->parent->addr_filters;
+
+	return ifh;
+}
+
+extern void perf_event_addr_filters_sync(struct perf_event *event);
+
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
+extern int perf_output_begin_forward(struct perf_output_handle *handle,
+				    struct perf_event *event,
+				    unsigned int size);
+extern int perf_output_begin_backward(struct perf_output_handle *handle,
+				      struct perf_event *event,
+				      unsigned int size);
+
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
@@ -340,7 +340,8 @@ struct perf_event_attr {
 				comm_exec      :  1, /* flag comm events that are due to an exec */
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
-				__reserved_1   : 37;
+				write_backward :  1, /* Write ring buffer from end to beginning */
+				__reserved_1   : 36;

 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -401,6 +402,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)

 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
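These two additions are the userspace-visible half of the backward ring-buffer work: attr.write_backward asks the kernel to write records from the end of the buffer toward the beginning, and PERF_EVENT_IOC_PAUSE_OUTPUT lets a reader freeze the buffer while walking it. A hypothetical sketch, assuming headers updated to this ABI; backward buffers are expected to be mapped read-only since data_tail is not consumed.

/* Hypothetical sketch: backward-writing buffer with pause/resume reads. */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	void *buf;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;
	attr.sample_period = 1;
	attr.sample_type = PERF_SAMPLE_TID;
	attr.context_switch = 1;	/* emit PERF_RECORD_SWITCH records */
	attr.write_backward = 1;	/* records go from the end backwards */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* 1 metadata page + 2^n data pages, read-only for backward mode */
	buf = mmap(NULL, (1 + 8) * 4096, PROT_READ, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED)
		return 1;

	/* freeze the buffer while we read it, then resume */
	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1);
	/* ... walk records starting at data_head ... */
	ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0);

	close(fd);
	return 0;
}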
@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    value_size < 8 || value_size % 8 ||
-	    value_size / 8 > PERF_MAX_STACK_DEPTH)
+	    value_size / 8 > sysctl_perf_event_max_stack)
 		return ERR_PTR(-EINVAL);

 	/* hash table size must be power of 2 */
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
 	u32 max_depth = map->value_size / 8;
-	/* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
-	u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
+	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	u32 hash, id, trace_nr, trace_len;
 	bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 		return -EFAULT;

 	/* get_perf_callchain() guarantees that trace->nr >= init_nr
-	 * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
+	 * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
 	 */
 	trace_nr = trace->nr - init_nr;
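For BPF stack maps the practical consequence is that value_size is now validated against the runtime sysctl rather than the compile-time PERF_MAX_STACK_DEPTH. A hypothetical map-creation sketch; 127 is the historical default depth.

/* Hypothetical sketch: create a stack-trace map sized for 127 frames. */
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_STACK_TRACE;
	attr.key_size = 4;		/* stack id */
	attr.value_size = 127 * 8;	/* one u64 slot per frame */
	attr.max_entries = 1024;

	/* rejected with EINVAL if 127 exceeds perf_event_max_stack */
	fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (fd < 0) {
		perror("BPF_MAP_CREATE");
		return 1;
	}
	close(fd);
	return 0;
}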
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
 	struct perf_callchain_entry	*cpu_entries[0];
 };

+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+	return (sizeof(struct perf_callchain_entry) +
+		sizeof(__u64) * sysctl_perf_event_max_stack);
+}
+
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
 static atomic_t nr_callchain_events;
 static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
 	if (!entries)
 		return -ENOMEM;

-	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

 	for_each_possible_cpu(cpu) {
 		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)

 	cpu = smp_processor_id();

-	return &entries->cpu_entries[cpu][*rctx];
+	return (((void *)entries->cpu_entries[cpu]) +
+		(*rctx * perf_callchain_entry__sizeof()));
 }

 static void
@@ -215,3 +224,25 @@ exit_put:

 	return entry;
 }
+
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int new_value = sysctl_perf_event_max_stack, ret;
+	struct ctl_table new_table = *table;
+
+	new_table.data = &new_value;
+	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	mutex_lock(&callchain_mutex);
+	if (atomic_read(&nr_callchain_events))
+		ret = -EBUSY;
+	else
+		sysctl_perf_event_max_stack = new_value;
+
+	mutex_unlock(&callchain_mutex);
+
+	return ret;
+}
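From userspace the new knob behaves like any other proc sysctl, except that the handler above refuses the write with EBUSY while events with callchains enabled exist. A hypothetical sketch:

/* Hypothetical sketch: raise the callchain depth cap to 256 frames. */
#include <errno.h>
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "w");

	if (!f)
		return 1;
	/* the kernel returns EBUSY while callchain users are active */
	if (fprintf(f, "256\n") < 0 || fclose(f) == EOF) {
		if (errno == EBUSY)
			fprintf(stderr, "busy: callchain events are active\n");
		return 1;
	}
	return 0;
}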
File diff suppressed because it is too large
@@ -11,13 +11,13 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
-	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* can write into ring buffer */

 	atomic_t			poll;		/* POLL_ for wakeups */
@@ -65,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	rb_free(rb);
 }

+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
@@ -102,8 +102,21 @@ out:
 	preempt_enable();
 }

-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size)
+static bool __always_inline
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+		      unsigned long data_size, unsigned int size,
+		      bool backward)
+{
+	if (!backward)
+		return CIRC_SPACE(head, tail, data_size) >= size;
+	else
+		return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
+static int __always_inline
+__perf_output_begin(struct perf_output_handle *handle,
+		    struct perf_event *event, unsigned int size,
+		    bool backward)
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
@@ -125,8 +138,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (unlikely(!rb))
 		goto out;

-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused)) {
+		if (rb->nr_pages)
+			local_inc(&rb->lost);
 		goto out;
+	}

 	handle->rb    = rb;
 	handle->event = event;
@@ -143,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle,
 	do {
 		tail = READ_ONCE(rb->user_page->data_tail);
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
+		if (!rb->overwrite) {
+			if (unlikely(!ring_buffer_has_space(head, tail,
+							    perf_data_size(rb),
+							    size, backward)))
+				goto fail;
+		}

 		/*
 		 * The above forms a control dependency barrier separating the
@@ -159,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * See perf_output_put_handle().
 		 */

-		head += size;
+		if (!backward)
+			head += size;
+		else
+			head -= size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);

+	if (backward) {
+		offset = head;
+		head = (u64)(-head);
+	}
+
 	/*
 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
 	 * none of the data stores below can be lifted up by the compiler.
@@ -203,6 +230,26 @@ out:
 	return -ENOSPC;
 }

+int perf_output_begin_forward(struct perf_output_handle *handle,
+			     struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
+int perf_output_begin_backward(struct perf_output_handle *handle,
+			       struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, true);
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size)
+{
+
+	return __perf_output_begin(handle, event, size,
+				   unlikely(is_write_backward(event)));
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
@@ -221,8 +268,6 @@ void perf_output_end(struct perf_output_handle *handle)
 	rcu_read_unlock();
 }

-static void rb_irq_work(struct irq_work *work);
-
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -243,16 +288,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)

 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
-	init_irq_work(&rb->irq_work, rb_irq_work);
-}

-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
-	if (!atomic_dec_and_test(&rb->refcount))
-		return;
-
-	rb->rcu_head.next = (void *)rb;
-	irq_work_queue(&rb->irq_work);
+	/*
+	 * perf_output_begin() only checks rb->paused, therefore
+	 * rb->paused must be true if we have no pages for output.
+	 */
+	if (!rb->nr_pages)
+		rb->paused = 1;
 }

 /*
@@ -264,6 +306,10 @@ static void ring_buffer_put_async(struct ring_buffer *rb)
  * The ordering is similar to that of perf_output_{begin,end}, with
  * the exception of (B), which should be taken care of by the pmu
  * driver, since ordering rules will differ depending on hardware.
+ *
+ * Call this from pmu::start(); see the comment in perf_aux_output_end()
+ * about its use in pmu callbacks. Both can also be called from the PMI
+ * handler if needed.
  */
 void *perf_aux_output_begin(struct perf_output_handle *handle,
 			    struct perf_event *event)
@@ -287,6 +333,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
 		goto err;

+	/*
+	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
+	 * the aux buffer is in perf_mmap_close(), about to get freed.
+	 */
+	if (!atomic_read(&rb->aux_mmap_count))
+		goto err_put;
+
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
 	 * writers are caught early
@@ -328,10 +381,11 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	return handle->rb->aux_priv;

 err_put:
+	/* can't be last */
 	rb_free_aux(rb);

 err:
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 	handle->event = NULL;

 	return NULL;
@@ -342,6 +396,10 @@ err:
  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
  * pmu driver's responsibility to observe ordering rules of the hardware,
  * so that all the data is externally visible before this is called.
+ *
+ * Note: this has to be called from pmu::stop() callback, as the assumption
+ * of the AUX buffer management code is that after pmu::stop(), the AUX
+ * transaction must be stopped and therefore drop the AUX reference count.
  */
 void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 			 bool truncated)
@@ -389,8 +447,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 	handle->event = NULL;

 	local_set(&rb->aux_nest, 0);
+	/* can't be last */
 	rb_free_aux(rb);
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 }

 /*
@@ -471,6 +530,14 @@ static void __rb_free_aux(struct ring_buffer *rb)
 {
 	int pg;

+	/*
+	 * Should never happen, the last reference should be dropped from
+	 * perf_mmap_close() path, which first stops aux transactions (which
+	 * in turn are the atomic holders of aux_refcount) and then does the
+	 * last rb_free_aux().
+	 */
+	WARN_ON_ONCE(in_atomic());
+
 	if (rb->aux_priv) {
 		rb->free_aux(rb->aux_priv);
 		rb->free_aux = NULL;
@@ -582,18 +649,7 @@ out:
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
-		irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
-	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
-	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
-
-	if (rb->rcu_head.next == (void *)rb)
-		call_rcu(&rb->rcu_head, rb_free_rcu);
 }

 #ifndef CONFIG_PERF_USE_VMALLOC
@@ -130,6 +130,9 @@ static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+static int six_hundred_forty_kb = 640 * 1024;
+#endif

 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "perf_event_max_stack",
+		.data		= NULL, /* filled in by handler */
+		.maxlen		= sizeof(sysctl_perf_event_max_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= &zero,
+		.extra2		= &six_hundred_forty_kb,
+	},
 #endif
 #ifdef CONFIG_KMEMCHECK
 	{
@@ -47,6 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event,
 	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
 		return -EPERM;

+	if (!is_sampling_event(p_event))
+		return 0;
+
 	/*
 	 * We don't allow user space callchains for function trace
 	 * event, due to issues with page faults while tracing page
@@ -137,7 +137,8 @@ libsubcmd_clean:
 	$(call descend,lib/subcmd,clean)

 perf_clean:
-	$(call descend,$(@:_clean=),clean)
+	$(Q)mkdir -p $(PERF_O) .
+	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean

 selftests_clean:
 	$(call descend,testing/$(@:_clean=),clean)
@@ -49,6 +49,10 @@ FEATURE_TESTS_BASIC :=			\
 	libslang			\
 	libcrypto			\
 	libunwind			\
+	libunwind-x86			\
+	libunwind-x86_64		\
+	libunwind-arm			\
+	libunwind-aarch64		\
 	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
 	timerfd				\
@@ -69,7 +73,9 @@ FEATURE_TESTS_EXTRA :=			\
 	libbabeltrace			\
 	liberty				\
 	liberty-z			\
-	libunwind-debug-frame
+	libunwind-debug-frame		\
+	libunwind-debug-frame-arm	\
+	libunwind-debug-frame-aarch64

 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
@@ -27,6 +27,12 @@ FILES=					\
 	test-libcrypto.bin		\
 	test-libunwind.bin		\
 	test-libunwind-debug-frame.bin	\
+	test-libunwind-x86.bin		\
+	test-libunwind-x86_64.bin	\
+	test-libunwind-arm.bin		\
+	test-libunwind-aarch64.bin	\
+	test-libunwind-debug-frame-arm.bin	\
+	test-libunwind-debug-frame-aarch64.bin	\
 	test-pthread-attr-setaffinity-np.bin	\
 	test-stackprotector-all.bin	\
 	test-timerfd.bin		\
@@ -103,6 +109,23 @@ $(OUTPUT)test-libunwind.bin:

 $(OUTPUT)test-libunwind-debug-frame.bin:
 	$(BUILD) -lelf
+
+$(OUTPUT)test-libunwind-x86.bin:
+	$(BUILD) -lelf -lunwind-x86
+
+$(OUTPUT)test-libunwind-x86_64.bin:
+	$(BUILD) -lelf -lunwind-x86_64
+
+$(OUTPUT)test-libunwind-arm.bin:
+	$(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-aarch64.bin:
+	$(BUILD) -lelf -lunwind-aarch64
+
+$(OUTPUT)test-libunwind-debug-frame-arm.bin:
+	$(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
+	$(BUILD) -lelf -lunwind-aarch64

 $(OUTPUT)test-libaudit.bin:
 	$(BUILD) -laudit
@@ -27,10 +27,9 @@ int main(void)
 	attr.log_level = 0;
 	attr.kern_version = 0;

-	attr = attr;
-
 	/*
 	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
 	 * This call should fail if we run the testcase.
 	 */
-	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
+	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 }
26
tools/build/feature/test-libunwind-aarch64.c
Normal file
26
tools/build/feature/test-libunwind-aarch64.c
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
#include <libunwind-aarch64.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
|
||||||
|
unw_word_t ip,
|
||||||
|
unw_dyn_info_t *di,
|
||||||
|
unw_proc_info_t *pi,
|
||||||
|
int need_unwind_info, void *arg);
|
||||||
|
|
||||||
|
#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
|
||||||
|
|
||||||
|
static unw_accessors_t accessors;
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
unw_addr_space_t addr_space;
|
||||||
|
|
||||||
|
addr_space = unw_create_addr_space(&accessors, 0);
|
||||||
|
if (addr_space)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
unw_init_remote(NULL, addr_space, NULL);
|
||||||
|
dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
tools/build/feature/test-libunwind-arm.c (new file, 27 lines)
@@ -0,0 +1,27 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
tools/build/feature/test-libunwind-debug-frame-aarch64.c (new file, 16 lines)
@@ -0,0 +1,16 @@
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip, unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+	return 0;
+}
tools/build/feature/test-libunwind-debug-frame-arm.c (new file, 16 lines)
@@ -0,0 +1,16 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip, unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+	return 0;
+}
tools/build/feature/test-libunwind-x86.c (new file, 27 lines)
@@ -0,0 +1,27 @@
+#include <libunwind-x86.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
tools/build/feature/test-libunwind-x86_64.c (new file, 27 lines)
@@ -0,0 +1,27 @@
+#include <libunwind-x86_64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
@@ -351,6 +351,19 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
 	return err;
 }
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+
+	if (!procfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
 	char path[PATH_MAX];
@@ -29,6 +29,8 @@ int filename__read_int(const char *filename, int *value);
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
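The new helper composes the procfs mount point with a relative entry and delegates to filename__read_str(), which allocates the buffer for the caller. As an illustration only (not part of this commit), a minimal caller built inside the perf tree against tools/lib/api might look like this, reading the same sysctl the rest of this series introduces:

#include <stdio.h>
#include <stdlib.h>
#include <api/fs/fs.h>	/* declares procfs__read_str() after this change */

int main(void)
{
	char *buf = NULL;
	size_t size = 0;

	/* Resolves to /proc/sys/kernel/perf_event_max_stack via procfs__mountpoint() */
	if (procfs__read_str("sys/kernel/perf_event_max_stack", &buf, &size) < 0) {
		fprintf(stderr, "procfs not mounted or entry missing\n");
		return 1;
	}
	printf("perf_event_max_stack: %.*s", (int)size, buf);
	free(buf);	/* filename__read_str() allocated it */
	return 0;
}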
@@ -672,6 +672,7 @@ The letters are:
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
 		l	synthesize last branch entries (use with i or x)
+		s	skip initial number of events
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -730,6 +731,12 @@ from one sample to the next.
 
 To disable trace decoding entirely, use the option --no-itrace.
 
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+skips the first million instructions.
 
 dump option
 -----------
@@ -7,6 +7,7 @@
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
 		l	synthesize last branch entries (use with i or x)
+		s	skip initial number of events
 
 The default is all events i.e. the same as --itrace=ibxe
 
@@ -24,3 +25,10 @@
 
 Also the number of last branch entries (default 64, max. 1024) for
 instructions or transactions events can be specified.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+skips the first million instructions.
@@ -33,7 +33,7 @@ OPTIONS
 
 -f::
 --force::
-	Don't complain, do it.
+	Don't do ownership validation.
 
 -v::
 --verbose::
@@ -75,7 +75,7 @@ OPTIONS
 
 -f::
 --force::
-	Don't complain, do it.
+	Don't do ownership validation.
 
 --symfs=<directory>::
 	Look for files with symbols relative to this directory.
@@ -93,6 +93,67 @@ raw encoding of 0x1A8 can be used:
 You should refer to the processor specific documentation for getting these
 details. Some of them are referenced in the SEE ALSO section below.
 
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor specific documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+  ls /sys/devices/*/format
+
+For example the raw event "LSD.UOPS" core pmu event above could
+be specified as
+
+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of a Intel Xeon system
+
+  perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU. Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together.
+
+This example measures the combined core power every second
+
+  perf stat -I 1000 -e power/energy-cores/ -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non root users generally only context switched PMU events are available.
+This is normally only the events in the cpu PMU, the predefined events
+like cycles and instructions and some software events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overriden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non root to use these events.
+
+For accessing trace point events perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
+that allows low overhead execution tracing. These are described in a separate
+intel-pt.txt document.
+
 PARAMETERIZED EVENTS
 --------------------
@@ -106,6 +167,50 @@ also be supplied. For example:
 
   perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
 
+EVENT GROUPS
+------------
+
+Perf supports time based multiplexing of events, when the number of events
+active exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+  perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depend on the CPU. A group
+cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can schedule, and may not support multiple instances in a single group.
+When too many events are specified in the group none of them will not
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by default.
+The nmi watchdog can be disabled as root with
+
+	echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+  perf record -e '{cycles,instructions}:S' ...
+  perf report --group
+
+Normally all events in a event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
 OPTIONS
 -------
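For readers unfamiliar with what the { } grouping above maps to at the syscall level: the group leader is opened with group_fd = -1 and each member passes the leader's fd, so the kernel schedules all of them onto the PMU together. A minimal sketch, not from this commit, with most error handling trimmed:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int leader, member;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;

	/* group_fd = -1: create a new group with this event as leader */
	leader = perf_event_open(&attr, 0, -1, -1, 0);
	if (leader < 0) {
		perror("perf_event_open (leader)");
		return 1;
	}

	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 0;
	/* group_fd = leader: cycles is scheduled together with instructions */
	member = perf_event_open(&attr, 0, -1, leader, 0);
	if (member < 0)
		perror("perf_event_open (member)");

	ioctl(leader, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(leader, PERF_EVENT_IOC_DISABLE, 0);

	if (member >= 0 && read(member, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %lld\n", count);

	close(member);
	close(leader);
	return 0;
}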
@@ -143,5 +248,5 @@ SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-top[1],
 linkperf:perf-record[1],
-http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
 http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
@@ -48,6 +48,14 @@ OPTIONS
 option can be passed in record mode. It will be interpreted the same way as perf
 record.
 
+-K::
+--all-kernel::
+	Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+	Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
@@ -347,6 +347,19 @@ Configure all used events to run in kernel space.
 --all-user::
 Configure all used events to run in user space.
 
+--timestamp-filename
+Append timestamp to output file name.
+
+--switch-output::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+when receiving a SIGUSR2.
+
+A possible use case is to, given an external event, slice the perf.data file
+that gets then processed, possibly via a perf script, to decide if that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
@@ -248,7 +248,7 @@ OPTIONS
 	Note that when using the --itrace option the synthesized callchain size
 	will override this value if the synthesized callchain size is bigger.
 
-	Default: 127
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 -G::
 --inverted::
@@ -285,7 +285,7 @@ OPTIONS
 
 -f::
 --force::
-	Don't complain, do it.
+	Don't do ownership validation.
 
 --symfs=<directory>::
 	Look for files with symbols relative to this directory.
@@ -50,6 +50,22 @@ OPTIONS
 --dump-raw-trace=::
         Display verbose dump of the sched data.
 
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+	Show only CPUs with activity. Helps visualizing on high core
+	count systems.
+
+--cpus::
+	Show just entries with activities for the given CPUs.
+
+--color-cpus::
+	Highlight the given cpus.
+
+--color-pids::
+	Highlight the given pids.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
@@ -259,9 +259,23 @@ include::itrace.txt[]
 --full-source-path::
 	Show the full path for source files for srcline output.
 
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. This is a trade-off
+	between information loss and faster processing especially for
+	workloads that can have a very long callchain stack.
+	Note that when using the --itrace option the synthesized callchain size
+	will override this value if the synthesized callchain size is bigger.
+
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
 --ns::
 	Use 9 decimal places when displaying time (i.e. show the nanoseconds)
 
+-f::
+--force::
+	Don't do ownership validation.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
@@ -177,7 +177,7 @@ Default is to monitor all CPUS.
 	between information loss and faster processing especially for
 	workloads that can have a very long callchain stack.
 
-	Default: 127
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ignore-callees=<regex>::
 	Ignore callees of the function(s) matching the given regex.
@@ -117,9 +117,41 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --syscalls::
 	Trace system calls. This options is enabled by default.
 
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+	Setup and enable call-graph (stack chain/backtrace) recording.
+	See `--call-graph` section in perf-record and perf-report
+	man pages for details. The ones that are most useful in 'perf trace'
+	are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
+
+	Using this will, for the root user, bump the value of --mmap-pages to 4
+	times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+	sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+	Show the kernel callchains on the syscall exit path.
+
 --event::
 	Trace other events, see 'perf list' for a complete list.
 
+--max-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	beyond the specified depth will be ignored. Note that at this point
+	this is just about the presentation part, i.e. the kernel is still
+	not limiting, the overhead of callchains needs to be set via the
+	knobs in --call-graph dwarf.
+
+	Implies '--call-graph dwarf' when --call-graph not present on the
+	command line, on systems where DWARF unwinding was built in.
+
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--min-stack::
+	Set the stack depth limit when parsing the callchain, anything
+	below the specified depth will be ignored. Disabled by default.
+
+	Implies '--call-graph dwarf' when --call-graph not present on the
+	command line, on systems where DWARF unwinding was built in.
+
 --proc-map-timeout::
 	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
 	because the file may be huge. A time out is needed in such cases.
@@ -183,6 +183,11 @@ endif
 include config/Makefile
 endif
 
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(ARCH)/Makefile
+endif
+
 # The FEATURE_DUMP_EXPORT holds location of the actual
 # FEATURE_DUMP file to be used to bypass feature detection
 # (for bpf or any other subproject)
|
||||||
# because maintaining the nesting to match is a pain. If
|
# because maintaining the nesting to match is a pain. If
|
||||||
# we had "elif" things would have been much nicer...
|
# we had "elif" things would have been much nicer...
|
||||||
|
|
||||||
-include arch/$(ARCH)/Makefile
|
|
||||||
|
|
||||||
ifneq ($(OUTPUT),)
|
ifneq ($(OUTPUT),)
|
||||||
CFLAGS += -I$(OUTPUT)
|
CFLAGS += -I$(OUTPUT)
|
||||||
endif
|
endif
|
||||||
|
@ -390,7 +393,7 @@ endif
|
||||||
__build-dir = $(subst $(OUTPUT),,$(dir $@))
|
__build-dir = $(subst $(OUTPUT),,$(dir $@))
|
||||||
build-dir = $(if $(__build-dir),$(__build-dir),.)
|
build-dir = $(if $(__build-dir),$(__build-dir),.)
|
||||||
|
|
||||||
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
|
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
|
||||||
|
|
||||||
$(OUTPUT)%.o: %.c prepare FORCE
|
$(OUTPUT)%.o: %.c prepare FORCE
|
||||||
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
|
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
|
||||||
|
@ -430,7 +433,7 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
|
||||||
|
|
||||||
LIBPERF_IN := $(OUTPUT)libperf-in.o
|
LIBPERF_IN := $(OUTPUT)libperf-in.o
|
||||||
|
|
||||||
$(LIBPERF_IN): fixdep FORCE
|
$(LIBPERF_IN): prepare fixdep FORCE
|
||||||
$(Q)$(MAKE) $(build)=libperf
|
$(Q)$(MAKE) $(build)=libperf
|
||||||
|
|
||||||
$(LIB_FILE): $(LIBPERF_IN)
|
$(LIB_FILE): $(LIBPERF_IN)
|
||||||
|
@ -625,7 +628,7 @@ config-clean:
|
||||||
$(call QUIET_CLEAN, config)
|
$(call QUIET_CLEAN, config)
|
||||||
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
|
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
|
||||||
|
|
||||||
clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
|
clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
|
||||||
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
|
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
|
||||||
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
|
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
|
||||||
$(Q)$(RM) $(OUTPUT).config-detected
|
$(Q)$(RM) $(OUTPUT).config-detected
|
||||||
|
@ -662,5 +665,5 @@ FORCE:
|
||||||
.PHONY: all install clean config-clean strip install-gtk
|
.PHONY: all install clean config-clean strip install-gtk
|
||||||
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
|
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
|
||||||
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
|
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
|
||||||
.PHONY: libtraceevent_plugins
|
.PHONY: libtraceevent_plugins archheaders
|
||||||
|
|
||||||
|
|
|
@@ -3,4 +3,5 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
@@ -10,19 +10,26 @@
  */
 
 #include <stddef.h>
+#include <errno.h>
+#include <string.h>
 #include <dwarf-regs.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include "util.h"
 
 struct pt_regs_dwarfnum {
 	const char *name;
 	unsigned int dwarfnum;
+	unsigned int ptregs_offset;
 };
 
-#define STR(s) #s
-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
-#define GPR_DWARFNUM_NAME(num) \
-	{.name = STR(%gpr##num), .dwarfnum = num}
-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+#define REG_DWARFNUM_NAME(r, num) \
+	{.name = STR(%)STR(r), .dwarfnum = num, \
+	.ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num) \
+	{.name = STR(%gpr##num), .dwarfnum = num, \
+	.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
 
 /*
  * Reference:
@@ -61,12 +68,12 @@ static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
 	GPR_DWARFNUM_NAME(29),
 	GPR_DWARFNUM_NAME(30),
 	GPR_DWARFNUM_NAME(31),
-	REG_DWARFNUM_NAME("%msr", 66),
-	REG_DWARFNUM_NAME("%ctr", 109),
-	REG_DWARFNUM_NAME("%link", 108),
-	REG_DWARFNUM_NAME("%xer", 101),
-	REG_DWARFNUM_NAME("%dar", 119),
-	REG_DWARFNUM_NAME("%dsisr", 118),
+	REG_DWARFNUM_NAME(msr, 66),
+	REG_DWARFNUM_NAME(ctr, 109),
+	REG_DWARFNUM_NAME(link, 108),
+	REG_DWARFNUM_NAME(xer, 101),
+	REG_DWARFNUM_NAME(dar, 119),
+	REG_DWARFNUM_NAME(dsisr, 118),
 	REG_DWARFNUM_END,
 };
 
@@ -86,3 +93,12 @@ const char *get_arch_regstr(unsigned int n)
 		return roff->name;
 	return NULL;
 }
+
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_dwarfnum *roff;
+	for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->ptregs_offset;
+	return -EINVAL;
+}
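A sketch of how a caller inside the perf tree might use the new lookup. The register names follow the table above ("%gprN" for the GPRs, "%msr" and friends for the specials), and -EINVAL comes back for anything else; this fragment is illustrative only, not from the commit:

#include <stdio.h>
#include <dwarf-regs.h>	/* perf tree header; declares regs_query_register_offset() */

static void show_reg_offset(const char *name)
{
	int off = regs_query_register_offset(name);

	if (off < 0)	/* -EINVAL: name not in regdwarfnum_table */
		printf("%s: unknown register\n", name);
	else		/* e.g. offsetof(struct pt_regs, gpr[3]) for "%gpr3" */
		printf("%s: pt_regs offset %d\n", name, off);
}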
@@ -19,12 +19,6 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 	       ehdr.e_type == ET_DYN;
 }
 
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-void arch__elf_sym_adjust(GElf_Sym *sym)
-{
-	sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
-}
-#endif
 #endif
 
 #if !defined(_CALL_ELF) || _CALL_ELF != 2
@@ -65,18 +59,45 @@ bool arch__prefers_symtab(void)
 	return true;
 }
 
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+	s->arch_sym = sym->st_other;
+}
+#endif
+
 #define PPC64LE_LEP_OFFSET	8
 
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
-			     struct probe_trace_event *tev, struct map *map)
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym)
 {
+	int lep_offset;
+
 	/*
-	 * ppc64 ABIv2 local entry point is currently always 2 instructions
-	 * (8 bytes) after the global entry point.
+	 * When probing at a function entry point, we normally always want the
+	 * LEP since that catches calls to the function through both the GEP and
+	 * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+	 * the user only specified the function entry.
+	 *
+	 * However, if the user specifies an offset, we fall back to using the
+	 * GEP since all userspace applications (objdump/readelf) show function
+	 * disassembly with offsets from the GEP.
+	 *
+	 * In addition, we shouldn't specify an offset for kretprobes.
 	 */
-	if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
-		tev->point.address += PPC64LE_LEP_OFFSET;
+	if (pev->point.offset || pev->point.retprobe || !map || !sym)
+		return;
+
+	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
 		tev->point.offset += PPC64LE_LEP_OFFSET;
+	else if (lep_offset) {
+		if (pev->uprobes)
+			tev->point.address += lep_offset;
+		else
+			tev->point.offset += lep_offset;
 	}
 }
 #endif
@@ -4,3 +4,26 @@ endif
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out    := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+	(diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
+	|| echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+	$(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl (new file, 376 lines)
@@ -0,0 +1,376 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0	common	read	sys_read
+1	common	write	sys_write
+2	common	open	sys_open
+3	common	close	sys_close
+4	common	stat	sys_newstat
+5	common	fstat	sys_newfstat
+6	common	lstat	sys_newlstat
+7	common	poll	sys_poll
+8	common	lseek	sys_lseek
+9	common	mmap	sys_mmap
+10	common	mprotect	sys_mprotect
+11	common	munmap	sys_munmap
+12	common	brk	sys_brk
+13	64	rt_sigaction	sys_rt_sigaction
+14	common	rt_sigprocmask	sys_rt_sigprocmask
+15	64	rt_sigreturn	sys_rt_sigreturn/ptregs
+16	64	ioctl	sys_ioctl
+17	common	pread64	sys_pread64
+18	common	pwrite64	sys_pwrite64
+19	64	readv	sys_readv
+20	64	writev	sys_writev
+21	common	access	sys_access
+22	common	pipe	sys_pipe
+23	common	select	sys_select
+24	common	sched_yield	sys_sched_yield
+25	common	mremap	sys_mremap
+26	common	msync	sys_msync
+27	common	mincore	sys_mincore
+28	common	madvise	sys_madvise
+29	common	shmget	sys_shmget
+30	common	shmat	sys_shmat
+31	common	shmctl	sys_shmctl
+32	common	dup	sys_dup
+33	common	dup2	sys_dup2
+34	common	pause	sys_pause
+35	common	nanosleep	sys_nanosleep
+36	common	getitimer	sys_getitimer
+37	common	alarm	sys_alarm
+38	common	setitimer	sys_setitimer
+39	common	getpid	sys_getpid
+40	common	sendfile	sys_sendfile64
+41	common	socket	sys_socket
+42	common	connect	sys_connect
+43	common	accept	sys_accept
+44	common	sendto	sys_sendto
+45	64	recvfrom	sys_recvfrom
+46	64	sendmsg	sys_sendmsg
+47	64	recvmsg	sys_recvmsg
+48	common	shutdown	sys_shutdown
+49	common	bind	sys_bind
+50	common	listen	sys_listen
+51	common	getsockname	sys_getsockname
+52	common	getpeername	sys_getpeername
+53	common	socketpair	sys_socketpair
+54	64	setsockopt	sys_setsockopt
+55	64	getsockopt	sys_getsockopt
+56	common	clone	sys_clone/ptregs
+57	common	fork	sys_fork/ptregs
+58	common	vfork	sys_vfork/ptregs
+59	64	execve	sys_execve/ptregs
+60	common	exit	sys_exit
+61	common	wait4	sys_wait4
+62	common	kill	sys_kill
+63	common	uname	sys_newuname
+64	common	semget	sys_semget
+65	common	semop	sys_semop
+66	common	semctl	sys_semctl
+67	common	shmdt	sys_shmdt
+68	common	msgget	sys_msgget
+69	common	msgsnd	sys_msgsnd
+70	common	msgrcv	sys_msgrcv
+71	common	msgctl	sys_msgctl
+72	common	fcntl	sys_fcntl
+73	common	flock	sys_flock
+74	common	fsync	sys_fsync
+75	common	fdatasync	sys_fdatasync
+76	common	truncate	sys_truncate
+77	common	ftruncate	sys_ftruncate
+78	common	getdents	sys_getdents
+79	common	getcwd	sys_getcwd
+80	common	chdir	sys_chdir
+81	common	fchdir	sys_fchdir
+82	common	rename	sys_rename
+83	common	mkdir	sys_mkdir
+84	common	rmdir	sys_rmdir
+85	common	creat	sys_creat
+86	common	link	sys_link
+87	common	unlink	sys_unlink
+88	common	symlink	sys_symlink
+89	common	readlink	sys_readlink
+90	common	chmod	sys_chmod
+91	common	fchmod	sys_fchmod
+92	common	chown	sys_chown
+93	common	fchown	sys_fchown
+94	common	lchown	sys_lchown
+95	common	umask	sys_umask
+96	common	gettimeofday	sys_gettimeofday
+97	common	getrlimit	sys_getrlimit
+98	common	getrusage	sys_getrusage
+99	common	sysinfo	sys_sysinfo
+100	common	times	sys_times
+101	64	ptrace	sys_ptrace
+102	common	getuid	sys_getuid
+103	common	syslog	sys_syslog
+104	common	getgid	sys_getgid
+105	common	setuid	sys_setuid
+106	common	setgid	sys_setgid
+107	common	geteuid	sys_geteuid
+108	common	getegid	sys_getegid
+109	common	setpgid	sys_setpgid
+110	common	getppid	sys_getppid
+111	common	getpgrp	sys_getpgrp
+112	common	setsid	sys_setsid
+113	common	setreuid	sys_setreuid
+114	common	setregid	sys_setregid
+115	common	getgroups	sys_getgroups
+116	common	setgroups	sys_setgroups
+117	common	setresuid	sys_setresuid
+118	common	getresuid	sys_getresuid
+119	common	setresgid	sys_setresgid
+120	common	getresgid	sys_getresgid
+121	common	getpgid	sys_getpgid
+122	common	setfsuid	sys_setfsuid
+123	common	setfsgid	sys_setfsgid
+124	common	getsid	sys_getsid
+125	common	capget	sys_capget
+126	common	capset	sys_capset
+127	64	rt_sigpending	sys_rt_sigpending
+128	64	rt_sigtimedwait	sys_rt_sigtimedwait
+129	64	rt_sigqueueinfo	sys_rt_sigqueueinfo
+130	common	rt_sigsuspend	sys_rt_sigsuspend
+131	64	sigaltstack	sys_sigaltstack
+132	common	utime	sys_utime
+133	common	mknod	sys_mknod
+134	64	uselib
+135	common	personality	sys_personality
+136	common	ustat	sys_ustat
+137	common	statfs	sys_statfs
+138	common	fstatfs	sys_fstatfs
+139	common	sysfs	sys_sysfs
+140	common	getpriority	sys_getpriority
+141	common	setpriority	sys_setpriority
+142	common	sched_setparam	sys_sched_setparam
+143	common	sched_getparam	sys_sched_getparam
+144	common	sched_setscheduler	sys_sched_setscheduler
+145	common	sched_getscheduler	sys_sched_getscheduler
+146	common	sched_get_priority_max	sys_sched_get_priority_max
+147	common	sched_get_priority_min	sys_sched_get_priority_min
+148	common	sched_rr_get_interval	sys_sched_rr_get_interval
+149	common	mlock	sys_mlock
+150	common	munlock	sys_munlock
+151	common	mlockall	sys_mlockall
+152	common	munlockall	sys_munlockall
+153	common	vhangup	sys_vhangup
+154	common	modify_ldt	sys_modify_ldt
+155	common	pivot_root	sys_pivot_root
+156	64	_sysctl	sys_sysctl
+157	common	prctl	sys_prctl
+158	common	arch_prctl	sys_arch_prctl
+159	common	adjtimex	sys_adjtimex
+160	common	setrlimit	sys_setrlimit
+161	common	chroot	sys_chroot
+162	common	sync	sys_sync
+163	common	acct	sys_acct
+164	common	settimeofday	sys_settimeofday
+165	common	mount	sys_mount
+166	common	umount2	sys_umount
+167	common	swapon	sys_swapon
+168	common	swapoff	sys_swapoff
+169	common	reboot	sys_reboot
+170	common	sethostname	sys_sethostname
+171	common	setdomainname	sys_setdomainname
+172	common	iopl	sys_iopl/ptregs
+173	common	ioperm	sys_ioperm
+174	64	create_module
+175	common	init_module	sys_init_module
+176	common	delete_module	sys_delete_module
+177	64	get_kernel_syms
+178	64	query_module
+179	common	quotactl	sys_quotactl
+180	64	nfsservctl
+181	common	getpmsg
+182	common	putpmsg
+183	common	afs_syscall
+184	common	tuxcall
+185	common	security
+186	common	gettid	sys_gettid
+187	common	readahead	sys_readahead
+188	common	setxattr	sys_setxattr
+189	common	lsetxattr	sys_lsetxattr
+190	common	fsetxattr	sys_fsetxattr
+191	common	getxattr	sys_getxattr
+192	common	lgetxattr	sys_lgetxattr
+193	common	fgetxattr	sys_fgetxattr
+194	common	listxattr	sys_listxattr
+195	common	llistxattr	sys_llistxattr
+196	common	flistxattr	sys_flistxattr
+197	common	removexattr	sys_removexattr
+198	common	lremovexattr	sys_lremovexattr
+199	common	fremovexattr	sys_fremovexattr
+200	common	tkill	sys_tkill
+201	common	time	sys_time
+202	common	futex	sys_futex
+203	common	sched_setaffinity	sys_sched_setaffinity
+204	common	sched_getaffinity	sys_sched_getaffinity
+205	64	set_thread_area
+206	64	io_setup	sys_io_setup
+207	common	io_destroy	sys_io_destroy
+208	common	io_getevents	sys_io_getevents
+209	64	io_submit	sys_io_submit
+210	common	io_cancel	sys_io_cancel
+211	64	get_thread_area
+212	common	lookup_dcookie	sys_lookup_dcookie
+213	common	epoll_create	sys_epoll_create
+214	64	epoll_ctl_old
+215	64	epoll_wait_old
+216	common	remap_file_pages	sys_remap_file_pages
+217	common	getdents64	sys_getdents64
+218	common	set_tid_address	sys_set_tid_address
+219	common	restart_syscall	sys_restart_syscall
+220	common	semtimedop	sys_semtimedop
+221	common	fadvise64	sys_fadvise64
+222	64	timer_create	sys_timer_create
+223	common	timer_settime	sys_timer_settime
+224	common	timer_gettime	sys_timer_gettime
+225	common	timer_getoverrun	sys_timer_getoverrun
+226	common	timer_delete	sys_timer_delete
+227	common	clock_settime	sys_clock_settime
+228	common	clock_gettime	sys_clock_gettime
+229	common	clock_getres	sys_clock_getres
+230	common	clock_nanosleep	sys_clock_nanosleep
+231	common	exit_group	sys_exit_group
+232	common	epoll_wait	sys_epoll_wait
+233	common	epoll_ctl	sys_epoll_ctl
+234	common	tgkill	sys_tgkill
+235	common	utimes	sys_utimes
+236	64	vserver
+237	common	mbind	sys_mbind
+238	common	set_mempolicy	sys_set_mempolicy
+239	common	get_mempolicy	sys_get_mempolicy
+240	common	mq_open	sys_mq_open
+241	common	mq_unlink	sys_mq_unlink
+242	common	mq_timedsend	sys_mq_timedsend
+243	common	mq_timedreceive	sys_mq_timedreceive
+244	64	mq_notify	sys_mq_notify
+245	common	mq_getsetattr	sys_mq_getsetattr
+246	64	kexec_load	sys_kexec_load
+247	64	waitid	sys_waitid
+248	common	add_key	sys_add_key
+249	common	request_key	sys_request_key
+250	common	keyctl	sys_keyctl
+251	common	ioprio_set	sys_ioprio_set
+252	common	ioprio_get	sys_ioprio_get
+253	common	inotify_init	sys_inotify_init
+254	common	inotify_add_watch	sys_inotify_add_watch
+255	common	inotify_rm_watch	sys_inotify_rm_watch
+256	common	migrate_pages	sys_migrate_pages
+257	common	openat	sys_openat
+258	common	mkdirat	sys_mkdirat
+259	common	mknodat	sys_mknodat
+260	common	fchownat	sys_fchownat
+261	common	futimesat	sys_futimesat
+262	common	newfstatat	sys_newfstatat
+263	common	unlinkat	sys_unlinkat
+264	common	renameat	sys_renameat
+265	common	linkat	sys_linkat
+266	common	symlinkat	sys_symlinkat
+267	common	readlinkat	sys_readlinkat
+268	common	fchmodat	sys_fchmodat
+269	common	faccessat	sys_faccessat
+270	common	pselect6	sys_pselect6
+271	common	ppoll	sys_ppoll
+272	common	unshare	sys_unshare
+273	64	set_robust_list	sys_set_robust_list
+274	64	get_robust_list	sys_get_robust_list
+275	common	splice	sys_splice
+276	common	tee	sys_tee
+277	common	sync_file_range	sys_sync_file_range
+278	64	vmsplice	sys_vmsplice
+279	64	move_pages	sys_move_pages
+280	common	utimensat	sys_utimensat
+281	common	epoll_pwait	sys_epoll_pwait
+282	common	signalfd	sys_signalfd
+283	common	timerfd_create	sys_timerfd_create
+284	common	eventfd	sys_eventfd
+285	common	fallocate	sys_fallocate
+286	common	timerfd_settime	sys_timerfd_settime
+287	common	timerfd_gettime	sys_timerfd_gettime
+288	common	accept4	sys_accept4
+289	common	signalfd4	sys_signalfd4
+290	common	eventfd2	sys_eventfd2
+291	common	epoll_create1	sys_epoll_create1
+292	common	dup3	sys_dup3
+293	common	pipe2	sys_pipe2
+294	common	inotify_init1	sys_inotify_init1
+295	64	preadv	sys_preadv
+296	64	pwritev	sys_pwritev
+297	64	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
+298	common	perf_event_open	sys_perf_event_open
+299	64	recvmmsg	sys_recvmmsg
+300	common	fanotify_init	sys_fanotify_init
+301	common	fanotify_mark	sys_fanotify_mark
+302	common	prlimit64	sys_prlimit64
+303	common	name_to_handle_at	sys_name_to_handle_at
+304	common	open_by_handle_at	sys_open_by_handle_at
+305	common	clock_adjtime	sys_clock_adjtime
+306	common	syncfs	sys_syncfs
+307	64	sendmmsg	sys_sendmmsg
+308	common	setns	sys_setns
+309	common	getcpu	sys_getcpu
+310	64	process_vm_readv	sys_process_vm_readv
+311	64	process_vm_writev	sys_process_vm_writev
+312	common	kcmp	sys_kcmp
+313	common	finit_module	sys_finit_module
+314	common	sched_setattr	sys_sched_setattr
+315	common	sched_getattr	sys_sched_getattr
+316	common	renameat2	sys_renameat2
+317	common	seccomp	sys_seccomp
+318	common	getrandom	sys_getrandom
+319	common	memfd_create	sys_memfd_create
+320	common	kexec_file_load	sys_kexec_file_load
+321	common	bpf	sys_bpf
+322	64	execveat	sys_execveat/ptregs
+323	common	userfaultfd	sys_userfaultfd
+324	common	membarrier	sys_membarrier
+325	common	mlock2	sys_mlock2
+326	common	copy_file_range	sys_copy_file_range
+327	64	preadv2	sys_preadv2
+328	64	pwritev2	sys_pwritev2
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation.
+#
+512	x32	rt_sigaction	compat_sys_rt_sigaction
+513	x32	rt_sigreturn	sys32_x32_rt_sigreturn
+514	x32	ioctl	compat_sys_ioctl
+515	x32	readv	compat_sys_readv
+516	x32	writev	compat_sys_writev
+517	x32	recvfrom	compat_sys_recvfrom
+518	x32	sendmsg	compat_sys_sendmsg
+519	x32	recvmsg	compat_sys_recvmsg
+520	x32	execve	compat_sys_execve/ptregs
+521	x32	ptrace	compat_sys_ptrace
+522	x32	rt_sigpending	compat_sys_rt_sigpending
+523	x32	rt_sigtimedwait	compat_sys_rt_sigtimedwait
+524	x32	rt_sigqueueinfo	compat_sys_rt_sigqueueinfo
+525	x32	sigaltstack	compat_sys_sigaltstack
+526	x32	timer_create	compat_sys_timer_create
+527	x32	mq_notify	compat_sys_mq_notify
+528	x32	kexec_load	compat_sys_kexec_load
+529	x32	waitid	compat_sys_waitid
+530	x32	set_robust_list	compat_sys_set_robust_list
+531	x32	get_robust_list	compat_sys_get_robust_list
+532	x32	vmsplice	compat_sys_vmsplice
+533	x32	move_pages	compat_sys_move_pages
+534	x32	preadv	compat_sys_preadv64
+535	x32	pwritev	compat_sys_pwritev64
+536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
+537	x32	recvmmsg	compat_sys_recvmmsg
+538	x32	sendmmsg	compat_sys_sendmmsg
+539	x32	process_vm_readv	compat_sys_process_vm_readv
+540	x32	process_vm_writev	compat_sys_process_vm_writev
+541	x32	setsockopt	compat_sys_setsockopt
+542	x32	getsockopt	compat_sys_getsockopt
+543	x32	io_setup	compat_sys_io_setup
+544	x32	io_submit	compat_sys_io_submit
+545	x32	execveat	compat_sys_execveat/ptregs
tools/perf/arch/x86/entry/syscalls/syscalltbl.sh (new executable file, 39 lines)
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+in="$1"
+arch="$2"
+
+syscall_macro() {
+	nr="$1"
+	name="$2"
+
+	echo "	[$nr] = \"$name\","
+}
+
+emit() {
+	nr="$1"
+	entry="$2"
+
+	syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > $sorted_table
+
+max_nr=0
+while read nr abi name entry compat; do
+	if [ $nr -ge 512 ] ; then # discard compat sycalls
+		break
+	fi
+
+	emit "$nr" "$name"
+	max_nr=$nr
+done < $sorted_table
+
+rm -f $sorted_table
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
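The script above emits a designated-initializer C array plus a SYSCALLTBL_<arch>_MAX_ID macro. A self-contained sketch of how such generated output can be consumed for id-to-name lookup; the array here is abbreviated by hand for illustration, the real consumer lives elsewhere in the perf tree:

#include <stdio.h>
#include <stddef.h>

/* Shape of what syscalltbl.sh emits, truncated by hand: */
static const char *syscalltbl_x86_64[] = {
	[0] = "read",
	[1] = "write",
	[2] = "open",
};
#define SYSCALLTBL_x86_64_MAX_ID 2

static const char *syscall_id_to_name(int id)
{
	if (id < 0 || id > SYSCALLTBL_x86_64_MAX_ID)
		return NULL;
	return syscalltbl_x86_64[id];	/* may be NULL for holes in the table */
}

int main(void)
{
	printf("%s\n", syscall_id_to_name(2));	/* prints "open" */
	return 0;
}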
@@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)
 
 	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	evsel = perf_evlist__first(evlist);
 
@@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err)
 	if (!intel_bts_pmu)
 		return NULL;
 
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
 	btsr = zalloc(sizeof(struct intel_bts_recording));
 	if (!btsr) {
 		*err = -ENOMEM;
@@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
 	if (!intel_pt_pmu)
 		return NULL;
 
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
 	ptr = zalloc(sizeof(struct intel_pt_recording));
 	if (!ptr) {
 		*err = -ENOMEM;
@@ -7,7 +7,6 @@
 #include <linux/types.h>
 #include "../../util/debug.h"
 #include "../../util/tsc.h"
-#include "tsc.h"
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 			     struct perf_tsc_conversion *tc)
@@ -46,3 +45,34 @@ u64 rdtsc(void)
 
 	return low | ((u64)high) << 32;
 }
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	union perf_event event = {
+		.time_conv = {
+			.header = {
+				.type = PERF_RECORD_TIME_CONV,
+				.size = sizeof(struct time_conv_event),
+			},
+		},
+	};
+	struct perf_tsc_conversion tc;
+	int err;
+
+	err = perf_read_tsc_conversion(pc, &tc);
+	if (err == -EOPNOTSUPP)
+		return 0;
+	if (err)
+		return err;
+
+	pr_debug2("Synthesizing TSC conversion information\n");
+
+	event.time_conv.time_mult  = tc.time_mult;
+	event.time_conv.time_shift = tc.time_shift;
+	event.time_conv.time_zero  = tc.time_zero;
+
+	return process(tool, &event, NULL, machine);
+}
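The three fields carried by the new PERF_RECORD_TIME_CONV event feed the usual mult/shift conversion described in the perf_event mmap page ABI. A sketch of that arithmetic, mirroring the conversion struct used above; the quotient/remainder split keeps the multiplication from overflowing 64 bits:

#include <stdint.h>

struct tsc_conv {	/* same fields as struct perf_tsc_conversion */
	uint16_t time_shift;
	uint32_t time_mult;
	uint64_t time_zero;
};

/* TSC cycles -> perf timestamp, per the perf_event_mmap_page documentation */
static uint64_t tsc_to_perf_time(uint64_t cyc, const struct tsc_conv *tc)
{
	uint64_t quot = cyc >> tc->time_shift;
	uint64_t rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);

	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}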
@@ -1,17 +0,0 @@
-#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-
-#include <linux/types.h>
-
-struct perf_tsc_conversion {
-	u16 time_shift;
-	u32 time_mult;
-	u64 time_zero;
-};
-
-struct perf_event_mmap_page;
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
-			     struct perf_tsc_conversion *tc);
-
-#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
@@ -83,7 +83,7 @@ static void *workerfn(void *arg)
 	do {
 		int ret;
 	again:
-		ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+		ret = futex_lock_pi(w->futex, NULL, futex_flag);
 
 		if (ret) {	/* handle lock acquisition */
 			if (!silent)
@@ -57,13 +57,11 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
 
 /**
  * futex_lock_pi() - block on uaddr as a PI mutex
- * @detect: whether (1) or not (0) to perform deadlock detection
 */
 static inline int
-futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
-	      int opflags)
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
 {
-	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+	return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
 }
 
 /**
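After this cleanup the wrapper always passes 0 where the unused deadlock-detection value went, since FUTEX_LOCK_PI ignores its val argument. A hedged sketch of a PI lock/unlock pair built directly on the raw syscall, in the style of the header being edited (illustrative, not from the commit):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Raw syscall wrapper in the style of tools/perf/bench/futex.h:
 * opflags (e.g. FUTEX_PRIVATE_FLAG) is OR'ed into the op code. */
static int futex(unsigned int *uaddr, int op, int val, void *timeout,
		 unsigned int *uaddr2, int val3, int opflags)
{
	return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3);
}

static unsigned int pi_lock;	/* 0 = unlocked; the kernel stores the owner TID */

static void lock_unlock_once(void)
{
	/* val is unused by FUTEX_LOCK_PI, hence the fixed 0 */
	futex(&pi_lock, FUTEX_LOCK_PI, 0, NULL, NULL, 0, FUTEX_PRIVATE_FLAG);
	futex(&pi_lock, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, FUTEX_PRIVATE_FLAG);
}

int main(void)
{
	lock_unlock_once();
	return 0;
}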
@@ -6,6 +6,7 @@
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
 
+#include "debug.h"
 #include "../perf.h"
 #include "../util/util.h"
 #include <subcmd/parse-options.h>
@@ -63,14 +64,16 @@ static struct perf_event_attr cycle_attr = {
 	.config		= PERF_COUNT_HW_CPU_CYCLES
 };
 
-static void init_cycles(void)
+static int init_cycles(void)
 {
 	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 
-	if (cycles_fd < 0 && errno == ENOSYS)
-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-	else
-		BUG_ON(cycles_fd < 0);
+	if (cycles_fd < 0 && errno == ENOSYS) {
+		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+		return -1;
+	}
+
+	return cycles_fd;
 }
 
 static u64 get_cycles(void)
@@ -155,8 +158,13 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 
 	argc = parse_options(argc, argv, options, info->usage, 0);
 
-	if (use_cycles)
-		init_cycles();
+	if (use_cycles) {
+		i = init_cycles();
+		if (i < 0) {
+			fprintf(stderr, "Failed to open cycles counter\n");
+			return i;
+		}
+	}
 
 	size = (size_t)perf_atoll((char *)size_str);
 	size_total = (double)size * nr_loops;
@@ -12,6 +12,7 @@
 #include <subcmd/parse-options.h>
 #include "util/util.h"
 #include "util/debug.h"
+#include "util/config.h"
 
 static bool use_system_config, use_user_config;
 
@@ -32,13 +33,28 @@ static struct option config_options[] = {
 	OPT_END()
 };
 
-static int show_config(const char *key, const char *value,
-		       void *cb __maybe_unused)
+static int show_config(struct perf_config_set *set)
 {
-	if (value)
-		printf("%s=%s\n", key, value);
-	else
-		printf("%s\n", key);
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+	struct list_head *sections;
+
+	if (set == NULL)
+		return -1;
+
+	sections = &set->sections;
+	if (list_empty(sections))
+		return -1;
+
+	list_for_each_entry(section, sections, node) {
+		list_for_each_entry(item, &section->items, node) {
+			char *value = item->value;
+
+			if (value)
+				printf("%s.%s=%s\n", section->name,
+				       item->name, value);
+		}
+	}
 
 	return 0;
 }
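The new API materializes the config file into an in-memory tree before printing: a set holds a list of sections, each section a list of key/value items, linked with the kernel's intrusive list_head. A minimal standalone sketch of the same shape (plain next-pointers instead of list_head; names are mine, not the perf API):

	#include <stdio.h>

	/* Simplified model of perf_config_set: section list -> item list. */
	struct config_item    { char *name, *value; struct config_item *next; };
	struct config_section { char *name; struct config_item *items;
				struct config_section *next; };
	struct config_set     { struct config_section *sections; };

	/* Same traversal show_config() performs, minus the intrusive lists. */
	static int dump_config(const struct config_set *set)
	{
		const struct config_section *s;
		const struct config_item *i;

		if (!set || !set->sections)
			return -1;

		for (s = set->sections; s; s = s->next)
			for (i = s->items; i; i = i->next)
				if (i->value)
					printf("%s.%s=%s\n", s->name, i->name, i->value);
		return 0;
	}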
@@ -46,6 +62,7 @@ static int show_config(const char *key, const char *value,
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret = 0;
+	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
 	argc = parse_options(argc, argv, config_options, config_usage,
@@ -63,13 +80,19 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 	else if (use_user_config)
 		config_exclusive_filename = user_config;
 
+	set = perf_config_set__new();
+	if (!set) {
+		ret = -1;
+		goto out_err;
+	}
+
 	switch (actions) {
 	case ACTION_LIST:
 		if (argc) {
 			pr_err("Error: takes no arguments\n");
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
-			ret = perf_config(show_config, NULL);
+			ret = show_config(set);
 			if (ret < 0) {
 				const char * config_filename = config_exclusive_filename;
 				if (!config_exclusive_filename)
@@ -83,5 +106,7 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(config_usage, config_options);
 	}
 
+	perf_config_set__delete(set);
+out_err:
 	return ret;
 }
@@ -428,7 +428,7 @@ static void hists__baseline_only(struct hists *hists)
 	struct rb_root *root;
 	struct rb_node *next;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -450,7 +450,7 @@ static void hists__precompute(struct hists *hists)
 	struct rb_root *root;
 	struct rb_node *next;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
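Several global sort__* flags move into per-hists state here; hists__has() is, in perf's headers of this era, a simple field-access macro along these lines (reproduced from memory, so treat the exact spelling as a sketch):

	/* Roughly how tools/perf/util/hist.h spells the per-hists test
	 * that replaces globals like sort__need_collapse: */
	struct perf_hpp_list { int need_collapse, parent, sym, socket; };
	struct hists { struct perf_hpp_list *hpp_list; };

	#define hists__has(h, f) ((h)->hpp_list->f)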
@@ -61,6 +61,7 @@ static int check_emacsclient_version(void)
 	struct child_process ec_process;
 	const char *argv_ec[] = { "emacsclient", "--version", NULL };
 	int version;
+	int ret = -1;
 
 	/* emacsclient prints its version number on stderr */
 	memset(&ec_process, 0, sizeof(ec_process));
@@ -71,7 +72,10 @@ static int check_emacsclient_version(void)
 		fprintf(stderr, "Failed to start emacsclient.\n");
 		return -1;
 	}
-	strbuf_read(&buffer, ec_process.err, 20);
+	if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
+		fprintf(stderr, "Failed to read emacsclient version\n");
+		goto out;
+	}
 	close(ec_process.err);
 
 	/*
@@ -82,8 +86,7 @@ static int check_emacsclient_version(void)
 
 	if (prefixcmp(buffer.buf, "emacsclient")) {
 		fprintf(stderr, "Failed to parse emacsclient version.\n");
-		strbuf_release(&buffer);
-		return -1;
+		goto out;
 	}
 
 	version = atoi(buffer.buf + strlen("emacsclient"));
@@ -92,12 +95,11 @@ static int check_emacsclient_version(void)
 		fprintf(stderr,
 			"emacsclient version '%d' too old (< 22).\n",
 			version);
-		strbuf_release(&buffer);
-		return -1;
-	}
+	} else
+		ret = 0;
+out:
 
 	strbuf_release(&buffer);
-	return 0;
+	return ret;
 }
 
 static void exec_woman_emacs(const char *path, const char *page)
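The rewrite converts three early returns, each of which had to remember strbuf_release(), into the kernel's single-exit idiom: default ret to failure, jump to one label on every error path, and release resources exactly once. The shape, reduced to its bones (illustrative only, not code from this commit):

	#include <stdio.h>
	#include <stdlib.h>

	int do_work(void)
	{
		int ret = -1;              /* pessimistic default */
		char *buf = malloc(64);

		if (!buf)
			return -1;         /* nothing to clean up yet */

		if (fgets(buf, 64, stdin) == NULL)
			goto out;          /* error: fall through to cleanup */

		ret = 0;                   /* success */
	out:
		free(buf);                 /* single cleanup point */
		return ret;
	}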
@@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.auxtrace_info	= perf_event__repipe_op2_synth,
 			.auxtrace	= perf_event__repipe_auxtrace,
 			.auxtrace_error	= perf_event__repipe_op2_synth,
+			.time_conv	= perf_event__repipe_op2_synth,
 			.finished_round	= perf_event__repipe_oe_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 			.id_index	= perf_event__repipe_op2_synth,
@@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
 	}
 
 	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
-	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 
 	callchain_cursor_commit(&callchain_cursor);
 	while (true) {
@@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
 	struct perf_evlist *evlist = kvm->evlist;
 	char sbuf[STRERR_BUFSIZE];
 
-	perf_evlist__config(evlist, &kvm->opts);
+	perf_evlist__config(evlist, &kvm->opts, NULL);
 
 	/*
 	 * Note: exclude_{guest,host} do not apply here.
@@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
 	int ret;
+	bool all_user = false, all_kernel = false;
 	struct option options[] = {
 	OPT_CALLBACK('e', "event", &mem, "event",
 		     "event selector. use 'perf mem record -e list' to list available events",
 		     parse_record_events),
 	OPT_INCR('v', "verbose", &verbose,
 		 "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"),
+	OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"),
 	OPT_END()
 	};
 
 	argc = parse_options(argc, argv, options, record_mem_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
-	rec_argc = argc + 7; /* max number of arguments */
+	rec_argc = argc + 9; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 	if (!rec_argv)
 		return -1;
@@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 		rec_argv[i++] = perf_mem_events__name(j);
 	};
 
+	if (all_user)
+		rec_argv[i++] = "--all-user";
+
+	if (all_kernel)
+		rec_argv[i++] = "--all-kernel";
+
 	for (j = 0; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
|
||||||
#include "util/data.h"
|
#include "util/data.h"
|
||||||
#include "util/perf_regs.h"
|
#include "util/perf_regs.h"
|
||||||
#include "util/auxtrace.h"
|
#include "util/auxtrace.h"
|
||||||
|
#include "util/tsc.h"
|
||||||
#include "util/parse-branch-options.h"
|
#include "util/parse-branch-options.h"
|
||||||
#include "util/parse-regs-options.h"
|
#include "util/parse-regs-options.h"
|
||||||
#include "util/llvm-utils.h"
|
#include "util/llvm-utils.h"
|
||||||
#include "util/bpf-loader.h"
|
#include "util/bpf-loader.h"
|
||||||
|
#include "util/trigger.h"
|
||||||
#include "asm/bug.h"
|
#include "asm/bug.h"
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
@@ -55,6 +57,8 @@ struct record {
 	bool			no_buildid_cache;
 	bool			no_buildid_cache_set;
 	bool			buildid_all;
+	bool			timestamp_filename;
+	bool			switch_output;
 	unsigned long long	samples;
 };
 
@@ -124,9 +128,10 @@
 static volatile int done;
 static volatile int signr = -1;
 static volatile int child_finished;
-static volatile int auxtrace_snapshot_enabled;
-static volatile int auxtrace_snapshot_err;
 static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
 
 static void sig_handler(int sig)
 {
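DEFINE_TRIGGER comes from the new util/trigger.h: a tiny state machine (roughly OFF, READY, HIT, ERROR) shared between the main loop and a signal handler, replacing the ad-hoc enabled/err flag pairs deleted above. A condensed sketch of the idea (simplified from the real header, which has an extra intermediate state and guards every transition, so names and values here are approximate):

	/* Minimal trigger: the main loop arms it, a signal handler fires it. */
	enum trigger_state {
		TRIGGER_ERROR = -2,
		TRIGGER_OFF   = -1,
		TRIGGER_READY =  0,
		TRIGGER_HIT   =  1,
	};

	struct trigger {
		volatile enum trigger_state state;
		const char *name;
	};

	#define DEFINE_TRIGGER(n) \
		struct trigger n = { .state = TRIGGER_OFF, .name = #n }

	static inline void trigger_ready(struct trigger *t)
	{
		if (t->state != TRIGGER_ERROR)
			t->state = TRIGGER_READY;
	}

	static inline void trigger_hit(struct trigger *t)	/* signal handler side */
	{
		if (t->state == TRIGGER_READY)
			t->state = TRIGGER_HIT;
	}

	static inline int  trigger_is_hit(struct trigger *t)   { return t->state == TRIGGER_HIT; }
	static inline int  trigger_is_ready(struct trigger *t) { return t->state == TRIGGER_READY; }
	static inline int  trigger_is_error(struct trigger *t) { return t->state == TRIGGER_ERROR; }
	static inline void trigger_error(struct trigger *t)    { t->state = TRIGGER_ERROR; }
	static inline void trigger_off(struct trigger *t)      { t->state = TRIGGER_OFF; }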
@@ -244,11 +249,12 @@ static void record__read_auxtrace_snapshot(struct record *rec)
 {
 	pr_debug("Recording AUX area tracing snapshot\n");
 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
-		auxtrace_snapshot_err = -1;
+		trigger_error(&auxtrace_snapshot_trigger);
 	} else {
-		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
-		if (!auxtrace_snapshot_err)
-			auxtrace_snapshot_enabled = 1;
+		if (auxtrace_record__snapshot_finish(rec->itr))
+			trigger_error(&auxtrace_snapshot_trigger);
+		else
+			trigger_ready(&auxtrace_snapshot_trigger);
 	}
 }
 
@@ -283,7 +289,7 @@ static int record__open(struct record *rec)
 	struct record_opts *opts = &rec->opts;
 	int rc = 0;
 
-	perf_evlist__config(evlist, opts);
+	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each(evlist, pos) {
 try_again:
@@ -494,6 +500,73 @@ record__finish_output(struct record *rec)
 	return;
 }
 
+static int record__synthesize_workload(struct record *rec)
+{
+	struct {
+		struct thread_map map;
+		struct thread_map_data map_data;
+	} thread_map;
+
+	thread_map.map.nr = 1;
+	thread_map.map.map[0].pid = rec->evlist->workload.pid;
+	thread_map.map.map[0].comm = NULL;
+	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
+						 process_synthesized_event,
+						 &rec->session->machines.host,
+						 rec->opts.sample_address,
+						 rec->opts.proc_map_timeout);
+}
+
+static int record__synthesize(struct record *rec);
+
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd, err;
+
+	/* Same Size: "2015122520103046"*/
+	char timestamp[] = "InvalidTimestamp";
+
+	rec->samples = 0;
+	record__finish_output(rec);
+	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+	if (err) {
+		pr_err("Failed to get current timestamp\n");
+		return -EINVAL;
+	}
+
+	fd = perf_data_file__switch(file, timestamp,
+				    rec->session->header.data_offset,
+				    at_exit);
+	if (fd >= 0 && !at_exit) {
+		rec->bytes_written = 0;
+		rec->session->header.data_size = 0;
+	}
+
+	if (!quiet)
+		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+			file->path, timestamp);
+
+	/* Output tracking events */
+	if (!at_exit) {
+		record__synthesize(rec);
+
+		/*
+		 * In 'perf record --switch-output' without -a,
+		 * record__synthesize() in record__switch_output() won't
+		 * generate tracking events because there's no thread_map
+		 * in evlist. Which causes newly created perf.data doesn't
+		 * contain map and comm information.
+		 * Create a fake thread_map and directly call
+		 * perf_event__synthesize_thread_map() for those events.
+		 */
+		if (target__none(&rec->opts.target))
+			record__synthesize_workload(rec);
+	}
+	return fd;
+}
+
 static volatile int workload_exec_errno;
 
 /*
|
||||||
|
|
||||||
static void snapshot_sig_handler(int sig);
|
static void snapshot_sig_handler(int sig);
|
||||||
|
|
||||||
|
int __weak
|
||||||
|
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
|
||||||
|
struct perf_tool *tool __maybe_unused,
|
||||||
|
perf_event__handler_t process __maybe_unused,
|
||||||
|
struct machine *machine __maybe_unused)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int record__synthesize(struct record *rec)
|
static int record__synthesize(struct record *rec)
|
||||||
{
|
{
|
||||||
struct perf_session *session = rec->session;
|
struct perf_session *session = rec->session;
|
||||||
|
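The __weak default lets architectures that cannot calibrate TSC simply omit the function: the generic no-op links everywhere, and the strong x86 definition earlier in this commit wins at link time. The mechanism in miniature (GCC/Clang attribute; the symbol name in this demo is illustrative):

	#include <stdio.h>

	/* Generic fallback: replaced wherever an arch object file
	 * provides a strong definition of the same symbol. */
	__attribute__((weak)) int synth_time_conv(void)
	{
		return 0;	/* feature unsupported: silently skip */
	}

	int main(void)
	{
		printf("synth_time_conv() = %d\n", synth_time_conv());
		return 0;
	}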
@@ -549,6 +631,11 @@ static int record__synthesize(struct record *rec)
 		}
 	}
 
+	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+					  process_synthesized_event, machine);
+	if (err)
+		goto out;
+
 	if (rec->opts.full_auxtrace) {
 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
 					session, process_synthesized_event);
|
||||||
signal(SIGCHLD, sig_handler);
|
signal(SIGCHLD, sig_handler);
|
||||||
signal(SIGINT, sig_handler);
|
signal(SIGINT, sig_handler);
|
||||||
signal(SIGTERM, sig_handler);
|
signal(SIGTERM, sig_handler);
|
||||||
if (rec->opts.auxtrace_snapshot_mode)
|
|
||||||
|
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
|
||||||
signal(SIGUSR2, snapshot_sig_handler);
|
signal(SIGUSR2, snapshot_sig_handler);
|
||||||
else
|
if (rec->opts.auxtrace_snapshot_mode)
|
||||||
|
trigger_on(&auxtrace_snapshot_trigger);
|
||||||
|
if (rec->switch_output)
|
||||||
|
trigger_on(&switch_output_trigger);
|
||||||
|
} else {
|
||||||
signal(SIGUSR2, SIG_IGN);
|
signal(SIGUSR2, SIG_IGN);
|
||||||
|
}
|
||||||
|
|
||||||
session = perf_session__new(file, false, tool);
|
session = perf_session__new(file, false, tool);
|
||||||
if (session == NULL) {
|
if (session == NULL) {
|
||||||
|
@@ -729,27 +822,45 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		perf_evlist__enable(rec->evlist);
 	}
 
-	auxtrace_snapshot_enabled = 1;
+	trigger_ready(&auxtrace_snapshot_trigger);
+	trigger_ready(&switch_output_trigger);
 	for (;;) {
 		unsigned long long hits = rec->samples;
 
 		if (record__mmap_read_all(rec) < 0) {
-			auxtrace_snapshot_enabled = 0;
+			trigger_error(&auxtrace_snapshot_trigger);
+			trigger_error(&switch_output_trigger);
 			err = -1;
 			goto out_child;
 		}
 
 		if (auxtrace_record__snapshot_started) {
 			auxtrace_record__snapshot_started = 0;
-			if (!auxtrace_snapshot_err)
+			if (!trigger_is_error(&auxtrace_snapshot_trigger))
 				record__read_auxtrace_snapshot(rec);
-			if (auxtrace_snapshot_err) {
+			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
 				pr_err("AUX area tracing snapshot failed\n");
 				err = -1;
 				goto out_child;
 			}
 		}
 
+		if (trigger_is_hit(&switch_output_trigger)) {
+			trigger_ready(&switch_output_trigger);
+
+			if (!quiet)
+				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
+					waking);
+			waking = 0;
+			fd = record__switch_output(rec, false);
+			if (fd < 0) {
+				pr_err("Failed to switch to new file\n");
+				trigger_error(&switch_output_trigger);
+				err = fd;
+				goto out_child;
+			}
+		}
+
 		if (hits == rec->samples) {
 			if (done || draining)
 				break;
@@ -772,12 +883,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		 * disable events in this case.
 		 */
 		if (done && !disabled && !target__none(&opts->target)) {
-			auxtrace_snapshot_enabled = 0;
+			trigger_off(&auxtrace_snapshot_trigger);
 			perf_evlist__disable(rec->evlist);
 			disabled = true;
 		}
 	}
-	auxtrace_snapshot_enabled = 0;
+	trigger_off(&auxtrace_snapshot_trigger);
+	trigger_off(&switch_output_trigger);
 
 	if (forks && workload_exec_errno) {
 		char msg[STRERR_BUFSIZE];

@@ -811,11 +923,22 @@ out_child:
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 
-	if (!err)
-		record__finish_output(rec);
+	if (!err) {
+		if (!rec->timestamp_filename) {
+			record__finish_output(rec);
+		} else {
+			fd = record__switch_output(rec, true);
+			if (fd < 0) {
+				status = fd;
+				goto out_delete_session;
+			}
+		}
+	}
 
 	if (!err && !quiet) {
 		char samples[128];
+		const char *postfix = rec->timestamp_filename ?
+					".<timestamp>" : "";
 
 		if (rec->samples && !rec->opts.full_auxtrace)
 			scnprintf(samples, sizeof(samples),

@@ -823,9 +946,9 @@ out_child:
 		else
 			samples[0] = '\0';
 
-		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
+		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
 			perf_data_file__size(file) / 1024.0 / 1024.0,
-			file->path, samples);
+			file->path, postfix, samples);
 	}
 
 out_delete_session:

@@ -833,58 +956,61 @@ out_delete_session:
 	return status;
 }
 
-static void callchain_debug(void)
+static void callchain_debug(struct callchain_param *callchain)
 {
 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 
-	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
 
-	if (callchain_param.record_mode == CALLCHAIN_DWARF)
+	if (callchain->record_mode == CALLCHAIN_DWARF)
 		pr_debug("callchain: stack dump size %d\n",
-			 callchain_param.dump_size);
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF)
+			record->sample_address = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
 }
 
 int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
-	int ret;
-	struct record_opts *record = (struct record_opts *)opt->value;
-
-	record->callgraph_set = true;
-	callchain_param.enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain_param.record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, &callchain_param);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain_param.record_mode == CALLCHAIN_DWARF)
-			record->sample_address = true;
-		callchain_debug();
-	}
-
-	return ret;
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
 int record_callchain_opt(const struct option *opt,
 			 const char *arg __maybe_unused,
 			 int unset __maybe_unused)
 {
-	struct record_opts *record = (struct record_opts *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
-	record->callgraph_set = true;
-	callchain_param.enabled = true;
+	callchain->enabled = true;
 
-	if (callchain_param.record_mode == CALLCHAIN_NONE)
-		callchain_param.record_mode = CALLCHAIN_FP;
+	if (callchain->record_mode == CALLCHAIN_NONE)
+		callchain->record_mode = CALLCHAIN_FP;
 
-	callchain_debug();
+	callchain_debug(callchain);
 	return 0;
 }
 
|
||||||
record__parse_mmap_pages),
|
record__parse_mmap_pages),
|
||||||
OPT_BOOLEAN(0, "group", &record.opts.group,
|
OPT_BOOLEAN(0, "group", &record.opts.group,
|
||||||
"put the counters into a counter group"),
|
"put the counters into a counter group"),
|
||||||
OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
|
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
|
||||||
NULL, "enables call-graph recording" ,
|
NULL, "enables call-graph recording" ,
|
||||||
&record_callchain_opt),
|
&record_callchain_opt),
|
||||||
OPT_CALLBACK(0, "call-graph", &record.opts,
|
OPT_CALLBACK(0, "call-graph", &record.opts,
|
||||||
|
@ -1195,6 +1321,10 @@ struct option __record_options[] = {
|
||||||
"file", "vmlinux pathname"),
|
"file", "vmlinux pathname"),
|
||||||
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
|
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
|
||||||
"Record build-id of all DSOs regardless of hits"),
|
"Record build-id of all DSOs regardless of hits"),
|
||||||
|
OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
|
||||||
|
"append timestamp to output filename"),
|
||||||
|
OPT_BOOLEAN(0, "switch-output", &record.switch_output,
|
||||||
|
"Switch output when receive SIGUSR2"),
|
||||||
OPT_END()
|
OPT_END()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1250,6 +1380,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rec->switch_output)
|
||||||
|
rec->timestamp_filename = true;
|
||||||
|
|
||||||
if (!rec->itr) {
|
if (!rec->itr) {
|
||||||
rec->itr = auxtrace_record__init(rec->evlist, &err);
|
rec->itr = auxtrace_record__init(rec->evlist, &err);
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -1261,6 +1394,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
err = bpf__setup_stdout(rec->evlist);
|
||||||
|
if (err) {
|
||||||
|
bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
|
||||||
|
pr_err("ERROR: Setup BPF stdout failed: %s\n",
|
||||||
|
errbuf);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
|
|
||||||
symbol__init(NULL);
|
symbol__init(NULL);
|
||||||
|
@ -1275,8 +1416,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
|
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
|
||||||
"even with a suitable vmlinux or kallsyms file.\n\n");
|
"even with a suitable vmlinux or kallsyms file.\n\n");
|
||||||
|
|
||||||
if (rec->no_buildid_cache || rec->no_buildid)
|
if (rec->no_buildid_cache || rec->no_buildid) {
|
||||||
disable_buildid_cache();
|
disable_buildid_cache();
|
||||||
|
} else if (rec->switch_output) {
|
||||||
|
/*
|
||||||
|
* In 'perf record --switch-output', disable buildid
|
||||||
|
* generation by default to reduce data file switching
|
||||||
|
* overhead. Still generate buildid if they are required
|
||||||
|
* explicitly using
|
||||||
|
*
|
||||||
|
* perf record --signal-trigger --no-no-buildid \
|
||||||
|
* --no-no-buildid-cache
|
||||||
|
*
|
||||||
|
* Following code equals to:
|
||||||
|
*
|
||||||
|
* if ((rec->no_buildid || !rec->no_buildid_set) &&
|
||||||
|
* (rec->no_buildid_cache || !rec->no_buildid_cache_set))
|
||||||
|
* disable_buildid_cache();
|
||||||
|
*/
|
||||||
|
bool disable = true;
|
||||||
|
|
||||||
|
if (rec->no_buildid_set && !rec->no_buildid)
|
||||||
|
disable = false;
|
||||||
|
if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
|
||||||
|
disable = false;
|
||||||
|
if (disable) {
|
||||||
|
rec->no_buildid = true;
|
||||||
|
rec->no_buildid_cache = true;
|
||||||
|
disable_buildid_cache();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (rec->evlist->nr_entries == 0 &&
|
if (rec->evlist->nr_entries == 0 &&
|
||||||
perf_evlist__add_default(rec->evlist) < 0) {
|
perf_evlist__add_default(rec->evlist) < 0) {
|
||||||
|
@ -1335,9 +1504,13 @@ out_symbol_exit:
|
||||||
|
|
||||||
static void snapshot_sig_handler(int sig __maybe_unused)
|
static void snapshot_sig_handler(int sig __maybe_unused)
|
||||||
{
|
{
|
||||||
if (!auxtrace_snapshot_enabled)
|
if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
|
||||||
return;
|
trigger_hit(&auxtrace_snapshot_trigger);
|
||||||
auxtrace_snapshot_enabled = 0;
|
auxtrace_record__snapshot_started = 1;
|
||||||
auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
|
if (auxtrace_record__snapshot_start(record.itr))
|
||||||
auxtrace_record__snapshot_started = 1;
|
trigger_error(&auxtrace_snapshot_trigger);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trigger_is_ready(&switch_output_trigger))
|
||||||
|
trigger_hit(&switch_output_trigger);
|
||||||
}
|
}
|
||||||
|
|
|
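Taken together, the pieces above let a long-running session be rotated from the outside without stopping it: start recording with `perf record --switch-output ...`, then send `kill -USR2 <pid-of-perf>` and the current perf.data is closed under a timestamped name while collection continues into a fresh file (example invocation, not quoted from the commit).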
@@ -47,7 +47,6 @@ struct report {
 	struct perf_tool	tool;
 	struct perf_session	*session;
 	bool			use_tui, use_gtk, use_stdio;
-	bool			dont_use_callchains;
 	bool			show_full_info;
 	bool			show_threads;
 	bool			inverted_callchain;

@@ -235,7 +234,7 @@ static int report__setup_sample_type(struct report *rep)
 		sample_type |= PERF_SAMPLE_BRANCH_STACK;
 
 	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-		if (sort__has_parent) {
+		if (perf_hpp_list.parent) {
 			ui__error("Selected --sort parent, but no "
 				    "callchain data. Did you call "
 				    "'perf record' without -g?\n");

@@ -247,7 +246,7 @@ static int report__setup_sample_type(struct report *rep)
 				  "you call 'perf record' without -g?\n");
 			return -1;
 		}
-	} else if (!rep->dont_use_callchains &&
+	} else if (!callchain_param.enabled &&
 		   callchain_param.mode != CHAIN_NONE &&
 		   !symbol_conf.use_callchain) {
 			symbol_conf.use_callchain = true;

@@ -599,13 +598,15 @@ static int __cmd_report(struct report *rep)
 static int
 report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	struct report *rep = (struct report *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
+	callchain->enabled = !unset;
 	/*
 	 * --no-call-graph
 	 */
 	if (unset) {
-		rep->dont_use_callchains = true;
+		symbol_conf.use_callchain = false;
+		callchain->mode = CHAIN_NONE;
 		return 0;
 	}
 

@@ -690,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
 		},
-		.max_stack		 = PERF_MAX_STACK_DEPTH,
+		.max_stack		 = sysctl_perf_event_max_stack,
 		.pretty_printing_style	 = "normal",
 		.socket_filter		 = -1,
 	};

@@ -734,7 +735,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
 			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
 			     report_callchain_help, &report_parse_callchain_opt,
 			     callchain_default_opt),

@@ -743,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
 		    "anything beyond the specified depth will be ignored. "
-		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",

@@ -935,7 +936,7 @@ repeat:
 			goto error;
 		}
 
-		sort__need_collapse = true;
+		perf_hpp_list.need_collapse = true;
 	}
 
 	/* Force tty output for header output and per-thread stat. */
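sysctl_perf_event_max_stack mirrors the new kernel.perf_event_max_stack sysctl documented at the top of this commit, replacing the compile-time PERF_MAX_STACK_DEPTH default. A tool can pick up the current limit at startup roughly like this (a sketch; perf itself goes through its procfs helpers rather than a bare fopen):

	#include <stdio.h>

	#define PERF_MAX_STACK_DEPTH 127	/* compile-time fallback */

	/* Read kernel.perf_event_max_stack, falling back to the constant. */
	static int read_max_stack(void)
	{
		FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
		int max_stack = PERF_MAX_STACK_DEPTH;

		if (f) {
			if (fscanf(f, "%d", &max_stack) != 1)
				max_stack = PERF_MAX_STACK_DEPTH;
			fclose(f);
		}
		return max_stack;
	}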
@@ -11,6 +11,8 @@
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
 
 #include <subcmd/parse-options.h>
 #include "util/trace-event.h"

@@ -122,6 +124,21 @@ struct trace_sched_handler {
 			  struct machine *machine);
 };
 
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
+struct perf_sched_map {
+	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
+	int			*comp_cpus;
+	bool			 comp;
+	struct thread_map	*color_pids;
+	const char		*color_pids_str;
+	struct cpu_map		*color_cpus;
+	const char		*color_cpus_str;
+	struct cpu_map		*cpus;
+	const char		*cpus_str;
+};
+
 struct perf_sched {
 	struct perf_tool tool;
 	const char	 *sort_order;

@@ -173,6 +190,7 @@ struct perf_sched {
 	struct list_head sort_list, cmp_pid;
 	bool force;
 	bool skip_merge;
+	struct perf_sched_map map;
 };
 
 static u64 get_nsecs(void)
@@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
 	return 0;
 }
 
+union map_priv {
+	void	*ptr;
+	bool	 color;
+};
+
+static bool thread__has_color(struct thread *thread)
+{
+	union map_priv priv = {
+		.ptr = thread__priv(thread),
+	};
+
+	return priv.color;
+}
+
+static struct thread*
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+	struct thread *thread = machine__findnew_thread(machine, pid, tid);
+	union map_priv priv = {
+		.color = false,
+	};
+
+	if (!sched->map.color_pids || !thread || thread__priv(thread))
+		return thread;
+
+	if (thread_map__has(sched->map.color_pids, tid))
+		priv.color = true;
+
+	thread__set_priv(thread, priv.ptr);
+	return thread;
+}
+
 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 			    struct perf_sample *sample, struct machine *machine)
 {
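union map_priv is a small trick for smuggling a flag through an opaque void * slot without allocating: the bool and the pointer share storage, so the flag rides inside the pointer value itself. In isolation (the union comes from this diff; the demo around it is mine):

	#include <stdbool.h>
	#include <stdio.h>

	union map_priv {
		void *ptr;
		bool  color;
	};

	int main(void)
	{
		void *slot = NULL;			/* opaque per-thread priv slot */

		union map_priv in = { .color = true };
		slot = in.ptr;				/* store the flag as a pointer */

		union map_priv out = { .ptr = slot };	/* later: read it back */
		printf("color = %s\n", out.color ? "true" : "false");
		return 0;
	}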
@@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	int new_shortname;
 	u64 timestamp0, timestamp = sample->time;
 	s64 delta;
-	int cpu, this_cpu = sample->cpu;
+	int i, this_cpu = sample->cpu;
+	int cpus_nr;
+	bool new_cpu = false;
+	const char *color = PERF_COLOR_NORMAL;
 
 	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
 
 	if (this_cpu > sched->max_cpu)
 		sched->max_cpu = this_cpu;
 
+	if (sched->map.comp) {
+		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+			sched->map.comp_cpus[cpus_nr++] = this_cpu;
+			new_cpu = true;
+		}
+	} else
+		cpus_nr = sched->max_cpu;
+
 	timestamp0 = sched->cpu_last_switched[this_cpu];
 	sched->cpu_last_switched[this_cpu] = timestamp;
 	if (timestamp0)

@@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		return -1;
 	}
 
-	sched_in = machine__findnew_thread(machine, -1, next_pid);
+	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
 	if (sched_in == NULL)
 		return -1;
 
@@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		new_shortname = 1;
 	}
 
-	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
+	for (i = 0; i < cpus_nr; i++) {
+		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+		struct thread *curr_thread = sched->curr_thread[cpu];
+		const char *pid_color = color;
+		const char *cpu_color = color;
+
+		if (curr_thread && thread__has_color(curr_thread))
+			pid_color = COLOR_PIDS;
+
+		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+			continue;
+
+		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+			cpu_color = COLOR_CPUS;
+
 		if (cpu != this_cpu)
-			printf(" ");
+			color_fprintf(stdout, cpu_color, " ");
 		else
-			printf("*");
+			color_fprintf(stdout, cpu_color, "*");
 
 		if (sched->curr_thread[cpu])
-			printf("%2s ", sched->curr_thread[cpu]->shortname);
+			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
 		else
-			printf(" ");
+			color_fprintf(stdout, color, " ");
 	}
 
-	printf(" %12.6f secs ", (double)timestamp/1e9);
+	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+		goto out;
+
+	color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9);
 	if (new_shortname) {
-		printf("%s => %s:%d\n",
+		const char *pid_color = color;
+
+		if (thread__has_color(sched_in))
+			pid_color = COLOR_PIDS;
+
+		color_fprintf(stdout, pid_color, "%s => %s:%d",
 			sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
-	} else {
-		printf("\n");
 	}
 
+	if (sched->map.comp && new_cpu)
+		color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+	color_fprintf(stdout, color, "\n");
+
 	thread__put(sched_in);
 
 	return 0;
@@ -1675,9 +1763,75 @@ static int perf_sched__lat(struct perf_sched *sched)
 	return 0;
 }
 
+static int setup_map_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *map;
+
+	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+
+	if (sched->map.comp) {
+		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+		if (!sched->map.comp_cpus)
+			return -1;
+	}
+
+	if (!sched->map.cpus_str)
+		return 0;
+
+	map = cpu_map__new(sched->map.cpus_str);
+	if (!map) {
+		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+		return -1;
+	}
+
+	sched->map.cpus = map;
+	return 0;
+}
+
+static int setup_color_pids(struct perf_sched *sched)
+{
+	struct thread_map *map;
+
+	if (!sched->map.color_pids_str)
+		return 0;
+
+	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
+	if (!map) {
+		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+		return -1;
+	}
+
+	sched->map.color_pids = map;
+	return 0;
+}
+
+static int setup_color_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *map;
+
+	if (!sched->map.color_cpus_str)
+		return 0;
+
+	map = cpu_map__new(sched->map.color_cpus_str);
+	if (!map) {
+		pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str);
+		return -1;
+	}
+
+	sched->map.color_cpus = map;
+	return 0;
+}
+
 static int perf_sched__map(struct perf_sched *sched)
 {
-	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+	if (setup_map_cpus(sched))
+		return -1;
+
+	if (setup_color_pids(sched))
+		return -1;
+
+	if (setup_color_cpus(sched))
+		return -1;
+
 	setup_pager();
 	if (perf_sched__read_events(sched))
@@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "dump raw trace in ASCII"),
 	OPT_END()
 	};
+	const struct option map_options[] = {
+	OPT_BOOLEAN(0, "compact", &sched.map.comp,
+		    "map output in compact mode"),
+	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+		   "highlight given pids in map"),
+	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+		   "highlight given CPUs in map"),
+	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+		   "display given CPUs in map"),
+	OPT_END()
+	};
 	const char * const latency_usage[] = {
 		"perf sched latency [<options>]",
 		NULL

@@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf sched replay [<options>]",
 		NULL
 	};
+	const char * const map_usage[] = {
+		"perf sched map [<options>]",
+		NULL
+	};
 	const char *const sched_subcommands[] = { "record", "latency", "map",
 						  "replay", "script", NULL };
 	const char *sched_usage[] = {

@@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		setup_sorting(&sched, latency_options, latency_usage);
 		return perf_sched__lat(&sched);
 	} else if (!strcmp(argv[0], "map")) {
+		if (argc) {
+			argc = parse_options(argc, argv, map_options, map_usage, 0);
+			if (argc)
+				usage_with_options(map_usage, map_options);
+		}
 		sched.tp_handler = &map_ops;
 		setup_sorting(&sched, latency_options, latency_usage);
 		return perf_sched__map(&sched);
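The new flags compose: an invocation such as `perf sched map --compact --color-pids 1234,1235 --cpus 0-7` restricts the map to CPUs 0-7, collapses the columns to CPUs actually seen, and highlights the given tasks (example command line, not quoted from the commit).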
@@ -22,6 +22,7 @@
 #include "util/thread_map.h"
 #include "util/stat.h"
 #include <linux/bitmap.h>
+#include <linux/stringify.h>
 #include "asm/bug.h"
 #include "util/mem-events.h"
 
@@ -317,19 +318,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
 
 	output[type].print_ip_opts = 0;
 	if (PRINT_FIELD(IP))
-		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
+		output[type].print_ip_opts |= EVSEL__PRINT_IP;
 
 	if (PRINT_FIELD(SYM))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
+		output[type].print_ip_opts |= EVSEL__PRINT_SYM;
 
 	if (PRINT_FIELD(DSO))
-		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
+		output[type].print_ip_opts |= EVSEL__PRINT_DSO;
 
 	if (PRINT_FIELD(SYMOFFSET))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+		output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
 
 	if (PRINT_FIELD(SRCLINE))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
+		output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
 }
 
 /*
@@ -569,18 +570,23 @@ static void print_sample_bts(struct perf_sample *sample,
 	/* print branch_from information */
 	if (PRINT_FIELD(IP)) {
 		unsigned int print_opts = output[attr->type].print_ip_opts;
+		struct callchain_cursor *cursor = NULL;
 
-		if (symbol_conf.use_callchain && sample->callchain) {
-			printf("\n");
-		} else {
-			printf(" ");
-			if (print_opts & PRINT_IP_OPT_SRCLINE) {
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		if (cursor == NULL) {
+			putchar(' ');
+			if (print_opts & EVSEL__PRINT_SRCLINE) {
 				print_srcline_last = true;
-				print_opts &= ~PRINT_IP_OPT_SRCLINE;
+				print_opts &= ~EVSEL__PRINT_SRCLINE;
 			}
-		}
-		perf_evsel__print_ip(evsel, sample, al, print_opts,
-				     scripting_max_stack);
+		} else
+			putchar('\n');
+
+		sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
 	}
 
 	/* print branch_to information */
@@ -783,14 +789,15 @@ static void process_event(struct perf_script *script,
 		printf("%16" PRIu64, sample->weight);
 
 	if (PRINT_FIELD(IP)) {
-		if (!symbol_conf.use_callchain)
-			printf(" ");
-		else
-			printf("\n");
+		struct callchain_cursor *cursor = NULL;
 
-		perf_evsel__print_ip(evsel, sample, al,
-				     output[attr->type].print_ip_opts,
-				     scripting_max_stack);
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		putchar(cursor ? '\n' : ' ');
+		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
 	}
 
 	if (PRINT_FIELD(IREGS))
@@ -1959,6 +1966,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 			.exit		 = perf_event__process_exit,
 			.fork		 = perf_event__process_fork,
 			.attr		 = process_attr,
+			.event_update	 = perf_event__process_event_update,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,

@@ -2020,6 +2028,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "only consider symbols in these pids"),
 	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
 		   "only consider symbols in these tids"),
+	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+		     "Set the maximum stack depth when parsing the callchain, "
+		     "anything beyond the specified depth will be ignored. "
+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_BOOLEAN('I', "show-info", &show_full_info,
 		    "display extended information from perf.data file"),
 	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,

@@ -2055,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		NULL
 	};
 
+	scripting_max_stack = sysctl_perf_event_max_stack;
+
 	setup_scripting();
 
 	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
@@ -298,6 +298,14 @@ static int read_counter(struct perf_evsel *counter)
 					return -1;
 				}
 			}
+
+			if (verbose > 1) {
+				fprintf(stat_config.output,
+					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+						perf_evsel__name(counter),
+						cpu,
+						count->val, count->ena, count->run);
+			}
 		}
 	}
 
@@ -688,7 +688,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter,
 	struct hist_entry *he = iter->he;
 	struct perf_evsel *evsel = iter->evsel;
 
-	if (sort__has_sym && single)
+	if (perf_hpp_list.sym && single)
 		perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
 
 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,

@@ -886,7 +886,7 @@ static int perf_top__start_counters(struct perf_top *top)
 	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;
 
-	perf_evlist__config(evlist, opts);
+	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each(evlist, counter) {
 try_again:

@@ -917,15 +917,15 @@ out_err:
 	return -1;
 }
 
-static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
 {
-	if (!sort__has_sym) {
-		if (symbol_conf.use_callchain) {
+	if (!perf_hpp_list.sym) {
+		if (callchain->enabled) {
 			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
 			return -EINVAL;
 		}
-	} else if (callchain_param.mode != CHAIN_NONE) {
-		if (callchain_register_param(&callchain_param) < 0) {
+	} else if (callchain->mode != CHAIN_NONE) {
+		if (callchain_register_param(callchain) < 0) {
 			ui__error("Can't register callchain params.\n");
 			return -EINVAL;
 		}

@@ -952,7 +952,7 @@ static int __cmd_top(struct perf_top *top)
 		goto out_delete;
 	}
 
-	ret = perf_top__setup_sample_type(top);
+	ret = callchain_param__setup_sample_type(&callchain_param);
 	if (ret)
 		goto out_delete;
 

@@ -962,7 +962,7 @@ static int __cmd_top(struct perf_top *top)
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
 				    top->evlist->threads, false, opts->proc_map_timeout);
 
-	if (sort__has_socket) {
+	if (perf_hpp_list.socket) {
 		ret = perf_env__read_cpu_topology_map(&perf_env);
 		if (ret < 0)
 			goto out_err_cpu_topo;

@@ -1045,18 +1045,17 @@ callchain_opt(const struct option *opt, const char *arg, int unset)
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	struct record_opts *record = (struct record_opts *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
-	record->callgraph_set = true;
-	callchain_param.enabled = !unset;
-	callchain_param.record_mode = CALLCHAIN_FP;
+	callchain->enabled = !unset;
+	callchain->record_mode = CALLCHAIN_FP;
 
 	/*
 	 * --no-call-graph
 	 */
 	if (unset) {
 		symbol_conf.use_callchain = false;
-		callchain_param.record_mode = CALLCHAIN_NONE;
+		callchain->record_mode = CALLCHAIN_NONE;
 		return 0;
 	}
@ -1104,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
},
|
},
|
||||||
.proc_map_timeout = 500,
|
.proc_map_timeout = 500,
|
||||||
},
|
},
|
||||||
.max_stack = PERF_MAX_STACK_DEPTH,
|
.max_stack = sysctl_perf_event_max_stack,
|
||||||
.sym_pcnt_filter = 5,
|
.sym_pcnt_filter = 5,
|
||||||
};
|
};
|
||||||
struct record_opts *opts = &top.record_opts;
|
struct record_opts *opts = &top.record_opts;
|
||||||
|
@ -1162,17 +1161,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
"output field(s): overhead, period, sample plus all of sort keys"),
|
"output field(s): overhead, period, sample plus all of sort keys"),
|
||||||
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
|
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
|
||||||
"Show a column with the number of samples"),
|
"Show a column with the number of samples"),
|
||||||
OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
|
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
|
||||||
NULL, "enables call-graph recording and display",
|
NULL, "enables call-graph recording and display",
|
||||||
&callchain_opt),
|
&callchain_opt),
|
||||||
OPT_CALLBACK(0, "call-graph", &top.record_opts,
|
OPT_CALLBACK(0, "call-graph", &callchain_param,
|
||||||
"record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
|
"record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
|
||||||
top_callchain_help, &parse_callchain_opt),
|
top_callchain_help, &parse_callchain_opt),
|
||||||
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
|
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
|
||||||
"Accumulate callchains of children and show total overhead as well"),
|
"Accumulate callchains of children and show total overhead as well"),
|
||||||
OPT_INTEGER(0, "max-stack", &top.max_stack,
|
OPT_INTEGER(0, "max-stack", &top.max_stack,
|
||||||
"Set the maximum stack depth when parsing the callchain. "
|
"Set the maximum stack depth when parsing the callchain. "
|
||||||
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
|
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
|
||||||
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
|
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
|
||||||
"ignore callees of these functions in call graphs",
|
"ignore callees of these functions in call graphs",
|
||||||
report_parse_ignore_callees_opt),
|
report_parse_ignore_callees_opt),
|
||||||
|
@ -1256,7 +1255,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
|
|
||||||
sort__mode = SORT_MODE__TOP;
|
sort__mode = SORT_MODE__TOP;
|
||||||
/* display thread wants entries to be collapsed in a different tree */
|
/* display thread wants entries to be collapsed in a different tree */
|
||||||
sort__need_collapse = 1;
|
perf_hpp_list.need_collapse = 1;
|
||||||
|
|
||||||
if (top.use_stdio)
|
if (top.use_stdio)
|
||||||
use_browser = 0;
|
use_browser = 0;
|
||||||
|
@ -1312,7 +1311,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
|
||||||
|
|
||||||
top.sym_evsel = perf_evlist__first(top.evlist);
|
top.sym_evsel = perf_evlist__first(top.evlist);
|
||||||
|
|
||||||
if (!symbol_conf.use_callchain) {
|
if (!callchain_param.enabled) {
|
||||||
symbol_conf.cumulate_callchain = false;
|
symbol_conf.cumulate_callchain = false;
|
||||||
perf_hpp__cancel_cumulate();
|
perf_hpp__cancel_cumulate();
|
||||||
}
|
}
|
||||||
|
|
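All of the callchain plumbing in perf top now flows through one callchain_param object, which the option table hands to the callbacks via opt->value instead of passing the record_opts. A stripped-down sketch of that opt->value pattern, with hand-rolled stand-ins for the parse-options types (this is the shape of the idiom, not the real tools/lib/subcmd API):

#include <stdbool.h>
#include <stdio.h>

/* Hand-rolled stand-ins for the real parse-options types. */
struct option {
	const char *long_name;
	void *value;	/* opaque pointer handed to the callback */
	int (*callback)(const struct option *opt, const char *arg, int unset);
};

struct callchain_param_sketch {
	bool enabled;
	int record_mode;	/* 0 = none, 1 = frame pointers */
};

static int parse_callchain_cb(const struct option *opt, const char *arg, int unset)
{
	/* The callback recovers its target from opt->value, so the same
	 * handler fills whatever object the option table points it at. */
	struct callchain_param_sketch *callchain = opt->value;

	callchain->enabled = !unset;
	callchain->record_mode = unset ? 0 : 1;
	(void)arg;
	return 0;
}

int main(void)
{
	struct callchain_param_sketch callchain = { 0 };
	struct option opt = { "call-graph", &callchain, parse_callchain_cb };

	opt.callback(&opt, "fp", 0);
	printf("enabled=%d mode=%d\n", callchain.enabled, callchain.record_mode);
	return 0;
}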
(File diff suppressed because it is too large.)
@@ -27,7 +27,7 @@ NO_PERF_REGS := 1
 ifeq ($(ARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
-    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
     $(call detected,CONFIG_X86_64)

@@ -295,9 +295,6 @@ ifndef NO_LIBELF
     CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
   endif
 
-# include ARCH specific config
--include $(src-perf)/arch/$(ARCH)/Makefile
-
 ifndef NO_DWARF
   ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
     msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
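Defining HAVE_SYSCALL_TABLE and adding the generated headers to the include path lets the 64-bit x86 build map syscall ids to names from a generated table. A hypothetical sketch of the shape such a table takes and how a tool consumes it (the array below is illustrative, not the generated file's contents):

#include <stdio.h>

/* Illustrative stand-in for a generated syscall table: names indexed
 * by syscall number, with gaps left as NULL. */
static const char *const syscall_names[] = {
	[0] = "read",
	[1] = "write",
	[2] = "open",
	[3] = "close",
};

static const char *syscall_name(unsigned int nr)
{
	if (nr >= sizeof(syscall_names) / sizeof(syscall_names[0]) ||
	    !syscall_names[nr])
		return "unknown";
	return syscall_names[nr];
}

int main(void)
{
	printf("syscall 2 on x86_64 is %s\n", syscall_name(2)); /* open */
	return 0;
}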
@@ -92,6 +92,22 @@ error:
 	return ret;
 }
 
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((uint64_t)high) << 32;
+#else
+	return 0;
+#endif
+}
+
 #define NSEC_PER_SEC 1000000000
 static int perf_clk_id = CLOCK_MONOTONIC;
 

@@ -107,6 +123,9 @@ perf_get_timestamp(void)
 	struct timespec ts;
 	int ret;
 
+	if (use_arch_timestamp)
+		return get_arch_timestamp();
+
 	ret = clock_gettime(perf_clk_id, &ts);
 	if (ret)
 		return 0;

@@ -203,6 +222,17 @@ perf_close_marker_file(void)
 	munmap(marker_addr, pgsz);
 }
 
+static void
+init_arch_timestamp(void)
+{
+	char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+	if (!str || !*str || !strcmp(str, "0"))
+		return;
+
+	use_arch_timestamp = 1;
+}
+
 void *jvmti_open(void)
 {
 	int pad_cnt;

@@ -211,11 +241,17 @@ void *jvmti_open(void)
 	int fd;
 	FILE *fp;
 
+	init_arch_timestamp();
+
 	/*
 	 * check if clockid is supported
 	 */
-	if (!perf_get_timestamp())
-		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	if (!perf_get_timestamp()) {
+		if (use_arch_timestamp)
+			warnx("jvmti: arch timestamp not supported");
+		else
+			warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	}
 
 	memset(&header, 0, sizeof(header));
 

@@ -263,6 +299,9 @@ void *jvmti_open(void)
 
 	header.timestamp = perf_get_timestamp();
 
+	if (use_arch_timestamp)
+		header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
 	if (!fwrite(&header, sizeof(header), 1, fp)) {
 		warn("jvmti: cannot write dumpfile header");
 		goto error;
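The agent opts into the TSC path via the JITDUMP_USE_ARCH_TIMESTAMP environment variable and stitches the two 32-bit halves that rdtsc returns in EDX:EAX into one 64-bit counter. A standalone sketch of the same combine and the same opt-in convention (the TSC branch only compiles to something useful on x86):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static uint64_t rdtsc64(void)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int low, high;

	/* RDTSC returns the timestamp counter split across EDX:EAX. */
	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return low | ((uint64_t)high) << 32;
#else
	return 0;	/* no TSC on this architecture */
#endif
}

int main(void)
{
	/* Same opt-in convention as the jitdump agent. */
	char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
	int use_tsc = str && *str && strcmp(str, "0");

	printf("use_tsc=%d tsc=%" PRIu64 "\n", use_tsc, rdtsc64());
	return 0;
}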
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>

@@ -308,9 +309,11 @@ static int handle_alias(int *argcp, const char ***argv)
 	if (*argcp > 1) {
 		struct strbuf buf;
 
-		strbuf_init(&buf, PATH_MAX);
-		strbuf_addstr(&buf, alias_string);
-		sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+		if (strbuf_init(&buf, PATH_MAX) < 0 ||
+		    strbuf_addstr(&buf, alias_string) < 0 ||
+		    sq_quote_argv(&buf, (*argv) + 1,
+				  PATH_MAX) < 0)
+			die("Failed to allocate memory.");
 		free(alias_string);
 		alias_string = buf.buf;
 	}

@@ -533,6 +536,7 @@ int main(int argc, const char **argv)
 {
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
+	int value;
 
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);

@@ -542,6 +546,9 @@ int main(int argc, const char **argv)
 	page_size = sysconf(_SC_PAGE_SIZE);
 	cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
 	cmd = extract_argv0_path(argv[0]);
 	if (!cmd)
 		cmd = "perf-help";

@@ -549,6 +556,7 @@ int main(int argc, const char **argv)
 	srandom(time(NULL));
 
 	perf_config(perf_default_config, NULL);
+	set_buildid_dir(NULL);
 
 	/* get debugfs/tracefs mount point from /proc/mounts */
 	tracing_path_mount();

@@ -572,7 +580,6 @@ int main(int argc, const char **argv)
 	}
 	if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-		set_buildid_dir(NULL);
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);

@@ -587,7 +594,6 @@ int main(int argc, const char **argv)
 	argc--;
 	handle_options(&argv, &argc, NULL);
 	commit_pager_choice();
-	set_buildid_dir(NULL);
 
 	if (argc > 0) {
 		if (!prefixcmp(argv[0], "--"))
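handle_alias() now checks every strbuf step and bails on the first allocation failure instead of ignoring the return values. A minimal sketch of the same chained '< 0 ||' idiom with a toy growable buffer (buf_init()/buf_addstr() are hypothetical stand-ins, not the strbuf API):

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy growable buffer; each step reports failure with < 0, strbuf-style. */
struct buf { char *data; size_t len, cap; };

static int buf_init(struct buf *b, size_t hint)
{
	b->len = 0;
	b->cap = hint ? hint : 64;
	b->data = malloc(b->cap);
	return b->data ? 0 : -1;
}

static int buf_addstr(struct buf *b, const char *s)
{
	size_t n = strlen(s);

	if (b->len + n + 1 > b->cap) {
		char *p = realloc(b->data, b->len + n + 1);

		if (!p)
			return -1;
		b->data = p;
		b->cap = b->len + n + 1;
	}
	memcpy(b->data + b->len, s, n + 1);
	b->len += n;
	return 0;
}

int main(void)
{
	struct buf b;

	/* Chain the checks so the first failure aborts the whole build-up. */
	if (buf_init(&b, 16) < 0 ||
	    buf_addstr(&b, "record") < 0 ||
	    buf_addstr(&b, " -g") < 0)
		errx(1, "Failed to allocate memory.");
	puts(b.data);
	free(b.data);
	return 0;
}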
@@ -52,7 +52,6 @@ struct record_opts {
 	bool sample_weight;
 	bool sample_time;
 	bool sample_time_set;
-	bool callgraph_set;
 	bool period;
 	bool running_time;
 	bool full_auxtrace;
@@ -34,10 +34,9 @@ import datetime
 #
 # ubuntu:
 #
-# $ sudo apt-get install postgresql
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
 # $ sudo su - postgres
-# $ createuser <your user id here>
-# Shall the new role be a superuser? (y/n) y
+# $ createuser -s <your user id here>
 #
 # An example of using this script with Intel PT:
 #

@@ -224,11 +223,14 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 
 perf_db_export_mode = True
 perf_db_export_calls = False
+perf_db_export_callchains = False
+
 
 def usage():
-	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
 	print >> sys.stderr, "where: columns 'all' or 'branches'"
-	print >> sys.stderr, " calls 'calls' => create calls table"
+	print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
+	print >> sys.stderr, " callchains 'callchains' => create call_paths table"
 	raise Exception("Too few arguments")
 
 if (len(sys.argv) < 2):

@@ -246,9 +248,11 @@ if columns not in ("all", "branches"):
 
 branches = (columns == "branches")
 
-if (len(sys.argv) >= 4):
-	if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+	if (sys.argv[i] == "calls"):
 		perf_db_export_calls = True
+	elif (sys.argv[i] == "callchains"):
+		perf_db_export_callchains = True
 	else:
 		usage()
 

@@ -359,14 +363,16 @@ else:
 		'transaction bigint,'
 		'data_src bigint,'
 		'branch_type integer,'
-		'in_tx boolean)')
+		'in_tx boolean,'
+		'call_path_id bigint)')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE TABLE call_paths ('
 		'id bigint NOT NULL,'
 		'parent_id bigint,'
 		'symbol_id bigint,'
 		'ip bigint)')
+if perf_db_export_calls:
 	do_query(query, 'CREATE TABLE calls ('
 		'id bigint NOT NULL,'
 		'thread_id bigint,'

@@ -428,7 +434,7 @@ do_query(query, 'CREATE VIEW comm_threads_view AS '
 		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
 	' FROM comm_threads')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE VIEW call_paths_view AS '
 		'SELECT '
 			'c.id,'

@@ -444,6 +450,7 @@ if perf_db_export_calls:
 			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
 			'(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
 	' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
 	do_query(query, 'CREATE VIEW calls_view AS '
 		'SELECT '
 			'calls.id,'

@@ -541,8 +548,9 @@ dso_file = open_output_file("dso_table.bin")
 symbol_file = open_output_file("symbol_table.bin")
 branch_type_file = open_output_file("branch_type_table.bin")
 sample_file = open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	call_path_file = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
 	call_file = open_output_file("call_table.bin")
 
 def trace_begin():

@@ -554,8 +562,8 @@ def trace_begin():
 	comm_table(0, "unknown")
 	dso_table(0, 0, "unknown", "unknown", "")
 	symbol_table(0, 0, 0, 0, 0, "unknown")
-	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-	if perf_db_export_calls:
+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+	if perf_db_export_calls or perf_db_export_callchains:
 		call_path_table(0, 0, 0, 0)
 
 unhandled_count = 0

@@ -571,8 +579,9 @@ def trace_end():
 	copy_output_file(symbol_file, "symbols")
 	copy_output_file(branch_type_file, "branch_types")
 	copy_output_file(sample_file, "samples")
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		copy_output_file(call_path_file, "call_paths")
+	if perf_db_export_calls:
 		copy_output_file(call_file, "calls")
 
 	print datetime.datetime.today(), "Removing intermediate files..."

@@ -585,8 +594,9 @@ def trace_end():
 	remove_output_file(symbol_file)
 	remove_output_file(branch_type_file)
 	remove_output_file(sample_file)
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		remove_output_file(call_path_file)
+	if perf_db_export_calls:
 		remove_output_file(call_file)
 	os.rmdir(output_dir_name)
 	print datetime.datetime.today(), "Adding primary keys"

@@ -599,8 +609,9 @@ def trace_end():
 	do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
 
 	print datetime.datetime.today(), "Adding foreign keys"

@@ -623,10 +634,11 @@ def trace_end():
 		'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
 		'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
 		'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths '
 			'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
			'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls '
 			'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
 			'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'

@@ -694,11 +706,11 @@ def branch_type_table(branch_type, name, *x):
 	value = struct.pack(fmt, 2, 4, branch_type, n, name)
 	branch_type_file.write(value)
 
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
 	if branches:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
 	else:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
 	sample_file.write(value)
 
 def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
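Each pack string grows by one 'iq' pair and the leading field count is bumped (17 to 18, 21 to 22) for the new call_path_id column. The layout the script emits appears to match PostgreSQL's binary COPY tuple format: a big-endian int16 field count, then, per field, an int32 byte length followed by the raw value. That reading is an inference from the '!', 'h', 'i' and 'q' codes, not something the script itself states. A small C sketch of emitting one bigint field in that layout, written to stdout here:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Append one bigint field in COPY BINARY form: int32 length (8),
 * then the 8-byte big-endian value. */
static size_t put_bigint_field(unsigned char *p, uint64_t v)
{
	uint32_t len = htobe32(8);
	uint64_t be = htobe64(v);

	memcpy(p, &len, 4);
	memcpy(p + 4, &be, 8);
	return 12;
}

int main(void)
{
	unsigned char tuple[2 + 12];
	uint16_t nfields = htobe16(1);	/* this tuple carries one field */

	memcpy(tuple, &nfields, 2);
	put_bigint_field(tuple + 2, 42);
	fwrite(tuple, sizeof(tuple), 1, stdout);
	return 0;
}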
@@ -37,6 +37,8 @@ perf-y += topology.o
 perf-y += cpumap.o
 perf-y += stat.o
 perf-y += event_update.o
+perf-y += event-times.o
+perf-y += backward-ring-buffer.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
(Some files were not shown because too many files have changed in this diff.)