[LITMUS^RT] RFC: record interrupt interference with Feather-Trace
Andrea Bastoni
bastoni at cs.unc.edu
Tue Jan 24 21:46:24 CET 2012
On 01/24/2012 02:05 PM, Björn Brandenburg wrote:
> Hi all,
>
> as you know, interrupt interference is problematic and can cause outliers during tracing. For a while now I've had plans on my todo list to include some kind of flag in the Feather-Trace binary format to signal the occurrence of interrupts. This would allow us to filter samples that were disturbed by interrupts. Today I whipped up a sample implementation (see patch below). I'll reply with another patch to ft_tools that adds support to ft_dump.
>
> I've tested the below patch in KVM, but it could certainly use some additional testing on real HW. Please let me know what you think.
>
> Thanks,
> Björn
>
> commit 91511443e99056f8cbefb63587e589623d385bf9
> Author: Bjoern B. Brandenburg <bbb at cs.unc.edu>
> Date: Tue Jan 24 09:36:12 2012 +0100
>
> Feather-Trace: keep track of interrupt-related interference.
>
> Increment a processor-local counter whenever an interrupt is handled.
> This allows Feather-Trace to include a (truncated) counter and a flag
> to report interference from interrupts. This could be used to filter
> samples that were disturbed by interrupts.
Thanks a lot! This definitely helps improve measurement accuracy (and
avoids questions like "How did you cope with outliers?" at conferences ;))
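
Until the ft_dump side lands, anyone post-processing traces by hand can
filter on the new flag directly; a minimal sketch against the struct
below (field names as in the patch, the helper name is made up):

    /* Skip samples disturbed by interrupts when computing
     * overhead statistics. */
    static int sample_is_clean(const struct timestamp *ts)
    {
            /* irq_flag latches "at least one IRQ fired", even when
             * the 5-bit irq_count has wrapped around. */
            return !ts->irq_flag;
    }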
> diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
> index ba36217..4827ce7 100644
> --- a/include/linux/hardirq.h
> +++ b/include/linux/hardirq.h
> @@ -6,6 +6,8 @@
> #include <linux/ftrace_irq.h>
> #include <asm/hardirq.h>
>
> +#include <litmus/trace_irq.h>
> +
> /*
> * We put the hardirq and softirq counter into the preemption
> * counter. The bitmask has the following meaning:
> @@ -186,6 +188,7 @@ extern void rcu_nmi_exit(void);
> account_system_vtime(current); \
> add_preempt_count(HARDIRQ_OFFSET); \
> trace_hardirq_enter(); \
> + ft_irq_fired(); \
> } while (0)
But isn't this one only tracing NMIs? :)
Thanks,
- Andrea
> /*
> diff --git a/include/litmus/trace.h b/include/litmus/trace.h
> index e2926a0..e809376 100644
> --- a/include/litmus/trace.h
> +++ b/include/litmus/trace.h
> @@ -20,7 +20,9 @@ struct timestamp {
> uint32_t seq_no;
> uint8_t cpu;
> uint8_t event;
> - uint8_t task_type;
> + uint8_t task_type:2;
> + uint8_t irq_flag:1;
> + uint8_t irq_count:5;
> };
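
Nice: 2 + 1 + 5 = 8 bits, so the three fields pack into the byte that
task_type used to occupy and the record size is unchanged. For
reference, gcc on little-endian x86 allocates bitfields starting at the
least significant bit, so the packed byte should look like:

    bit:   7 6 5 4 3 |  2  | 1 0
           irq_count | flg | task_type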
>
> /* tracing callbacks */
> diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
> new file mode 100644
> index 0000000..f18b127
> --- /dev/null
> +++ b/include/litmus/trace_irq.h
> @@ -0,0 +1,21 @@
> +#ifndef _LITMUS_TRACE_IRQ_H_
> +#define _LITMUS_TRACE_IRQ_H_
> +
> +#ifdef CONFIG_SCHED_OVERHEAD_TRACE
> +
> +extern DEFINE_PER_CPU(atomic_t, irq_fired_count);
> +
> +static inline void ft_irq_fired(void)
> +{
> + /* Only called with preemptions disabled. */
> + atomic_inc(&__get_cpu_var(irq_fired_count));
> +}
> +
> +
> +#else
> +
> +#define ft_irq_fired() /* nothing to do */
> +
> +#endif
> +
> +#endif
> diff --git a/litmus/trace.c b/litmus/trace.c
> index 5d77806..3c35c52 100644
> --- a/litmus/trace.c
> +++ b/litmus/trace.c
> @@ -16,6 +16,35 @@ static struct ftdev overhead_dev;
>
> static unsigned int ts_seq_no = 0;
>
> +DEFINE_PER_CPU(atomic_t, irq_fired_count);
> +
> +static inline void clear_irq_fired(void)
> +{
> + atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
> +}
> +
> +static inline unsigned int get_and_clear_irq_fired(void)
> +{
> + /* This is potentially not atomic since we might migrate if
> + * preemptions are not disabled. As a tradeoff between
> + * accuracy and tracing overheads, this seems acceptable.
> + * If it proves to be a problem, then one could add a callback
> + * from the migration code to invalidate irq_fired_count.
> + */
> + return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
> +}
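
The atomic_xchg() is important here: a naive read-then-clear could lose
an interrupt that fires in between, e.g.:

    n = atomic_read(&cnt);  /* <-- IRQ fires here, cnt++      */
    atomic_set(&cnt, 0);    /* the increment is silently lost */

With xchg() both steps are indivisible, so at worst an interrupt gets
attributed to the next sample.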
> +
> +static inline void __save_irq_flags(struct timestamp *ts)
> +{
> + unsigned int irq_count;
> +
> + irq_count = get_and_clear_irq_fired();
> + /* Store how many interrupts occurred. */
> + ts->irq_count = irq_count;
> + /* Extra flag because ts->irq_count overflows quickly. */
> + ts->irq_flag = irq_count > 0;
> +}
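
To make the overflow comment concrete: with, say, 33 interrupts between
two samples,

    ts->irq_count = 33;     /* 5-bit field wraps to 1  */
    ts->irq_flag  = 33 > 0; /* full-width test stays 1 */

so a wrapped count can never masquerade as an undisturbed sample.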
> +
> static inline void __save_timestamp_cpu(unsigned long event,
> uint8_t type, uint8_t cpu)
> {
> @@ -24,10 +53,13 @@ static inline void __save_timestamp_cpu(unsigned long event,
> seq_no = fetch_and_inc((int *) &ts_seq_no);
> if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
> ts->event = event;
> - ts->timestamp = ft_timestamp();
> ts->seq_no = seq_no;
> ts->cpu = cpu;
> ts->task_type = type;
> + __save_irq_flags(ts);
> + barrier();
> + /* prevent re-ordering of ft_timestamp() */
> + ts->timestamp = ft_timestamp();
> ft_buffer_finish_write(trace_ts_buf, ts);
> }
> }
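
For readers wondering about the barrier(): it is a pure compiler fence,

    #define barrier() __asm__ __volatile__("" : : : "memory")

so it emits no instruction and costs nothing at run time; it only keeps
gcc from hoisting the ft_timestamp() read above __save_irq_flags().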
> @@ -40,6 +72,7 @@ static void __add_timestamp_user(struct timestamp *pre_recorded)
> if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
> *ts = *pre_recorded;
> ts->seq_no = seq_no;
> + __save_irq_flags(ts);
> ft_buffer_finish_write(trace_ts_buf, ts);
> }
> }
> @@ -90,6 +123,7 @@ feather_callback void save_task_latency(unsigned long event,
> ts->seq_no = seq_no;
> ts->cpu = cpu;
> ts->task_type = TSK_RT;
> + __save_irq_flags(ts);
> ft_buffer_finish_write(trace_ts_buf, ts);
> }
> }
> @@ -107,6 +141,10 @@ feather_callback void save_task_latency(unsigned long event,
> static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
> {
> unsigned int count = NO_TIMESTAMPS;
> +
> + /* An overhead-tracing timestamp should be exactly 16 bytes long. */
> + BUILD_BUG_ON(sizeof(struct timestamp) != 16);
> +
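
The arithmetic behind the check, assuming the uint64_t timestamp field
at the top of the struct: 8 (timestamp) + 4 (seq_no) + 1 (cpu) +
1 (event) + 1 (the packed bitfield byte) = 15, padded to 16 for
uint64_t alignment. If some compiler ever packs the bitfields
differently, the build breaks here instead of silently corrupting the
binary format.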
> while (count && !trace_ts_buf) {
> printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
> ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
> @@ -149,7 +187,7 @@ out:
>
> static int __init init_ft_overhead_trace(void)
> {
> - int err;
> + int err, cpu;
>
> printk("Initializing Feather-Trace overhead tracing device.\n");
> err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
> @@ -164,6 +202,11 @@ static int __init init_ft_overhead_trace(void)
> if (err)
> goto err_dealloc;
>
> + /* initialize IRQ flags */
> + for (cpu = 0; cpu < NR_CPUS; cpu++) {
> + clear_irq_fired();
> + }
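
One nit here: clear_irq_fired() goes through __raw_get_cpu_var(), so
this loop clears the counter of whichever CPU runs the init code
NR_CPUS times, not one counter per CPU. Something along these lines
should touch them all (untested sketch):

    for_each_possible_cpu(cpu)
            atomic_set(&per_cpu(irq_fired_count, cpu), 0);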
> +
> return 0;
>
> err_dealloc:
>
>
> _______________________________________________
> litmus-dev mailing list
> litmus-dev at lists.litmus-rt.org
> https://lists.litmus-rt.org/listinfo/litmus-dev
>