[LITMUS^RT] RFC: record interrupt interference with Feather-Trace

Björn Brandenburg bbb at mpi-sws.org
Tue Jan 24 14:05:37 CET 2012


Hi all,

as you know, interrupt interference is problematic and can cause outliers during tracing. For a while now I've had plans on my todo list to include some kind of flag in the Feather-Trace binary format to signal the occurrence of interrupts. This would allow us to filter out samples that were disturbed by interrupts. Today I whipped up a sample implementation (see the patch below). I'll reply with another patch for ft_tools that adds matching support to ft_dump.
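To give a rough idea of how such filtering might look on the consumer side, here is a minimal sketch (purely illustrative, not part of ft_tools). The struct mirrors the patched include/litmus/trace.h, including the leading uint64_t timestamp field that the diff context below doesn't show; bitfield layout is of course compiler-dependent, so a real tool would want to be more careful:

#include <stdint.h>
#include <stdio.h>

/* mirrors struct timestamp after this patch (16 bytes total) */
struct timestamp {
	uint64_t timestamp;
	uint32_t seq_no;
	uint8_t  cpu;
	uint8_t  event;
	uint8_t  task_type:2;
	uint8_t  irq_flag:1;
	uint8_t  irq_count:5;
};

int main(void)
{
	struct timestamp ts;

	/* Read raw records and drop every sample during which at
	 * least one interrupt fired. We test irq_flag rather than
	 * irq_count because the five-bit count wraps around. */
	while (fread(&ts, sizeof(ts), 1, stdin) == 1) {
		if (ts.irq_flag)
			continue;
		printf("event=%u cpu=%u seq=%u\n",
		       ts.event, ts.cpu, ts.seq_no);
	}
	return 0;
}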

I've tested the patch below in KVM, but it could certainly use some additional testing on real hardware. Please let me know what you think.

Thanks,
Björn

commit 91511443e99056f8cbefb63587e589623d385bf9
Author: Bjoern B. Brandenburg <bbb at cs.unc.edu>
Date:   Tue Jan 24 09:36:12 2012 +0100

    Feather-Trace: keep track of interrupt-related interference.
    
    Increment a processor-local counter whenever an interrupt is handled.
    This allows Feather-Trace to include a (truncated) counter and a flag
    to report interference from interrupts. This could be used to filter
    samples that were disturbed by interrupts.

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index ba36217..4827ce7 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -6,6 +6,8 @@
 #include <linux/ftrace_irq.h>
 #include <asm/hardirq.h>
 
+#include <litmus/trace_irq.h>
+
 /*
  * We put the hardirq and softirq counter into the preemption
  * counter. The bitmask has the following meaning:
@@ -186,6 +188,7 @@ extern void rcu_nmi_exit(void);
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
+		ft_irq_fired();				\
 	} while (0)
 
 /*
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e2926a0..e809376 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -20,7 +20,9 @@ struct timestamp {
 	uint32_t		seq_no;
 	uint8_t			cpu;
 	uint8_t			event;
-	uint8_t			task_type;
+	uint8_t			task_type:2;
+	uint8_t			irq_flag:1;
+	uint8_t			irq_count:5;
 };
 
 /* tracing callbacks */
diff --git a/include/litmus/trace_irq.h b/include/litmus/trace_irq.h
new file mode 100644
index 0000000..f18b127
--- /dev/null
+++ b/include/litmus/trace_irq.h
@@ -0,0 +1,21 @@
+#ifndef _LITMUS_TRACE_IRQ_H_
+#define	_LITMUS_TRACE_IRQ_H_
+
+#ifdef CONFIG_SCHED_OVERHEAD_TRACE
+
+DECLARE_PER_CPU(atomic_t, irq_fired_count);
+
+static inline void ft_irq_fired(void)
+{
+	/* Only called with preemptions disabled.  */
+	atomic_inc(&__get_cpu_var(irq_fired_count));
+}
+
+
+#else
+
+#define ft_irq_fired() /* nothing to do */
+
+#endif
+
+#endif
diff --git a/litmus/trace.c b/litmus/trace.c
index 5d77806..3c35c52 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -16,6 +16,35 @@ static struct ftdev overhead_dev;
 
 static unsigned int ts_seq_no = 0;
 
+DEFINE_PER_CPU(atomic_t, irq_fired_count);
+
+static inline void clear_irq_fired(void)
+{
+	atomic_set(&__raw_get_cpu_var(irq_fired_count), 0);
+}
+
+static inline unsigned int get_and_clear_irq_fired(void)
+{
+	/* This is potentially not atomic since we might migrate if
+	 * preemptions are not disabled. As a tradeoff between
+	 * accuracy and tracing overheads, this seems acceptable.
+	 * If it proves to be a problem, then one could add a callback
+	 * from the migration code to invalidate irq_fired_count.
+	 */
+	return atomic_xchg(&__raw_get_cpu_var(irq_fired_count), 0);
+}
+
+static inline void __save_irq_flags(struct timestamp *ts)
+{
+	unsigned int irq_count;
+
+	irq_count     = get_and_clear_irq_fired();
+	/* Store how many interrupts occurred. */
+	ts->irq_count = irq_count;
+	/* Extra flag because ts->irq_count overflows quickly. */
+	ts->irq_flag  = irq_count > 0;
+}
+
 static inline void __save_timestamp_cpu(unsigned long event,
 					uint8_t type, uint8_t cpu)
 {
@@ -24,10 +53,13 @@ static inline void __save_timestamp_cpu(unsigned long event,
 	seq_no = fetch_and_inc((int *) &ts_seq_no);
 	if (ft_buffer_start_write(trace_ts_buf, (void**)  &ts)) {
 		ts->event     = event;
-		ts->timestamp = ft_timestamp();
 		ts->seq_no    = seq_no;
 		ts->cpu       = cpu;
 		ts->task_type = type;
+		__save_irq_flags(ts);
+		barrier();
+		/* prevent compiler re-ordering of ft_timestamp() */
+		ts->timestamp = ft_timestamp();
 		ft_buffer_finish_write(trace_ts_buf, ts);
 	}
 }
@@ -40,6 +72,7 @@ static void __add_timestamp_user(struct timestamp *pre_recorded)
 	if (ft_buffer_start_write(trace_ts_buf, (void**)  &ts)) {
 		*ts = *pre_recorded;
 		ts->seq_no = seq_no;
+		__save_irq_flags(ts);
 		ft_buffer_finish_write(trace_ts_buf, ts);
 	}
 }
@@ -90,6 +123,7 @@ feather_callback void save_task_latency(unsigned long event,
 		ts->seq_no    = seq_no;
 		ts->cpu       = cpu;
 		ts->task_type = TSK_RT;
+		__save_irq_flags(ts);
 		ft_buffer_finish_write(trace_ts_buf, ts);
 	}
 }
@@ -107,6 +141,10 @@ feather_callback void save_task_latency(unsigned long event,
 static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
 {
 	unsigned int count = NO_TIMESTAMPS;
+
+	/* An overhead-tracing timestamp should be exactly 16 bytes long. */
+	BUILD_BUG_ON(sizeof(struct timestamp) != 16);
+
 	while (count && !trace_ts_buf) {
 		printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
 		ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
@@ -149,7 +187,7 @@ out:
 
 static int __init init_ft_overhead_trace(void)
 {
-	int err;
+	int err, cpu;
 
 	printk("Initializing Feather-Trace overhead tracing device.\n");
 	err = ftdev_init(&overhead_dev, THIS_MODULE, 1, "ft_trace");
@@ -164,6 +202,11 @@ static int __init init_ft_overhead_trace(void)
 	if (err)
 		goto err_dealloc;
 
+	/* initialize IRQ counters on all CPUs */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		atomic_set(&per_cpu(irq_fired_count, cpu), 0);
+	}
+
 	return 0;
 
 err_dealloc:
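PS: one subtlety worth spelling out: the bitfield assignment in __save_irq_flags() keeps only the low five bits of the counter, so ts->irq_count wraps at 32. That is exactly why irq_flag is derived from the untruncated value. A standalone illustration (plain C, made-up numbers):

#include <stdint.h>
#include <stdio.h>

struct rec {
	uint8_t irq_flag:1;
	uint8_t irq_count:5;
};

int main(void)
{
	struct rec r;
	unsigned int fired = 32;  /* 32 interrupts since the last sample */

	r.irq_count = fired;      /* truncated: 32 % 32 == 0 */
	r.irq_flag  = fired > 0;  /* the disturbance is still visible */

	printf("count=%u flag=%u\n", r.irq_count, r.irq_flag);
	return 0;
}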




