Skip to content

Commit 5d1638a

Browse files
Preeti U Murthy authored and KAGA-KOKO (Thomas Gleixner) committed
tick: Introduce hrtimer based broadcast
On some architectures, in certain CPU deep idle states the local timers stop. An external clock device is used to wakeup these CPUs. The kernel support for the wakeup of these CPUs is provided by the tick broadcast framework by using the external clock device as the wakeup source. However not all implementations of architectures provide such an external clock device. This patch includes support in the broadcast framework to handle the wakeup of the CPUs in deep idle states on such systems by queuing a hrtimer on one of the CPUs, which is meant to handle the wakeup of CPUs in deep idle states. This patchset introduces a pseudo clock device which can be registered by the archs as tick_broadcast_device in the absence of a real external clock device. Once registered, the broadcast framework will work as is for these architectures as long as the archs take care of the BROADCAST_ENTER notification failing for one of the CPUs. This CPU is made the stand by CPU to handle wakeup of the CPUs in deep idle and it *must not enter deep idle states*. The CPU with the earliest wakeup is chosen to be this CPU. Hence this way the stand by CPU dynamically moves around and so does the hrtimer which is queued to trigger at the next earliest wakeup time. This is consistent with the case where an external clock device is present. The smp affinity of this clock device is set to the CPU with the earliest wakeup. This patchset handles the hotplug of the stand by CPU as well by moving the hrtimer on to the CPU handling the CPU_DEAD notification. 
Originally-from: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080632.17187.80532.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
1 parent ba8f20c commit 5d1638a

File tree

4 files changed

+167
-4
lines changed

4 files changed

+167
-4
lines changed

include/linux/clockchips.h

+9
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ enum clock_event_mode {
6262
#define CLOCK_EVT_FEAT_DYNIRQ 0x000020
6363
#define CLOCK_EVT_FEAT_PERCPU 0x000040
6464

65+
/*
66+
* Clockevent device is based on a hrtimer for broadcast
67+
*/
68+
#define CLOCK_EVT_FEAT_HRTIMER 0x000080
69+
6570
/**
6671
* struct clock_event_device - clock event device descriptor
6772
* @event_handler: Assigned by the framework to be called by the low
@@ -83,6 +88,7 @@ enum clock_event_mode {
8388
* @name: ptr to clock event name
8489
* @rating: variable to rate clock event devices
8590
* @irq: IRQ number (only for non CPU local devices)
91+
* @bound_on: Bound on CPU
8692
* @cpumask: cpumask to indicate for which CPUs this device works
8793
* @list: list head for the management code
8894
* @owner: module reference
@@ -113,6 +119,7 @@ struct clock_event_device {
113119
const char *name;
114120
int rating;
115121
int irq;
122+
int bound_on;
116123
const struct cpumask *cpumask;
117124
struct list_head list;
118125
struct module *owner;
@@ -180,9 +187,11 @@ extern int tick_receive_broadcast(void);
180187
#endif
181188

182189
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
190+
extern void tick_setup_hrtimer_broadcast(void);
183191
extern int tick_check_broadcast_expired(void);
184192
#else
185193
static inline int tick_check_broadcast_expired(void) { return 0; }
194+
static inline void tick_setup_hrtimer_broadcast(void) { }
186195
#endif
187196

188197
#ifdef CONFIG_GENERIC_CLOCKEVENTS

kernel/time/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
33

44
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
55
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
6-
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
6+
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o tick-broadcast-hrtimer.o
77
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
88
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
99
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o

kernel/time/tick-broadcast-hrtimer.c

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* linux/kernel/time/tick-broadcast-hrtimer.c
3+
* This file emulates a local clock event device
4+
* via a pseudo clock device.
5+
*/
6+
#include <linux/cpu.h>
7+
#include <linux/err.h>
8+
#include <linux/hrtimer.h>
9+
#include <linux/interrupt.h>
10+
#include <linux/percpu.h>
11+
#include <linux/profile.h>
12+
#include <linux/clockchips.h>
13+
#include <linux/sched.h>
14+
#include <linux/smp.h>
15+
#include <linux/module.h>
16+
17+
#include "tick-internal.h"
18+
19+
/* The hrtimer which emulates the broadcast clock event device. */
static struct hrtimer bctimer;

/*
 * Mode switch handler of the pseudo broadcast clock event device.
 *
 * Only SHUTDOWN needs action: try to stop the emulating hrtimer.
 * All other modes are no-ops; the timer is (re)armed solely from
 * bc_set_next().
 */
static void bc_set_mode(enum clock_event_mode mode,
			struct clock_event_device *bc)
{
	switch (mode) {
	case CLOCK_EVT_MODE_SHUTDOWN:
		/*
		 * Note, we cannot cancel the timer here as we might
		 * run into the following live lock scenario:
		 *
		 * cpu 0		cpu1
		 * lock(broadcast_lock);
		 *			hrtimer_interrupt()
		 *			bc_handler()
		 *			   tick_handle_oneshot_broadcast();
		 *			    lock(broadcast_lock);
		 * hrtimer_cancel()
		 *  wait_for_callback()
		 */
		hrtimer_try_to_cancel(&bctimer);
		break;
	default:
		break;
	}
}
45+
46+
/*
 * This is called from the guts of the broadcast code when the cpu
 * which is about to enter idle has the earliest broadcast timer event.
 *
 * Arms the emulating hrtimer for @expires and records the calling cpu
 * in bc->bound_on, making it the stand-by cpu that must not enter
 * deep idle. Always returns 0.
 */
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
	/*
	 * We try to cancel the timer first. If the callback is on
	 * flight on some other cpu then we let it handle it. If we
	 * were able to cancel the timer nothing can rearm it as we
	 * own broadcast_lock.
	 *
	 * However we can also be called from the event handler of
	 * ce_broadcast_hrtimer itself when it expires. We cannot
	 * restart the timer because we are in the callback, but we
	 * can set the expiry time and let the callback return
	 * HRTIMER_RESTART.
	 */
	if (hrtimer_try_to_cancel(&bctimer) >= 0) {
		/* ABS_PINNED: the timer stays on the cpu that armed it */
		hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
		/* Bind the "device" to the cpu */
		bc->bound_on = smp_processor_id();
	} else if (bc->bound_on == smp_processor_id()) {
		hrtimer_set_expires(&bctimer, expires);
	}
	return 0;
}
73+
74+
static struct clock_event_device ce_broadcast_hrtimer = {
75+
.set_mode = bc_set_mode,
76+
.set_next_ktime = bc_set_next,
77+
.features = CLOCK_EVT_FEAT_ONESHOT |
78+
CLOCK_EVT_FEAT_KTIME |
79+
CLOCK_EVT_FEAT_HRTIMER,
80+
.rating = 0,
81+
.bound_on = -1,
82+
.min_delta_ns = 1,
83+
.max_delta_ns = KTIME_MAX,
84+
.min_delta_ticks = 1,
85+
.max_delta_ticks = KTIME_MAX,
86+
.mult = 1,
87+
.shift = 0,
88+
.cpumask = cpu_all_mask,
89+
};
90+
91+
static enum hrtimer_restart bc_handler(struct hrtimer *t)
92+
{
93+
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
94+
95+
if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX)
96+
return HRTIMER_NORESTART;
97+
98+
return HRTIMER_RESTART;
99+
}
100+
101+
/*
 * Set up and register the hrtimer based broadcast pseudo device.
 * Called by archs which lack a real external wakeup clock device.
 * NOTE(review): the handler is assigned before registration —
 * presumably registration may program the device immediately, so
 * keep this order.
 */
void tick_setup_hrtimer_broadcast(void)
{
	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	bctimer.function = bc_handler;
	clockevents_register_device(&ce_broadcast_hrtimer);
}

kernel/time/tick-broadcast.c

+51-3
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,42 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
643643
raw_spin_unlock(&tick_broadcast_lock);
644644
}
645645

646+
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
647+
{
648+
if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
649+
return 0;
650+
if (bc->next_event.tv64 == KTIME_MAX)
651+
return 0;
652+
return bc->bound_on == cpu ? -EBUSY : 0;
653+
}
654+
655+
/*
 * Shut down the cpu local tick device on BROADCAST_ENTER, unless the
 * hrtimer based broadcast mechanism needs this cpu to stay up:
 * either this cpu owns the broadcast hrtimer, or its own event is
 * the earliest and it would become the new owner.
 *
 * Called with tick_broadcast_lock held (see the caller in
 * tick_broadcast_oneshot_control()).
 */
static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event.tv64 < bc->next_event.tv64)
			return;
	}
	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}
671+
672+
/*
 * Move the hrtimer based broadcast duty away from @deadcpu on cpu
 * hotplug. Reprogramming the event from the cpu handling the
 * CPU_DEAD notification rebinds the pseudo device to it, since
 * bc_set_next() records the programming cpu in bound_on. No-op when
 * a real broadcast device is in use or @deadcpu was not the owner.
 */
static void broadcast_move_bc(int deadcpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || !broadcast_needs_cpu(bc, deadcpu))
		return;
	/* This moves the broadcast assignment to this cpu */
	clockevents_program_event(bc, bc->next_event, 1);
}
646682
/*
647683
* Powerstate information: The system enters/leaves a state, where
648684
* affected devices might stop
@@ -661,7 +697,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
661697
* states
662698
*/
663699
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
664-
return;
700+
return 0;
665701

666702
/*
667703
* We are called with preemption disabled from the depth of the
@@ -672,15 +708,15 @@ int tick_broadcast_oneshot_control(unsigned long reason)
672708
dev = td->evtdev;
673709

674710
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
675-
return;
711+
return 0;
676712

677713
bc = tick_broadcast_device.evtdev;
678714

679715
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
680716
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
681717
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
682718
WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
683-
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
719+
broadcast_shutdown_local(bc, dev);
684720
/*
685721
* We only reprogram the broadcast timer if we
686722
* did not mark ourself in the force mask and
@@ -693,6 +729,16 @@ int tick_broadcast_oneshot_control(unsigned long reason)
693729
dev->next_event.tv64 < bc->next_event.tv64)
694730
tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
695731
}
732+
/*
733+
* If the current CPU owns the hrtimer broadcast
734+
* mechanism, it cannot go deep idle and we remove the
735+
* CPU from the broadcast mask. We don't have to go
736+
* through the EXIT path as the local timer is not
737+
* shutdown.
738+
*/
739+
ret = broadcast_needs_cpu(bc, cpu);
740+
if (ret)
741+
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
696742
} else {
697743
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
698744
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
@@ -866,6 +912,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
866912
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
867913
cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
868914

915+
broadcast_move_bc(cpu);
916+
869917
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
870918
}
871919

0 commit comments

Comments
 (0)