/* $NetBSD: clock.c,v 1.64 2016/06/12 09:08:09 jnemeth Exp $ */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_xen.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: clock.c,v 1.64 2016/06/12 09:08:09 jnemeth Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/timevar.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/sysctl.h>

#include <xen/xen.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/xen-public/vcpu.h>
#include <machine/cpu_counter.h>

#include <dev/clock_subr.h>
#include <x86/rtc.h>
static int xen_timer_handler(void *, struct intrframe *);

/* A timecounter: Xen system_time extrapolated with a TSC. */
u_int xen_get_timecount(struct timecounter *);
static struct timecounter xen_timecounter = {
	.tc_get_timecount = xen_get_timecount,
	.tc_poll_pps = NULL,
	.tc_counter_mask = ~0U,
	.tc_frequency = 1000000000ULL,
	.tc_name = "xen_system_time",
	.tc_quality = 10000 /*
			     * This needs to take precedence over any hardware
			     * timecounters (e.g., ACPI in Xen3 dom0), because
			     * they can't correct for Xen scheduling latency.
			     */
};

/* These are periodically updated in shared_info, and then copied here. */
struct shadow {
	uint64_t tsc_stamp;
	uint64_t system_time;
	unsigned long time_version; /* XXXSMP */
	uint32_t freq_mul;
	int8_t freq_shift;
	struct timespec ts;
};

/* Protects the volatile variables ci_shadow and xen_clock_bias. */
static kmutex_t tmutex;

/* Per-CPU shadow time values. */
static volatile struct shadow ci_shadow[MAXCPUS];

/*
 * The time when the last hardclock(9) call should have taken place,
 * per CPU.
 */
static volatile uint64_t vcpu_system_time[MAXCPUS];

/*
 * The clock (as returned by xen_get_timecount) may need to be held
 * back to maintain the illusion that hardclock(9) was called when it
 * was supposed to be, not when Xen got around to scheduling us.
 */
static volatile uint64_t xen_clock_bias[MAXCPUS];

#ifdef DOM0OPS
/* If we're dom0, send our time to Xen every minute or so. */
int xen_timepush_ticks = 0;
static callout_t xen_timepush_co;
#endif

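/* Nanoseconds between hardclock(9) ticks, e.g. 10 ms with the common hz=100. */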
#define NS_PER_TICK (1000000000ULL/hz)

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.  Must be called at splhigh (per timecounter requirements).
 */
static void
get_time_values_from_xen(struct cpu_info *ci)
{
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];
	volatile struct vcpu_time_info *t = &ci->ci_vcpu->time;
	uint32_t tversion;

	KASSERT(mutex_owned(&tmutex));

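	/*
	 * Seqlock-style read: Xen bumps the version before and after
	 * each update, leaving it odd while an update is in progress.
	 * Retry until we have read a stable, even version.
	 */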
	do {
		shadow->time_version = t->version;
		xen_rmb();
		shadow->tsc_stamp = t->tsc_timestamp;
		shadow->system_time = t->system_time;
		shadow->freq_mul = t->tsc_to_system_mul;
		shadow->freq_shift = t->tsc_shift;
		xen_rmb();
	} while ((t->version & 1) || (shadow->time_version != t->version));
	do {
		tversion = HYPERVISOR_shared_info->wc_version;
		xen_rmb();
		shadow->ts.tv_sec = HYPERVISOR_shared_info->wc_sec;
		shadow->ts.tv_nsec = HYPERVISOR_shared_info->wc_nsec;
		xen_rmb();
	} while ((HYPERVISOR_shared_info->wc_version & 1) ||
	    (tversion != HYPERVISOR_shared_info->wc_version));
}

/*
 * Are the values we have up to date?
 */
static inline int
time_values_up_to_date(struct cpu_info *ci)
{
	int rv;
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	KASSERT(ci != NULL);
	KASSERT(mutex_owned(&tmutex));

	xen_rmb();
	rv = shadow->time_version == ci->ci_vcpu->time.version;
	xen_rmb();

	return rv;
}

/*
 * Xen 3 helpfully provides the CPU clock speed in the form of a multiplier
 * and shift that can be used to convert a cycle count into nanoseconds
 * without using an actual (slow) divide insn.
 */
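/*
 * The conversion is ns = (delta << shift) * mul_frac / 2^32.  Worked
 * example with illustrative numbers (not from any particular host): for
 * a 2 GHz TSC, Xen would advertise roughly mul_frac = 2^31 and shift = 0,
 * so a delta of 2000 cycles becomes (2000 * 2^31) >> 32 = 1000 ns.
 */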
static inline uint64_t
scale_delta(uint64_t delta, uint32_t mul_frac, int8_t shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	/*
	 * Here, we multiply a 64-bit and a 32-bit value, and take the top
	 * 64 bits of that 96-bit product.  This is broken up into two
	 * 32*32=>64-bit multiplies and a 64-bit add.  The casts are needed
	 * to hint to GCC that both multiplicands really are 32-bit; the
	 * generated code is still fairly bad, but not insanely so.
	 */
	return ((uint64_t)(uint32_t)(delta >> 32) * mul_frac)
	    + ((((uint64_t)(uint32_t)(delta & 0xFFFFFFFF)) * mul_frac) >> 32);
}

/*
 * Use cycle counter to determine ns elapsed since last Xen time update.
 * Must be called at splhigh (per timecounter requirements).
 */
static uint64_t
get_tsc_offset_ns(struct cpu_info *ci)
{
	uint64_t tsc_delta, offset;
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	KASSERT(mutex_owned(&tmutex));
	tsc_delta = cpu_counter() - shadow->tsc_stamp;
	offset = scale_delta(tsc_delta, shadow->freq_mul,
	    shadow->freq_shift);

	return offset;
}

/*
 * Returns the current system_time on the given vcpu, taking care that
 * the timestamp used is valid for the TSC measurement in question.
 * Xen2 doesn't ensure that this won't step backwards, so we enforce
 * monotonicity on our own in that case.  Must be called at splhigh.
 */
static uint64_t
get_vcpu_time(struct cpu_info *ci)
{
	uint64_t offset, stime;
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	KASSERT(mutex_owned(&tmutex));
	do {
		get_time_values_from_xen(ci);
		offset = get_tsc_offset_ns(ci);
		stime = shadow->system_time + offset;
		/* if the timestamp went stale before we used it, refresh */
	} while (!time_values_up_to_date(ci));

	return stime;
}

static void
xen_wall_time(struct timespec *wt)
{
	uint64_t nsec;
	struct cpu_info *ci = curcpu();
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	mutex_enter(&tmutex);
	do {
		/*
		 * Under Xen3, shadow->ts is the wall time less the
		 * system time; get_vcpu_time() updates the shadow.
		 */
		nsec = get_vcpu_time(ci);
		*wt = shadow->ts;
		nsec += wt->tv_nsec;
	} while (!time_values_up_to_date(ci));
	mutex_exit(&tmutex);

	wt->tv_sec += nsec / 1000000000L;
	wt->tv_nsec = nsec % 1000000000L;
}

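/* todr(9) "get" hook: read the current Xen wall-clock time into *tvp. */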
static int
xen_rtc_get(todr_chip_handle_t todr, struct timeval *tvp)
{
	struct timespec wt;

	xen_wall_time(&wt);
	tvp->tv_sec = wt.tv_sec;
	tvp->tv_usec = wt.tv_nsec / 1000;

	return 0;
}

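/*
 * todr(9) "set" hook: in a privileged domain, push the new time of day
 * to the hardware RTC and to the hypervisor; otherwise do nothing.
 */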
static int
xen_rtc_set(todr_chip_handle_t todr, struct timeval *tvp)
{
#ifdef DOM0OPS
#if __XEN_INTERFACE_VERSION__ < 0x00030204
	dom0_op_t op;
#else
	xen_platform_op_t op;
#endif
	if (xendomain_is_privileged()) {
		/* needs to set the RTC chip too */
		struct clock_ymdhms dt;
		clock_secs_to_ymdhms(tvp->tv_sec, &dt);
		rtc_set_ymdhms(NULL, &dt);

#if __XEN_INTERFACE_VERSION__ < 0x00030204
		op.cmd = DOM0_SETTIME;
#else
		op.cmd = XENPF_settime;
#endif
		/* XXX is rtc_offset handled correctly everywhere? */
		op.u.settime.secs = tvp->tv_sec;
		op.u.settime.nsecs = tvp->tv_usec * 1000;
		mutex_enter(&tmutex);
		op.u.settime.system_time = get_vcpu_time(curcpu());
		mutex_exit(&tmutex);
#if __XEN_INTERFACE_VERSION__ < 0x00030204
		return HYPERVISOR_dom0_op(&op);
#else
		return HYPERVISOR_platform_op(&op);
#endif
	}
#endif

	return 0;
}

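/* Attach the Xen wall clock as this machine's time-of-day backend. */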
void
startrtclock(void)
{
	static struct todr_chip_handle tch;

	tch.todr_gettime = xen_rtc_get;
	tch.todr_settime = xen_rtc_set;
	tch.todr_setwen = NULL;

	todr_attach(&tch);
}

/*
 * Wait approximately `n' microseconds.
 */
void
xen_delay(unsigned int n)
{
	struct cpu_info *ci = curcpu();
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	if (n < 500000) {
		/*
		 * shadow->system_time is updated every hz tick; it's not
		 * precise enough for short delays.  Use the CPU counter
		 * instead.  We assume it's working at this point.
		 */
		uint64_t cc, cc2, when;

		cc = cpu_counter();
		when = cc + (uint64_t)n * cpu_frequency(ci) / 1000000LL;
		if (when < cc) {
			/* wait for the counter to wrap */
			cc2 = cpu_counter();
			while (cc2 > cc)
				cc2 = cpu_counter();
		}
		cc2 = cpu_counter();
		while (cc2 < when)
			cc2 = cpu_counter();
	} else {
		uint64_t when;

		/* for large delays, shadow->system_time is OK */
		mutex_enter(&tmutex);
		get_time_values_from_xen(ci);
		when = shadow->system_time + n * 1000;
		while (shadow->system_time < when) {
			mutex_exit(&tmutex);
			HYPERVISOR_yield();
			mutex_enter(&tmutex);
			get_time_values_from_xen(ci);
		}
		mutex_exit(&tmutex);
	}
}

#ifdef DOM0OPS
/* ARGSUSED */
static void
xen_timepush(void *arg)
{
	callout_t *co = arg;

	resettodr();
	if (xen_timepush_ticks > 0)
		callout_schedule(co, xen_timepush_ticks);
}

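/*
 * sysctl helper: validate machdep.xen.timepush_ticks and reschedule
 * (or stop) the timepush callout to match the new interval.
 */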
/* ARGSUSED */
static int
sysctl_xen_timepush(SYSCTLFN_ARGS)
{
	int error, new_ticks;
	struct sysctlnode node;

	new_ticks = xen_timepush_ticks;
	node = *rnode;
	node.sysctl_data = &new_ticks;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (new_ticks < 0)
		return EINVAL;
	if (new_ticks != xen_timepush_ticks) {
		xen_timepush_ticks = new_ticks;
		if (new_ticks > 0)
			callout_schedule(&xen_timepush_co, new_ticks);
		else
			callout_stop(&xen_timepush_co);
	}

	return 0;
}
#endif

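/*
 * Timecounter read routine: nanoseconds of Xen system time (truncated
 * to 32 bits by the timecounter framework), less any bias needed to keep
 * the clock from running ahead of the hardclock ticks accounted so far.
 */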
/* ARGSUSED */
u_int
xen_get_timecount(struct timecounter *tc)
{
	uint64_t ns;
	struct cpu_info *ci = curcpu();

	mutex_enter(&tmutex);
	ns = get_vcpu_time(ci) - xen_clock_bias[ci->ci_cpuid];
	mutex_exit(&tmutex);

	return (u_int)ns;
}

/*
 * xen_initclocks() must be called once per CPU, on that CPU, since
 * VIRQ_TIMER is bound per-CPU.
 */

static struct evcnt hardclock_called[MAXCPUS];

void
xen_initclocks(void)
{
	int err __diagused;
	static bool tcdone = false;
	struct cpu_info *ci = curcpu();
	volatile struct shadow *shadow = &ci_shadow[ci->ci_cpuid];

	xen_clock_bias[ci->ci_cpuid] = 0;

	evcnt_attach_dynamic(&hardclock_called[ci->ci_cpuid],
	    EVCNT_TYPE_INTR,
	    NULL,
	    device_xname(ci->ci_dev),
	    "hardclock");

#ifdef DOM0OPS
	if (!tcdone) { /* Do this only once */
		callout_init(&xen_timepush_co, 0);
	}
#endif

	if (!tcdone) { /* Do this only once */
		mutex_init(&tmutex, MUTEX_DEFAULT, IPL_CLOCK);
	}
	mutex_enter(&tmutex);
	get_time_values_from_xen(ci);
	vcpu_system_time[ci->ci_cpuid] = shadow->system_time;
	mutex_exit(&tmutex);
	if (!tcdone) { /* Do this only once */
		tc_init(&xen_timecounter);
	}

	/* The splhigh requirements start here. */
	xen_resumeclocks(ci);

	/*
	 * The periodic timer looks buggy: we stop receiving events
	 * after a while.  Use the one-shot timer every NS_PER_TICK
	 * and rearm it from the event handler.
	 */
	if (XEN_MAJOR(xen_version) > 3 || XEN_MINOR(xen_version) > 0) {
		/* VCPUOP_stop_periodic_timer exists only on Xen 3.1 and later */
		err = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
		    ci->ci_cpuid,
		    NULL);
		KASSERT(err == 0);
	}

	err = HYPERVISOR_set_timer_op(
	    vcpu_system_time[ci->ci_cpuid] + NS_PER_TICK);
	KASSERT(err == 0);

#ifdef DOM0OPS
	const struct sysctlnode *node = NULL;

	if (!tcdone) { /* Do this only once */
		xen_timepush_ticks = 53 * hz + 3; /* avoid exact # of min/sec */
		if (xendomain_is_privileged()) {
			sysctl_createv(NULL, 0, NULL, &node, 0,
			    CTLTYPE_NODE, "xen",
			    SYSCTL_DESCR("Xen top level node"),
			    NULL, 0, NULL, 0,
			    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
			if (node != NULL) {
				sysctl_createv(NULL, 0, &node, NULL,
				    CTLFLAG_READWRITE, CTLTYPE_INT,
				    "timepush_ticks",
				    SYSCTL_DESCR("How often to update the "
				    "hypervisor's time-of-day; 0 to disable"),
				    sysctl_xen_timepush, 0,
				    &xen_timepush_ticks, 0,
				    CTL_CREATE, CTL_EOL);
			}
			callout_reset(&xen_timepush_co, xen_timepush_ticks,
			    &xen_timepush, &xen_timepush_co);
		}
	}
#endif
	tcdone = true;
}

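/*
 * Tear the per-CPU clock down across a suspend: unbind VIRQ_TIMER's
 * event channel and remove its handler.
 */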
void
xen_suspendclocks(struct cpu_info *ci)
{
	int evtch;

	evtch = unbind_virq_from_evtch(VIRQ_TIMER);
	KASSERT(evtch != -1);

	hypervisor_mask_event(evtch);
	event_remove_handler(evtch, (int (*)(void *))xen_timer_handler, ci);

	aprint_verbose("Xen clock: removed event channel %d\n", evtch);
}

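/*
 * (Re)bind VIRQ_TIMER to an event channel and install the timer handler;
 * also used for first-time setup from xen_initclocks().
 */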
void
xen_resumeclocks(struct cpu_info *ci)
{
	int evtch;

	evtch = bind_virq_to_evtch(VIRQ_TIMER);
	KASSERT(evtch != -1);

	event_set_handler(evtch, (int (*)(void *))xen_timer_handler,
	    ci, IPL_CLOCK, "clock");
	hypervisor_enable_event(evtch);

	aprint_verbose("Xen clock: using event channel %d\n", evtch);
}

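/*
 * VIRQ_TIMER handler: call hardclock(9) once for every tick interval
 * that has elapsed since we last ran (we may have been descheduled for
 * several ticks), then rearm the one-shot timer for the next tick.
 */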
/* ARGSUSED */
static int
xen_timer_handler(void *arg, struct intrframe *regs)
{
	int64_t delta;
	struct cpu_info *ci = curcpu();
	int err;

	KASSERT(arg == ci);
again:
	mutex_enter(&tmutex);
	delta = (int64_t)(get_vcpu_time(ci) - vcpu_system_time[ci->ci_cpuid]);
	mutex_exit(&tmutex);

	/* Several ticks may have passed without our being run; catch up. */
	while (delta >= (int64_t)NS_PER_TICK) {
		mutex_enter(&tmutex);
		vcpu_system_time[ci->ci_cpuid] += NS_PER_TICK;
		xen_clock_bias[ci->ci_cpuid] = (delta -= NS_PER_TICK);
		mutex_exit(&tmutex);
		hardclock((struct clockframe *)regs);
		hardclock_called[ci->ci_cpuid].ev_count++;
	}

	/*
	 * Rearm the timer.  If that fails, the deadline is probably
	 * already in the past; update our local time and try again.
	 */
	err = HYPERVISOR_set_timer_op(
	    vcpu_system_time[ci->ci_cpuid] + NS_PER_TICK);
	if (err)
		goto again;

	if (xen_clock_bias[ci->ci_cpuid]) {
		mutex_enter(&tmutex);
		xen_clock_bias[ci->ci_cpuid] = 0;
		mutex_exit(&tmutex);
	}

	return 0;
}

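/*
 * Nothing to do here: this port does not drive a separate statistics
 * clock whose rate could be changed.
 */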
void
setstatclockrate(int arg)
{
}

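/*
 * Block this VCPU in the hypervisor until the next event arrives;
 * called from the idle loop.  Nothing may be pending when we block.
 */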
void
idle_block(void)
{
	KASSERT(curcpu()->ci_ipending == 0);
	HYPERVISOR_block();
}