1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <sys/atomic.h>
#include <sys/hypervisor.h>
#include <sys/archsystm.h>
/*
* On the hypervisor, we have a virtualized system time based upon the
* information provided for each VCPU, which is updated every time it is
* scheduled onto a real CPU. Thus, none of the traditional code in
* i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
* the PSM, and there is no scaling step to apply.
*
* However, the platform does not guarantee monotonicity; thus we have to fake
* this up, which is a deeply unpleasant thing to have to do.
*
* Note that the virtualized interface still relies on the current TSC to
* calculate the time in nanoseconds since the VCPU was scheduled, and is thus
* subject to all the problems with that. For the most part, the hypervisor is
* supposed to deal with them.
*
* Another wrinkle involves suspend/resume/migration. If we come back and time
* is apparently less, we may have resumed on a different machine or on the
* same machine after a reboot. In this case we need to maintain an addend to
* ensure time continues reasonably. Otherwise we could end up taking a very
* long time to expire cyclics in the heap. Thus we have two functions:
*
* xpv_getsystime()
*
* The unadulterated system time from the hypervisor. This is only to be
* used when programming the hypervisor (setting a timer or calculating
* the TOD).
*
* xpv_gethrtime()
*
* This is the monotonic hrtime counter to be used by everything else such
* as the cyclic subsystem. We should never pass an hrtime directly into
* a hypervisor interface, as hrtime_addend may well be non-zero.
*/
int hrtime_fake_mt = 1;
static volatile hrtime_t hrtime_last;
static hrtime_t hrtime_suspend_time;
static hrtime_t hrtime_addend;
volatile uint32_t hres_lock;
hrtime_t hres_last_tick;
int64_t hrestime_adj;
volatile timestruc_t hrestime;
/*
* These functions are used in DTrace probe context, and must be removed from
* fbt consideration. Currently fbt ignores all weak symbols, so this will
* achieve that.
*/
#pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
#pragma weak xpv_getsystime = dtrace_xpv_getsystime
#pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
#pragma weak tsc_read = dtrace_xpv_gethrtime
hrtime_t
dtrace_xpv_getsystime(void)
{
vcpu_time_info_t *src;
vcpu_time_info_t __vti, *dst = &__vti;
uint64_t tsc_delta;
uint64_t tsc;
hrtime_t result;
uint32_t stamp;
src = &CPU->cpu_m.mcpu_vcpu_info->time;
/*
* Loop until version has not been changed during our update, and a Xen
* update is not under way (lowest bit is set).
*/
do {
dst->version = src->version;
stamp = CPU->cpu_m.mcpu_istamp;
membar_consumer();
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_time = src->system_time;
dst->tsc_to_system_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
/*
* Note that this use of the -actual- TSC register
* should probably be the SOLE one in the system on this
* paravirtualized platform.
*/
tsc = __rdtsc_insn();
tsc_delta = tsc - dst->tsc_timestamp;
membar_consumer();
} while (((src->version & 1) | (dst->version ^ src->version)) ||
CPU->cpu_m.mcpu_istamp != stamp);
if (dst->tsc_shift >= 0)
tsc_delta <<= dst->tsc_shift;
else if (dst->tsc_shift < 0)
tsc_delta >>= -dst->tsc_shift;
result = dst->system_time +
((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
return (result);
}
hrtime_t
dtrace_xpv_gethrtime(void)
{
hrtime_t result = xpv_getsystime() + hrtime_addend;
if (hrtime_fake_mt) {
hrtime_t last;
do {
last = hrtime_last;
if (result < last)
result = last + 1;
} while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
last, result) != last);
}
return (result);
}
void
xpv_time_suspend(void)
{
hrtime_suspend_time = xpv_getsystime();
}
void
xpv_time_resume(void)
{
hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
if (delta < 0)
hrtime_addend += -delta;
}
|