aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/tsc.c19
-rw-r--r--arch/x86/kernel/vmlinux.lds.S34
-rw-r--r--arch/x86/kernel/vread_tsc_64.c36
-rw-r--r--arch/x86/kernel/vsyscall_64.c48
6 files changed, 74 insertions, 73 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2508064..f5abe3a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
-CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
@@ -24,13 +23,16 @@ endif
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
CFLAGS_hpet.o := $(nostackp)
-CFLAGS_tsc.o := $(nostackp)
+CFLAGS_vread_tsc_64.o := $(nostackp)
CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n
GCOV_PROFILE_paravirt.o := n
+# vread_tsc_64 is hot and should be fully optimized:
+CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
+
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
+obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25a28a2..00cbb27 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
#include <asm/time.h>
#ifdef CONFIG_X86_64
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
#endif
unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9335bf7..6cc6922 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,25 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
ret : clocksource_tsc.cycle_last;
}
-#ifdef CONFIG_X86_64
-static cycle_t __vsyscall_fn vread_tsc(void)
-{
- cycle_t ret;
-
- /*
- * Surround the RDTSC by barriers, to make sure it's not
- * speculated to outside the seqlock critical section and
- * does not cause time warps:
- */
- rdtsc_barrier();
- ret = (cycle_t)vget_cycles();
- rdtsc_barrier();
-
- return ret >= __vsyscall_gtod_data.clock.cycle_last ?
- ret : __vsyscall_gtod_data.clock.cycle_last;
-}
-#endif
-
static void resume_tsc(struct clocksource *cs)
{
clocksource_tsc.cycle_last = 0;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 61682f0..89aed99 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -161,6 +161,12 @@ SECTIONS
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \
+ ADDR(.vsyscall_0) + offset \
+ : AT(VLOAD(.vsyscall_var_ ## x)) { \
+ *(.vsyscall_var_ ## x) \
+ } \
+ x = VVIRT(.vsyscall_var_ ## x);
. = ALIGN(4096);
__vsyscall_0 = .;
@@ -175,18 +181,6 @@ SECTIONS
*(.vsyscall_fn)
}
- . = ALIGN(L1_CACHE_BYTES);
- .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
- *(.vsyscall_gtod_data)
- }
-
- vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
- .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
- *(.vsyscall_clock)
- }
- vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
*(.vsyscall_1)
}
@@ -194,21 +188,14 @@ SECTIONS
*(.vsyscall_2)
}
- .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
- *(.vgetcpu_mode)
- }
- vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
- . = ALIGN(L1_CACHE_BYTES);
- .jiffies : AT(VLOAD(.jiffies)) {
- *(.jiffies)
- }
- jiffies = VVIRT(.jiffies);
-
.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
*(.vsyscall_3)
}
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+
. = __vsyscall_0 + PAGE_SIZE;
#undef VSYSCALL_ADDR
@@ -216,6 +203,7 @@ SECTIONS
#undef VLOAD
#undef VVIRT_OFFSET
#undef VVIRT
+#undef EMIT_VVAR
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
new file mode 100644
index 0000000..a81aa9e
--- /dev/null
+++ b/arch/x86/kernel/vread_tsc_64.c
@@ -0,0 +1,36 @@
+/* This code runs in userspace. */
+
+#define DISABLE_BRANCH_PROFILING
+#include <asm/vgtod.h>
+
+notrace cycle_t __vsyscall_fn vread_tsc(void)
+{
+ cycle_t ret;
+ u64 last;
+
+ /*
+ * Empirically, a fence (of type that depends on the CPU)
+ * before rdtsc is enough to ensure that rdtsc is ordered
+ * with respect to loads. The various CPU manuals are unclear
+ * as to whether rdtsc can be reordered with later loads,
+ * but no one has ever seen it happen.
+ */
+ rdtsc_barrier();
+ ret = (cycle_t)vget_cycles();
+
+ last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ /*
+ * GCC likes to generate cmov here, but this branch is extremely
+ * predictable (it's just a funciton of time and the likely is
+ * very likely) and there's a data dependence, so force GCC
+ * to generate a branch instead. I don't barrier() because
+ * we don't actually need a barrier, and if this function
+ * ever gets inlined it will generate worse code.
+ */
+ asm volatile ("");
+ return last;
+}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c..3e68218 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -49,17 +49,10 @@
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
-/*
- * vsyscall_gtod_data contains data that is :
- * - readonly from vsyscalls
- * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
- * Try to keep this structure as small as possible to avoid cache line ping pongs
- */
-int __vgetcpu_mode __section_vgetcpu_mode;
-
-struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
+DEFINE_VVAR(int, vgetcpu_mode);
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
- .lock = SEQLOCK_UNLOCKED,
+ .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
.sysctl_enabled = 1,
};
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
*/
static __always_inline void do_get_tz(struct timezone * tz)
{
- *tz = __vsyscall_gtod_data.sys_tz;
+ *tz = VVAR(vsyscall_gtod_data).sys_tz;
}
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
unsigned long mult, shift, nsec;
cycle_t (*vread)(void);
do {
- seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+ seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
- vread = __vsyscall_gtod_data.clock.vread;
- if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+ vread = VVAR(vsyscall_gtod_data).clock.vread;
+ if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
+ !vread)) {
gettimeofday(tv,NULL);
return;
}
now = vread();
- base = __vsyscall_gtod_data.clock.cycle_last;
- mask = __vsyscall_gtod_data.clock.mask;
- mult = __vsyscall_gtod_data.clock.mult;
- shift = __vsyscall_gtod_data.clock.shift;
+ base = VVAR(vsyscall_gtod_data).clock.cycle_last;
+ mask = VVAR(vsyscall_gtod_data).clock.mask;
+ mult = VVAR(vsyscall_gtod_data).clock.mult;
+ shift = VVAR(vsyscall_gtod_data).clock.shift;
- tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
- nsec = __vsyscall_gtod_data.wall_time_nsec;
- } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+ tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
+ nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
+ } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
/* calculate interval: */
cycle_delta = (now - base) & mask;
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
{
unsigned seq;
time_t result;
- if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+ if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
return time_syscall(t);
do {
- seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+ seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
- result = __vsyscall_gtod_data.wall_time_sec;
+ result = VVAR(vsyscall_gtod_data).wall_time_sec;
- } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+ } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
if (t)
*t = result;
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
- if (tcache && tcache->blob[0] == (j = __jiffies)) {
+ if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
p = tcache->blob[1];
- } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+ } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
native_read_tscp(&p);
} else {