Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/wakeup_32.S          |  12
-rw-r--r--  arch/x86/kernel/acpi/wakeup_64.S          |  32
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c     |  66
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c  |  10
-rw-r--r--  arch/x86/kernel/crash_dump_32.c           |   1
-rw-r--r--  arch/x86/kernel/e820_32.c                 |   3
-rw-r--r--  arch/x86/kernel/e820_64.c                 |   3
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c        |  22
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c        |  27
-rw-r--r--  arch/x86/kernel/mce_64.c                  |  39
-rw-r--r--  arch/x86/kernel/msr.c                     |  35
-rw-r--r--  arch/x86/kernel/process_32.c              |   2
-rw-r--r--  arch/x86/kernel/setup_32.c                |  49
-rw-r--r--  arch/x86/kernel/setup_64.c                |  39
-rw-r--r--  arch/x86/kernel/signal_32.c               |   4
-rw-r--r--  arch/x86/kernel/suspend_64.c              | 101
-rw-r--r--  arch/x86/kernel/suspend_asm_64.S          |  49
-rw-r--r--  arch/x86/kernel/traps_32.c                |   4
-rw-r--r--  arch/x86/kernel/vsyscall_64.c             |  23
19 files changed, 332 insertions, 189 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index f22ba85..a97313b 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -11,7 +11,7 @@
#
# If physical address of wakeup_code is 0x12345, BIOS should call us with
# cs = 0x1234, eip = 0x05
-#
+#
#define BEEP \
inb $97, %al; \
@@ -52,7 +52,6 @@ wakeup_code:
BEEP
1:
mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
- movw $0x0e00 + 'S', %fs:(0x12)
pushl $0 # Kill any dangerous flags
popfl
@@ -90,9 +89,6 @@ wakeup_code:
# make sure %cr4 is set correctly (features, etc)
movl real_save_cr4 - wakeup_code, %eax
movl %eax, %cr4
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'i', %fs:(0x12)
# need a gdt -- use lgdtl to force 32-bit operands, in case
# the GDT is located past 16 megabytes.
@@ -102,8 +98,6 @@ wakeup_code:
movl %eax, %cr0
jmp 1f
1:
- movw $0x0e00 + 'n', %fs:(0x14)
-
movl real_magic - wakeup_code, %eax
cmpl $0x12345678, %eax
jne bogus_real_magic
@@ -122,13 +116,11 @@ real_save_cr4: .long 0
real_magic: .long 0
video_mode: .long 0
realmode_flags: .long 0
-beep_flags: .long 0
real_efer_save_restore: .long 0
real_save_efer_edx: .long 0
real_save_efer_eax: .long 0
bogus_real_magic:
- movw $0x0e00 + 'B', %fs:(0x12)
jmp bogus_real_magic
/* This code uses an extended set of video mode numbers. These include:
@@ -194,7 +186,6 @@ wakeup_pmode_return:
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movw $0x0e00 + 'u', 0xb8016
# reload the gdt, as we need the full 32 bit address
lgdt saved_gdt
@@ -218,7 +209,6 @@ wakeup_pmode_return:
jmp *%eax
bogus_magic:
- movw $0x0e00 + 'B', 0xb8018
jmp bogus_magic
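
The comment at the top of wakeup_32.S ("If physical address of wakeup_code is 0x12345, BIOS should call us with cs = 0x1234, eip = 0x05") is plain real-mode segmentation: physical = (cs << 4) + ip. A standalone C sketch of that arithmetic, purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long phys = 0x12345;	/* trampoline's physical address */
	unsigned int cs = phys >> 4;	/* 0x1234 */
	unsigned int ip = phys & 0xf;	/* 0x05 */

	/* real mode: physical = (segment << 4) + offset */
	printf("cs=%#x ip=%#x -> phys=%#lx\n", cs, ip,
	       ((unsigned long)cs << 4) + ip);
	return 0;
}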
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8b4357e..55608ec 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -41,7 +41,6 @@ wakeup_code:
# Running in *copy* of this code, somewhere in low 1MB.
- movb $0xa1, %al ; outb %al, $0x80
cli
cld
# setup data segment
@@ -65,11 +64,6 @@ wakeup_code:
cmpl $0x12345678, %eax
jne bogus_real_magic
- call verify_cpu # Verify the cpu supports long
- # mode
- testl %eax, %eax
- jnz no_longmode
-
testl $1, realmode_flags - wakeup_code
jz 1f
lcall $0xc000,$3
@@ -84,12 +78,6 @@ wakeup_code:
call mode_set
1:
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'L', %fs:(0x10)
-
- movb $0xa2, %al ; outb %al, $0x80
-
mov %ds, %ax # Find 32bit wakeup_code addr
movzx %ax, %esi # (Convert %ds:gdt to a linear ptr)
shll $4, %esi
@@ -117,14 +105,10 @@ wakeup_32_vector:
.code32
wakeup_32:
# Running in this code, but at low address; paging is not yet turned on.
- movb $0xa5, %al ; outb %al, $0x80
movl $__KERNEL_DS, %eax
movl %eax, %ds
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
/*
* Prepare for entering 64bits mode
*/
@@ -200,16 +184,11 @@ wakeup_long64:
*/
lgdt cpu_gdt_descr
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
jne bogus_64_magic
- movw $0x0e00 + 'u', %ds:(0xb8016)
-
nop
nop
movw $__KERNEL_DS, %ax
@@ -220,13 +199,11 @@ wakeup_long64:
movw %ax, %gs
movq saved_rsp, %rsp
- movw $0x0e00 + 'x', %ds:(0xb8018)
movq saved_rbx, %rbx
movq saved_rdi, %rdi
movq saved_rsi, %rsi
movq saved_rbp, %rbp
- movw $0x0e00 + '!', %ds:(0xb801a)
movq saved_rip, %rax
jmp *%rax
@@ -256,21 +233,12 @@ realmode_flags: .quad 0
.code16
bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
jmp bogus_real_magic
.code64
bogus_64_magic:
- movb $0xb3,%al ; outb %al,$0x80
jmp bogus_64_magic
-.code16
-no_longmode:
- movb $0xbc,%al ; outb %al,$0x80
- jmp no_longmode
-
-#include "../verify_cpu_64.S"
-
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1826395..297a241 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -499,6 +499,11 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {
static void free_cache_attributes(unsigned int cpu)
{
+ int i;
+
+ for (i = 0; i < num_cache_leaves; i++)
+ cache_remove_shared_cpu_map(cpu, i);
+
kfree(cpuid4_info[cpu]);
cpuid4_info[cpu] = NULL;
}
@@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu)
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
+ unsigned long j;
+ int retval;
cpumask_t oldmask;
if (num_cache_leaves == 0)
@@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
goto out;
/* Do cpuid and store the results */
- retval = 0;
for (j = 0; j < num_cache_leaves; j++) {
this_leaf = CPUID4_INFO_IDX(cpu, j);
retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0))
+ if (unlikely(retval < 0)) {
+ int i;
+
+ for (i = 0; i < j; i++)
+ cache_remove_shared_cpu_map(cpu, i);
break;
+ }
cache_shared_cpu_map_setup(cpu, j);
}
set_cpus_allowed(current, oldmask);
out:
- if (retval)
- free_cache_attributes(cpu);
+ if (retval) {
+ kfree(cpuid4_info[cpu]);
+ cpuid4_info[cpu] = NULL;
+ }
+
return retval;
}
@@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = {
.sysfs_ops = &sysfs_ops,
};
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
kfree(cache_kobject[cpu]);
kfree(index_kobject[cpu]);
@@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu)
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
+ int err;
if (num_cache_leaves == 0)
return -ENOENT;
- detect_cache_attributes(cpu);
- if (cpuid4_info[cpu] == NULL)
- return -ENOENT;
+ err = detect_cache_attributes(cpu);
+ if (err)
+ return err;
/* Allocate all required memory */
cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -705,13 +718,15 @@ err_out:
return -ENOMEM;
}
+static cpumask_t cache_dev_map = CPU_MASK_NONE;
+
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i, j;
struct _index_kobject *this_object;
- int retval = 0;
+ int retval;
retval = cpuid4_cache_sysfs_init(cpu);
if (unlikely(retval < 0))
@@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
kobject_set_name(cache_kobject[cpu], "%s", "cache");
cache_kobject[cpu]->ktype = &ktype_percpu_entry;
retval = kobject_register(cache_kobject[cpu]);
+ if (retval < 0) {
+ cpuid4_cache_sysfs_exit(cpu);
+ return retval;
+ }
for (i = 0; i < num_cache_leaves; i++) {
this_object = INDEX_KOBJECT_PTR(cpu,i);
@@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
break;
}
}
+ if (!retval)
+ cpu_set(cpu, cache_dev_map);
+
return retval;
}
@@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
if (cpuid4_info[cpu] == NULL)
return;
- for (i = 0; i < num_cache_leaves; i++) {
- cache_remove_shared_cpu_map(cpu, i);
+ if (!cpu_isset(cpu, cache_dev_map))
+ return;
+ cpu_clear(cpu, cache_dev_map);
+
+ for (i = 0; i < num_cache_leaves; i++)
kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- }
kobject_unregister(cache_kobject[cpu]);
cpuid4_cache_sysfs_exit(cpu);
- return;
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
@@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
- .notifier_call = cacheinfo_cpu_callback,
+ .notifier_call = cacheinfo_cpu_callback,
};
static int __cpuinit cache_sysfs_init(void)
@@ -791,14 +814,15 @@ static int __cpuinit cache_sysfs_init(void)
if (num_cache_leaves == 0)
return 0;
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
for_each_online_cpu(i) {
- struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i);
+ int err;
+ struct sys_device *sys_dev = get_cpu_sysdev(i);
- cache_add_dev(sys_dev);
+ err = cache_add_dev(sys_dev);
+ if (err)
+ return err;
}
-
+ register_hotcpu_notifier(&cacheinfo_cpu_notifier);
return 0;
}
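
Both intel_cacheinfo.c hunks apply the same recovery discipline: when a step in a loop fails, unwind exactly the entries already set up (the for (i = 0; i < j; i++) rollback in detect_cache_attributes(), and the cache_dev_map bitmap that keeps cache_remove_dev() from unregistering kobjects that cache_add_dev() never registered). A minimal sketch of that pattern in isolation; register_leaf() and unregister_leaf() are hypothetical stand-ins:

/* Partial-failure rollback: undo only what succeeded. */
static int register_leaf(int i)    { return i == 3 ? -1 : 0; }	/* fake failure */
static void unregister_leaf(int i) { /* undo leaf i */ }

static int register_all(int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = register_leaf(i);
		if (err) {
			while (i--)	/* roll back leaves 0..i-1 only */
				unregister_leaf(i);
			break;
		}
	}
	return err;
}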
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 494d320..24885be 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
- int err;
+ int err = 0;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
mutex_lock(&therm_cpu_lock);
@@ -149,7 +151,7 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
mutex_unlock(&therm_cpu_lock);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
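
The conversion from CPU_ONLINE to CPU_UP_PREPARE is what makes the new return value meaningful: UP_PREPARE runs before the CPU is brought online, and a NOTIFY_BAD reply makes the hotplug core abort the bring-up and deliver CPU_UP_CANCELED to the notifiers that already ran, which is why that case joins the teardown branch. A sketch of the resulting callback shape (the _FROZEN suspend variants are omitted; example_add_dev()/example_remove_dev() are hypothetical, and real code needs <linux/cpu.h> and <linux/notifier.h>):

static int example_cpu_callback(struct notifier_block *nfb,
				unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	int err = 0;

	switch (action) {
	case CPU_UP_PREPARE:		/* CPU not running yet: may veto */
		err = example_add_dev(cpu);
		break;
	case CPU_UP_CANCELED:		/* some notifier vetoed the bring-up */
	case CPU_DEAD:			/* normal offline */
		example_remove_dev(cpu);
		break;
	}
	return err ? NOTIFY_BAD : NOTIFY_OK;
}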
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 32e75d0..72d0c56 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!kdump_buf_page) {
printk(KERN_WARNING "Kdump: Kdump buffer page not"
" allocated\n");
+ kunmap_atomic(vaddr, KM_PTE0);
return -EFAULT;
}
copy_page(kdump_buf_page, vaddr);
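
The crash_dump_32.c fix enforces the basic kmap_atomic() rule: the atomic mapping must be dropped on every exit path, error paths included, or the kmap slot leaks. A condensed sketch of the pattern, using the 2.6.23-era two-argument API from the hunk above; copy_one() and its failure condition are hypothetical, and <linux/highmem.h> provides the mapping helpers:

static ssize_t copy_one(unsigned long pfn, char *buf, size_t count)
{
	void *vaddr = kmap_atomic(pfn_to_page(pfn), KM_PTE0);

	if (!buf) {				/* hypothetical failure */
		kunmap_atomic(vaddr, KM_PTE0);	/* this was the leaked path */
		return -EFAULT;
	}
	memcpy(buf, vaddr, count);
	kunmap_atomic(vaddr, KM_PTE0);
	return count;
}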
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 3c86b97..d58039e 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -288,7 +288,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
request_resource(res, code_resource);
request_resource(res, data_resource);
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
#endif
}
}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index e422b81..5761686 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -226,7 +226,8 @@ void __init e820_reserve_resources(void)
request_resource(res, &code_resource);
request_resource(res, &data_resource);
#ifdef CONFIG_KEXEC
- request_resource(res, &crashk_res);
+ if (crashk_res.start != crashk_res.end)
+ request_resource(res, &crashk_res);
#endif
}
}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8459ca6..11b935f 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -149,28 +149,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
image->start, cpu_has_pae);
}
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
- unsigned long size, base;
- size = memparse(arg, &arg);
- if (*arg == '@') {
- base = memparse(arg+1, &arg);
- /* FIXME: Do I want a sanity check
- * to validate the memory range?
- */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- return 0;
-}
-early_param("crashkernel", parse_crashkernel);
-
void arch_crash_save_vmcoreinfo(void)
{
#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7450b69..0d8577f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -231,33 +231,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
image->start);
}
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel. By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
-{
- unsigned long size, base;
- char *p;
- if (!arg)
- return -EINVAL;
- size = memparse(arg, &p);
- if (arg == p)
- return -EINVAL;
- if (*p == '@') {
- base = memparse(p+1, &p);
- /* FIXME: Do I want a sanity check to validate the
- * memory range? Yes you do, but it's too early for
- * e820 -AK */
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- return 0;
-}
-early_param("crashkernel", setup_crashkernel);
-
void arch_crash_save_vmcoreinfo(void)
{
#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
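
Both per-arch crashkernel= parsers are deleted because a common parse_crashkernel() helper takes over; the reserve_crashkernel() functions added to setup_32.c and setup_64.c later in this patch show the replacement. A condensed sketch of the new flow, with the signature as used in those hunks:

/* Consuming the common crashkernel= parser (see reserve_crashkernel()
 * in the setup_32.c hunk below). */
unsigned long long size, base;
int ret;

ret = parse_crashkernel(boot_command_line, total_mem, &size, &base);
if (ret == 0 && size > 0) {
	if (base > 0) {		/* user passed crashkernel=size@base */
		crashk_res.start = base;
		crashk_res.end   = base + size - 1;
		reserve_bootmem(base, size);	/* keep bootmem away from it */
	}
	/* no base given: the reservation is refused with a message */
}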
diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c
index 8ca8f86..66e6b79 100644
--- a/arch/x86/kernel/mce_64.c
+++ b/arch/x86/kernel/mce_64.c
@@ -802,16 +802,29 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (!mce_available(&cpu_data[cpu]))
return -EIO;
+ memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
per_cpu(device_mce,cpu).id = cpu;
per_cpu(device_mce,cpu).cls = &mce_sysclass;
err = sysdev_register(&per_cpu(device_mce,cpu));
+ if (err)
+ return err;
+
+ for (i = 0; mce_attributes[i]; i++) {
+ err = sysdev_create_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
+ if (err)
+ goto error;
+ }
- if (!err) {
- for (i = 0; mce_attributes[i]; i++)
- sysdev_create_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
+ return 0;
+error:
+ while (i--) {
+ sysdev_remove_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
}
+ sysdev_unregister(&per_cpu(device_mce,cpu));
+
return err;
}
@@ -823,7 +836,6 @@ static void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
- memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -831,18 +843,21 @@ static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = mce_create_device(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
mce_remove_device(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block mce_cpu_notifier = {
@@ -857,9 +872,13 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
err = sysdev_class_register(&mce_sysclass);
+ if (err)
+ return err;
for_each_online_cpu(i) {
- mce_create_device(i);
+ err = mce_create_device(i);
+ if (err)
+ return err;
}
register_hotcpu_notifier(&mce_cpu_notifier);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index df85c9c..e18e516 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -133,37 +133,42 @@ static const struct file_operations msr_fops = {
.open = msr_open,
};
-static int __cpuinit msr_device_create(int i)
+static int __cpuinit msr_device_create(int cpu)
{
- int err = 0;
struct device *dev;
- dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i);
- if (IS_ERR(dev))
- err = PTR_ERR(dev);
- return err;
+ dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
+ "msr%d", cpu);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void msr_device_destroy(int cpu)
+{
+ device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
}
static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- msr_device_create(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = msr_device_create(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata msr_class_cpu_notifier =
-{
+static struct notifier_block __cpuinitdata msr_class_cpu_notifier = {
.notifier_call = msr_class_cpu_callback,
};
@@ -196,7 +201,7 @@ static int __init msr_init(void)
out_class:
i = 0;
for_each_online_cpu(i)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, i));
+ msr_device_destroy(i);
class_destroy(msr_class);
out_chrdev:
unregister_chrdev(MSR_MAJOR, "cpu/msr");
@@ -208,7 +213,7 @@ static void __exit msr_exit(void)
{
int cpu = 0;
for_each_online_cpu(cpu)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
class_destroy(msr_class);
unregister_chrdev(MSR_MAJOR, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 097aeaf..044a477 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -301,7 +301,7 @@ void show_regs(struct pt_regs * regs)
unsigned long d0, d1, d2, d3, d6, d7;
printk("\n");
- printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+ printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
print_symbol("EIP is at %s\n", regs->eip);
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index b87a6fd..978dc01 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -378,6 +378,49 @@ extern unsigned long __init setup_memory(void);
extern void zone_sizes_init(void);
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+static inline unsigned long long get_total_mem(void)
+{
+ unsigned long long total;
+
+ total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+ total += highend_pfn - highstart_pfn;
+#endif
+
+ return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long total_mem;
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ total_mem = get_total_mem();
+
+ ret = parse_crashkernel(boot_command_line, total_mem,
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size > 0) {
+ if (crash_base > 0) {
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
+ } else
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "you have to specify a base address\n");
+ }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
void __init setup_bootmem_allocator(void)
{
unsigned long bootmap_size;
@@ -453,11 +496,7 @@ void __init setup_bootmem_allocator(void)
}
}
#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
-#endif
+ reserve_crashkernel();
}
/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 5a19f0c..cdcba69 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -191,6 +191,37 @@ static inline void copy_edd(void)
}
#endif
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long free_mem;
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+ ret = parse_crashkernel(boot_command_line, free_mem,
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size) {
+ if (crash_base > 0) {
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(free_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ reserve_bootmem(crash_base, crash_size);
+ } else
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "you have to specify a base address\n");
+ }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
#define EBDA_ADDR_POINTER 0x40E
unsigned __initdata ebda_addr;
@@ -357,13 +388,7 @@ void __init setup_arch(char **cmdline_p)
}
}
#endif
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end) {
- reserve_bootmem_generic(crashk_res.start,
- crashk_res.end - crashk_res.start + 1);
- }
-#endif
-
+ reserve_crashkernel();
paging_init();
#ifdef CONFIG_PCI
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0d79df3..6dc394b 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -200,8 +200,8 @@ badframe:
if (show_unhandled_signals && printk_ratelimit())
printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
" esp:%lx oeax:%lx\n",
- current->pid > 1 ? KERN_INFO : KERN_EMERG,
- current->comm, current->pid, frame, regs->eip,
+ task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+ current->comm, task_pid_nr(current), frame, regs->eip,
regs->esp, regs->orig_eax);
force_sig(SIGSEGV, current);
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 573c0a6..f8fafe5 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -150,8 +150,22 @@ void fix_processor_context(void)
/* Defined in arch/x86_64/kernel/suspend_asm.S */
extern int restore_image(void);
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
pgd_t *temp_level4_pgt;
+void *relocated_restore_code;
+
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j;
@@ -175,7 +189,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
if (paddr >= end)
break;
- pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
pe &= __supported_pte_mask;
set_pmd(pmd, __pmd(pe));
}
@@ -183,25 +197,42 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
return 0;
}
+static int res_kernel_text_pud_init(pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd;
+ unsigned long paddr;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+
+ return 0;
+}
+
static int set_up_temporary_mappings(void)
{
unsigned long start, end, next;
+ pud_t *pud;
int error;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
return -ENOMEM;
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
/* Set up the direct mapping from scratch */
start = (unsigned long)pfn_to_kaddr(0);
end = (unsigned long)pfn_to_kaddr(end_pfn);
for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
next = start + PGDIR_SIZE;
@@ -212,7 +243,17 @@ static int set_up_temporary_mappings(void)
set_pgd(temp_level4_pgt + pgd_index(start),
mk_kernel_pgd(__pa(pud)));
}
- return 0;
+
+ /* Set up the kernel text mapping from scratch */
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+ error = res_kernel_text_pud_init(pud, __START_KERNEL_map);
+ if (!error)
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ __pgd(__pa(pud) | _PAGE_TABLE));
+
+ return error;
}
int swsusp_arch_resume(void)
@@ -222,6 +263,13 @@ int swsusp_arch_resume(void)
/* We have got enough memory and from now on we cannot recover */
if ((error = set_up_temporary_mappings()))
return error;
+
+ relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+ memcpy(relocated_restore_code, &core_restore_code,
+ &restore_registers - &core_restore_code);
+
restore_image();
return 0;
}
@@ -236,4 +284,43 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long cr3;
+ unsigned long magic;
+};
+
+#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->jump_address = restore_jump_address;
+ rdr->cr3 = restore_cr3;
+ rdr->magic = RESTORE_MAGIC;
+ return 0;
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ restore_jump_address = rdr->jump_address;
+ restore_cr3 = rdr->cr3;
+ return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
#endif /* CONFIG_HIBERNATION */
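
arch_hibernation_header_save()/..._restore() round-trip restore_jump_address and restore_cr3 through the image header, and RESTORE_MAGIC lets the restore side reject a header laid out by an incompatible kernel. A small usage sketch (buffer size and the two values are illustrative only):

char header[64];

restore_jump_address = 0xffffffff80200000UL;	/* illustrative */
restore_cr3 = 0x201000UL;			/* illustrative */

if (arch_hibernation_header_save(header, sizeof(header)) == 0 &&
    arch_hibernation_header_restore(header) == 0) {
	/* restore_jump_address/restore_cr3 now hold the saved values;
	 * a foreign or corrupted header fails the RESTORE_MAGIC test
	 * and _restore() returns -EINVAL instead. */
}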
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S
index 16d183f..48344b6 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/kernel/suspend_asm_64.S
@@ -2,8 +2,8 @@
*
* Distribute under GPLv2.
*
- * swsusp_arch_resume may not use any stack, nor any variable that is
- * not "NoSave" during copying pages:
+ * swsusp_arch_resume must not use any stack or any nonlocal variables while
+ * copying pages:
*
* It's rewriting one kernel image with another. What is stack in "old"
* image could very well be data page in "new" image, and overwriting
@@ -36,6 +36,13 @@ ENTRY(swsusp_arch_suspend)
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
+ /* save the address of restore_registers */
+ movq $restore_registers, %rax
+ movq %rax, restore_jump_address(%rip)
+ /* save cr3 */
+ movq %cr3, %rax
+ movq %rax, restore_cr3(%rip)
+
call swsusp_save
ret
@@ -54,7 +61,17 @@ ENTRY(restore_image)
movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on
+ /* prepare to jump to the image kernel */
+ movq restore_jump_address(%rip), %rax
+ movq restore_cr3(%rip), %rbx
+
+ /* prepare to copy image data to their original locations */
movq restore_pblist(%rip), %rdx
+ movq relocated_restore_code(%rip), %rcx
+ jmpq *%rcx
+
+ /* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
loop:
testq %rdx, %rdx
jz done
@@ -62,7 +79,7 @@ loop:
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
- movq $512, %rcx
+ movq $(PAGE_SIZE >> 3), %rcx
rep
movsq
@@ -70,10 +87,22 @@ loop:
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* jump to the restore_registers address from the image header */
+ jmpq *%rax
+ /*
+ * NOTE: This assumes that the boot kernel's text mapping covers the
+ * image kernel's page containing restore_registers and the address of
+ * this page is the same as in the image kernel's text mapping (it
+ * should always be true, because the text mapping is linear, starting
+ * from 0, and is supposed to cover the entire kernel text for every
+ * kernel).
+ *
+ * code below belongs to the image kernel
+ */
+
+ENTRY(restore_registers)
/* go back to the original page tables */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- addq phys_base(%rip), %rax
- movq %rax, %cr3
+ movq %rbx, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
@@ -84,12 +113,9 @@ done:
movq %rcx, %cr3
movq %rax, %cr4; # turn PGE back on
- movl $24, %eax
- movl %eax, %ds
-
movq saved_context_esp(%rip), %rsp
movq saved_context_ebp(%rip), %rbp
- /* Don't restore %rax, it must be 0 anyway */
+ /* restore GPRs (we don't restore %rax, it must be 0 anyway) */
movq saved_context_ebx(%rip), %rbx
movq saved_context_ecx(%rip), %rcx
movq saved_context_edx(%rip), %rdx
@@ -107,4 +133,7 @@ done:
xorq %rax, %rax
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
ret
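
Two details in suspend_asm_64.S deserve a note. The copy loop jumps through relocated_restore_code because it rewrites arbitrary pages of the boot kernel, possibly including the page holding the loop itself, so swsusp_arch_resume() first copies core_restore_code..restore_registers onto a page known to be safe. And the count changes from a literal 512 to $(PAGE_SIZE >> 3) because movsq moves 8 bytes per iteration; the arithmetic, spelled out:

/* movsq count for one page: 8 bytes per iteration. */
#define PAGE_SIZE 4096UL	/* assumption: x86-64 base page size */

unsigned long movsq_count = PAGE_SIZE >> 3;	/* 4096 / 8 == 512 */
/* matches the old hard-coded $512, but tracks PAGE_SIZE */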
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index b132d39..1e9d572 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -316,7 +316,7 @@ void show_registers(struct pt_regs *regs)
printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
- TASK_COMM_LEN, current->comm, current->pid,
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
current_thread_info(), current, task_thread_info(current));
/*
* When in-kernel, we also print out the stack and code at the
@@ -622,7 +622,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
printk_ratelimit())
printk(KERN_INFO
"%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
- current->comm, current->pid,
+ current->comm, task_pid_nr(current),
regs->eip, regs->esp, error_code);
force_sig(SIGSEGV, current);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8a67e282..585541c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -64,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
.sysctl_enabled = 1,
};
+void update_vsyscall_tz(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ /* sys_tz has changed */
+ vsyscall_gtod_data.sys_tz = sys_tz;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+
void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
{
unsigned long flags;
@@ -77,7 +87,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.sys_tz = sys_tz;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -163,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t)
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
- vgettimeofday(&tv, 0);
+ vgettimeofday(&tv, NULL);
result = tv.tv_sec;
if (t)
*t = result;
@@ -257,18 +266,10 @@ out:
return ret;
}
-static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
static ctl_table kernel_table2[] = {
- { .ctl_name = 99, .procname = "vsyscall64",
+ { .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .strategy = vsyscall_sysctl_nostrat,
.proc_handler = vsyscall_sysctl_change },
{}
};
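
update_vsyscall_tz() moves the sys_tz copy out of update_vsyscall() and into its own seqlock writer section, so a timezone change is published immediately rather than on the next clock update. For reference, a sketch of the matching lockless reader pattern; the real vgettimeofday reader in this file reads more fields, and read_seqbegin()/read_seqretry() come from <linux/seqlock.h>:

struct timezone tz;
unsigned seq;

do {
	seq = read_seqbegin(&__vsyscall_gtod_data.lock);
	tz = __vsyscall_gtod_data.sys_tz;	/* snapshot under the seq */
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
/* tz is now a consistent copy of the writer's last update */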