aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Makefile2
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c10
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c17
-rw-r--r--arch/x86/kernel/head.c53
-rw-r--r--arch/x86/mm/fault.c9
-rw-r--r--drivers/block/xen-blkback/xenbus.c49
-rw-r--r--drivers/dca/dca-core.c2
-rw-r--r--drivers/firewire/core-device.c4
-rw-r--r--drivers/media/rc/rc-main.c4
-rw-r--r--drivers/staging/comedi/comedi_fops.c8
-rw-r--r--drivers/staging/comedi/drivers/ni_labpc.c35
-rw-r--r--drivers/target/target_core_fabric_configfs.c8
-rw-r--r--drivers/vhost/vhost.c2
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ocfs2/suballoc.c7
-rw-r--r--fs/ocfs2/suballoc.h2
-rw-r--r--include/linux/auto_fs.h25
-rw-r--r--include/linux/quota.h1
-rw-r--r--include/linux/sched.h11
-rw-r--r--kernel/cpuset.c12
-rw-r--r--kernel/posix-timers.c7
-rw-r--r--kernel/ptrace.c63
-rw-r--r--kernel/sched.c3
-rw-r--r--kernel/signal.c17
-rw-r--r--kernel/sysctl_binary.c3
-rw-r--r--kernel/trace/ftrace.c46
-rw-r--r--lib/idr.c9
-rw-r--r--net/sunrpc/svc_xprt.c15
30 files changed, 279 insertions, 158 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 397ee05..3899234 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -531,6 +531,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
UART at the specified I/O port or MMIO address,
switching to the matching ttyS device later. The
options are the same as for ttyS, above.
+ hvc<n> Use the hypervisor console device <n>. This is for
+ both Xen and PowerPC hypervisors.
If the device connected to the port is not a TTY but a braille
device, prepend "brl," before the device type, for instance
@@ -679,6 +681,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
earlyprintk= [X86,SH,BLACKFIN]
earlyprintk=vga
+ earlyprintk=xen
earlyprintk=serial[,ttySn[,baudrate]]
earlyprintk=ttySn[,baudrate]
earlyprintk=dbgp[debugController#]
@@ -696,6 +699,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The VGA output is eventually overwritten by the real
console.
+ The xen output can only be used by Xen PV guests.
+
ekgdboc= [X86,KGDB] Allow early kernel console debugging
ekgdboc=kbd
diff --git a/Makefile b/Makefile
index c077b6af..4eb8f72 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 3
PATCHLEVEL = 0
-SUBLEVEL = 67
+SUBLEVEL = 68
EXTRAVERSION =
NAME = Sneaky Weasel
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 25ab200..f9804b7 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -590,7 +590,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* it into the save area
*/
save_fp_regs(&vcpu->arch.guest_fpregs);
- save_access_regs(vcpu->run->s.regs.acrs);
+ save_access_regs(vcpu->arch.guest_acrs);
if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128, prefix))
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 33df6e8..d86aa3f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1363,6 +1363,7 @@ static struct syscore_ops amd_iommu_syscore_ops = {
*/
static int __init amd_iommu_init(void)
{
+ struct amd_iommu *iommu;
int i, ret = 0;
/*
@@ -1411,9 +1412,6 @@ static int __init amd_iommu_init(void)
if (amd_iommu_pd_alloc_bitmap == NULL)
goto free;
- /* init the device table */
- init_device_table();
-
/*
* let all alias entries point to itself
*/
@@ -1463,6 +1461,12 @@ static int __init amd_iommu_init(void)
if (ret)
goto free_disable;
+ /* init the device table */
+ init_device_table();
+
+ for_each_iommu(iommu)
+ iommu_flush_all_caches(iommu);
+
amd_iommu_init_api();
amd_iommu_init_notifier();
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373df..db4f704 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,12 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
}
early_param("x2apic_phys", set_x2apic_phys_mode);
+static bool x2apic_fadt_phys(void)
+{
+ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+ printk(KERN_DEBUG "System requires x2apic physical mode\n");
+ return true;
+ }
+ return false;
+}
+
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- if (x2apic_phys)
- return x2apic_enabled();
- else
- return 0;
+ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
static void
@@ -108,7 +115,7 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && x2apic_phys)
+ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699b..f6c4674 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -5,8 +5,6 @@
#include <asm/setup.h>
#include <asm/bios_ebda.h>
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
/*
* The BIOS places the EBDA/XBDA at the top of conventional
* memory, and usually decreases the reported amount of
@@ -16,17 +14,30 @@
* chipset: reserve a page before VGA to prevent PCI prefetch
* into it (errata #56). Usually the page is reserved anyways,
* unless you have no PS/2 mouse plugged in.
+ *
+ * This functions is deliberately very conservative. Losing
+ * memory in the bottom megabyte is rarely a problem, as long
+ * as we have enough memory to install the trampoline. Using
+ * memory that is in use by the BIOS or by some DMA device
+ * the BIOS didn't shut down *is* a big problem.
*/
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+#define LOWMEM_CAP 0x9f000U /* Absolute maximum */
+#define INSANE_CUTOFF 0x20000U /* Less than this = insane */
+
void __init reserve_ebda_region(void)
{
unsigned int lowmem, ebda_addr;
- /* To determine the position of the EBDA and the */
- /* end of conventional memory, we need to look at */
- /* the BIOS data area. In a paravirtual environment */
- /* that area is absent. We'll just have to assume */
- /* that the paravirt case can handle memory setup */
- /* correctly, without our help. */
+ /*
+ * To determine the position of the EBDA and the
+ * end of conventional memory, we need to look at
+ * the BIOS data area. In a paravirtual environment
+ * that area is absent. We'll just have to assume
+ * that the paravirt case can handle memory setup
+ * correctly, without our help.
+ */
if (paravirt_enabled())
return;
@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void)
/* start of EBDA area */
ebda_addr = get_bios_ebda();
- /* Fixup: bios puts an EBDA in the top 64K segment */
- /* of conventional memory, but does not adjust lowmem. */
- if ((lowmem - ebda_addr) <= 0x10000)
- lowmem = ebda_addr;
+ /*
+ * Note: some old Dells seem to need 4k EBDA without
+ * reporting so, so just consider the memory above 0x9f000
+ * to be off limits (bugzilla 2990).
+ */
+
+ /* If the EBDA address is below 128K, assume it is bogus */
+ if (ebda_addr < INSANE_CUTOFF)
+ ebda_addr = LOWMEM_CAP;
- /* Fixup: bios does not report an EBDA at all. */
- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
- lowmem = 0x9f000;
+ /* If lowmem is less than 128K, assume it is bogus */
+ if (lowmem < INSANE_CUTOFF)
+ lowmem = LOWMEM_CAP;
- /* Paranoia: should never happen, but... */
- if ((lowmem == 0) || (lowmem >= 0x100000))
- lowmem = 0x9f000;
+ /* Use the lower of the lowmem and EBDA markers as the cutoff */
+ lowmem = min(lowmem, ebda_addr);
+ lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */
/* reserve all memory between lowmem and the 1MB mark */
memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf..3b2ad91 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -720,12 +720,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
- if (unlikely(show_unhandled_signals))
+ /* Kernel addresses are always protection faults: */
+ if (address >= TASK_SIZE)
+ error_code |= PF_PROT;
+
+ if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
- /* Kernel addresses are always protection faults: */
tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 6cc0db1..97ded25 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -400,6 +400,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
be->blkif = NULL;
}
+ kfree(be->mode);
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
return 0;
@@ -482,6 +483,7 @@ static void backend_changed(struct xenbus_watch *watch,
= container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
int cdrom = 0;
+ unsigned long handle;
char *device_type;
DPRINTK("");
@@ -501,10 +503,10 @@ static void backend_changed(struct xenbus_watch *watch,
return;
}
- if ((be->major || be->minor) &&
- ((be->major != major) || (be->minor != minor))) {
- pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
- be->major, be->minor, major, minor);
+ if (be->major | be->minor) {
+ if (be->major != major || be->minor != minor)
+ pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+ be->major, be->minor, major, minor);
return;
}
@@ -522,36 +524,33 @@ static void backend_changed(struct xenbus_watch *watch,
kfree(device_type);
}
- if (be->major == 0 && be->minor == 0) {
- /* Front end dir is a number, which is used as the handle. */
-
- char *p = strrchr(dev->otherend, '/') + 1;
- long handle;
- err = strict_strtoul(p, 0, &handle);
- if (err)
- return;
+ /* Front end dir is a number, which is used as the handle. */
+ err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
+ if (err)
+ return;
- be->major = major;
- be->minor = minor;
+ be->major = major;
+ be->minor = minor;
- err = xen_vbd_create(be->blkif, handle, major, minor,
- (NULL == strchr(be->mode, 'w')), cdrom);
- if (err) {
- be->major = 0;
- be->minor = 0;
- xenbus_dev_fatal(dev, err, "creating vbd structure");
- return;
- }
+ err = xen_vbd_create(be->blkif, handle, major, minor,
+ !strchr(be->mode, 'w'), cdrom);
+ if (err)
+ xenbus_dev_fatal(dev, err, "creating vbd structure");
+ else {
err = xenvbd_sysfs_addif(dev);
if (err) {
xen_vbd_free(&be->blkif->vbd);
- be->major = 0;
- be->minor = 0;
xenbus_dev_fatal(dev, err, "creating sysfs entries");
- return;
}
+ }
+ if (err) {
+ kfree(be->mode);
+ be->mode = NULL;
+ be->major = 0;
+ be->minor = 0;
+ } else {
/* We're potentially connected now */
xen_update_blkif_status(be->blkif);
}
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 7065851..605fd20 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -410,7 +410,7 @@ void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
spin_lock_irqsave(&dca_lock, flags);
if (list_empty(&dca_domains)) {
- raw_spin_unlock_irqrestore(&dca_lock, flags);
+ spin_unlock_irqrestore(&dca_lock, flags);
return;
}
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c
index 9f661e0..812cea3 100644
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -995,6 +995,10 @@ static void fw_device_init(struct work_struct *work)
ret = idr_pre_get(&fw_device_idr, GFP_KERNEL) ?
idr_get_new(&fw_device_idr, device, &minor) :
-ENOMEM;
+ if (minor >= 1 << MINORBITS) {
+ idr_remove(&fw_device_idr, minor);
+ minor = -ENOSPC;
+ }
up_write(&fw_device_rwsem);
if (ret < 0)
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 9cfb56d..62910ac 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -775,8 +775,10 @@ static ssize_t show_protocols(struct device *device,
} else if (dev->raw) {
enabled = dev->raw->enabled_protocols;
allowed = ir_raw_get_allowed_protocols();
- } else
+ } else {
+ mutex_unlock(&dev->lock);
return -ENODEV;
+ }
IR_dprintk(1, "allowed - 0x%llx, enabled - 0x%llx\n",
(long long)allowed,
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 4b9d8f0..ee33cba 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1577,7 +1577,7 @@ static unsigned int comedi_poll(struct file *file, poll_table * wait)
mask = 0;
read_subdev = comedi_get_read_subdevice(dev_file_info);
- if (read_subdev) {
+ if (read_subdev && read_subdev->async) {
poll_wait(file, &read_subdev->async->wait_head, wait);
if (!read_subdev->busy
|| comedi_buf_read_n_available(read_subdev->async) > 0
@@ -1587,7 +1587,7 @@ static unsigned int comedi_poll(struct file *file, poll_table * wait)
}
}
write_subdev = comedi_get_write_subdevice(dev_file_info);
- if (write_subdev) {
+ if (write_subdev && write_subdev->async) {
poll_wait(file, &write_subdev->async->wait_head, wait);
comedi_buf_write_alloc(write_subdev->async,
write_subdev->async->prealloc_bufsz);
@@ -1629,7 +1629,7 @@ static ssize_t comedi_write(struct file *file, const char __user *buf,
}
s = comedi_get_write_subdevice(dev_file_info);
- if (s == NULL) {
+ if (s == NULL || s->async == NULL) {
retval = -EIO;
goto done;
}
@@ -1740,7 +1740,7 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
}
s = comedi_get_read_subdevice(dev_file_info);
- if (s == NULL) {
+ if (s == NULL || s->async == NULL) {
retval = -EIO;
goto done;
}
diff --git a/drivers/staging/comedi/drivers/ni_labpc.c b/drivers/staging/comedi/drivers/ni_labpc.c
index ab8f370..897359d7 100644
--- a/drivers/staging/comedi/drivers/ni_labpc.c
+++ b/drivers/staging/comedi/drivers/ni_labpc.c
@@ -1241,7 +1241,9 @@ static int labpc_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
else
channel = CR_CHAN(cmd->chanlist[0]);
/* munge channel bits for differential / scan disabled mode */
- if (labpc_ai_scan_mode(cmd) != MODE_SINGLE_CHAN && aref == AREF_DIFF)
+ if ((labpc_ai_scan_mode(cmd) == MODE_SINGLE_CHAN ||
+ labpc_ai_scan_mode(cmd) == MODE_SINGLE_CHAN_INTERVAL) &&
+ aref == AREF_DIFF)
channel *= 2;
devpriv->command1_bits |= ADC_CHAN_BITS(channel);
devpriv->command1_bits |= thisboard->ai_range_code[range];
@@ -1257,21 +1259,6 @@ static int labpc_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
devpriv->write_byte(devpriv->command1_bits,
dev->iobase + COMMAND1_REG);
}
- /* setup any external triggering/pacing (command4 register) */
- devpriv->command4_bits = 0;
- if (cmd->convert_src != TRIG_EXT)
- devpriv->command4_bits |= EXT_CONVERT_DISABLE_BIT;
- /* XXX should discard first scan when using interval scanning
- * since manual says it is not synced with scan clock */
- if (labpc_use_continuous_mode(cmd) == 0) {
- devpriv->command4_bits |= INTERVAL_SCAN_EN_BIT;
- if (cmd->scan_begin_src == TRIG_EXT)
- devpriv->command4_bits |= EXT_SCAN_EN_BIT;
- }
- /* single-ended/differential */
- if (aref == AREF_DIFF)
- devpriv->command4_bits |= ADC_DIFF_BIT;
- devpriv->write_byte(devpriv->command4_bits, dev->iobase + COMMAND4_REG);
devpriv->write_byte(cmd->chanlist_len,
dev->iobase + INTERVAL_COUNT_REG);
@@ -1349,6 +1336,22 @@ static int labpc_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s)
devpriv->command3_bits &= ~ADC_FNE_INTR_EN_BIT;
devpriv->write_byte(devpriv->command3_bits, dev->iobase + COMMAND3_REG);
+ /* setup any external triggering/pacing (command4 register) */
+ devpriv->command4_bits = 0;
+ if (cmd->convert_src != TRIG_EXT)
+ devpriv->command4_bits |= EXT_CONVERT_DISABLE_BIT;
+ /* XXX should discard first scan when using interval scanning
+ * since manual says it is not synced with scan clock */
+ if (labpc_use_continuous_mode(cmd) == 0) {
+ devpriv->command4_bits |= INTERVAL_SCAN_EN_BIT;
+ if (cmd->scan_begin_src == TRIG_EXT)
+ devpriv->command4_bits |= EXT_SCAN_EN_BIT;
+ }
+ /* single-ended/differential */
+ if (aref == AREF_DIFF)
+ devpriv->command4_bits |= ADC_DIFF_BIT;
+ devpriv->write_byte(devpriv->command4_bits, dev->iobase + COMMAND4_REG);
+
/* startup acquisition */
/* command2 reg */
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 07ab5a3..6246f28 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -355,6 +355,14 @@ static struct config_group *target_fabric_make_mappedlun(
ret = -EINVAL;
goto out;
}
+ if (mapped_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) {
+ pr_err("Mapped LUN: %lu exceeds TRANSPORT_MAX_LUNS_PER_TPG"
+ "-1: %u for Target Portal Group: %u\n", mapped_lun,
+ TRANSPORT_MAX_LUNS_PER_TPG-1,
+ se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg));
+ ret = -EINVAL;
+ goto out;
+ }
lacl = core_dev_init_initiator_node_lun_acl(se_tpg, mapped_lun,
config_item_name(acl_ci), &ret);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 61047fe..e3fac28 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -986,7 +986,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
}
_iov = iov + ret;
size = reg->memory_size - addr + reg->guest_phys_addr;
- _iov->iov_len = min((u64)len, size);
+ _iov->iov_len = min((u64)len - s, size);
_iov->iov_base = (void __user *)(unsigned long)
(reg->userspace_addr + addr - reg->guest_phys_addr);
s += size;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b6adf68..31bbdb5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4111,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1;
/* Add the prealloc space to lg */
- rcu_read_lock();
+ spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
pa_inode_list) {
spin_lock(&tmp_pa->pa_lock);
@@ -4135,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
if (!added)
list_add_tail_rcu(&pa->pa_inode_list,
&lg->lg_prealloc_list[order]);
- rcu_read_unlock();
+ spin_unlock(&lg->lg_prealloc_lock);
/* Now trim the list to be not more than 8 elements */
if (lg_prealloc_count > 8) {
ext4_mb_discard_lg_preallocations(sb, lg,
- order, lg_prealloc_count);
+ order, lg_prealloc_count);
return;
}
return ;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f169da4..b7e74b5 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
* cluster groups will be staying in cache for the duration of
* this operation.
*/
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
/* Claim the first region */
status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
@@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
* Do this *after* figuring out how many bits we're taking out
* of our target group.
*/
- if (ac->ac_allow_chain_relink &&
+ if (!ac->ac_disable_chain_relink &&
(prev_group_bh) &&
(ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
status = ocfs2_relink_block_group(handle, alloc_inode,
@@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
- ac->ac_allow_chain_relink = 1;
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
@@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
* searching each chain in order. Don't allow chain relinking
* because we only calculate enough journal credits for one
* relink per alloc. */
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b8afabf..a36d0aa 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,7 +49,7 @@ struct ocfs2_alloc_context {
/* these are used by the chain search */
u16 ac_chain;
- int ac_allow_chain_relink;
+ int ac_disable_chain_relink;
group_search_t *ac_group_search;
u64 ac_last_group;
diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h
index da64e15..6cdabb4 100644
--- a/include/linux/auto_fs.h
+++ b/include/linux/auto_fs.h
@@ -31,25 +31,16 @@
#define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION
/*
- * Architectures where both 32- and 64-bit binaries can be executed
- * on 64-bit kernels need this. This keeps the structure format
- * uniform, and makes sure the wait_queue_token isn't too big to be
- * passed back down to the kernel.
- *
- * This assumes that on these architectures:
- * mode 32 bit 64 bit
- * -------------------------
- * int 32 bit 32 bit
- * long 32 bit 64 bit
- *
- * If so, 32-bit user-space code should be backwards compatible.
+ * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
+ * back to the kernel via ioctl from userspace. On architectures where 32- and
+ * 64-bit userspace binaries can be executed it's important that the size of
+ * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we
+ * do not break the binary ABI interface by changing the structure size.
*/
-
-#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \
- || defined(__powerpc__) || defined(__s390__)
-typedef unsigned int autofs_wqt_t;
-#else
+#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */
typedef unsigned long autofs_wqt_t;
+#else
+typedef unsigned int autofs_wqt_t;
#endif
/* Packet types */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9a85412..a6dd995 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -413,6 +413,7 @@ struct quota_module_name {
#define INIT_QUOTA_MODULE_NAMES {\
{QFMT_VFS_OLD, "quota_v1"},\
{QFMT_VFS_V0, "quota_v2"},\
+ {QFMT_VFS_V1, "quota_v2"},\
{0, NULL}}
#else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5039e0a..b5fb60f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2567,7 +2567,16 @@ static inline void thread_group_cputime_init(struct signal_struct *sig)
extern void recalc_sigpending_and_wake(struct task_struct *t);
extern void recalc_sigpending(void);
-extern void signal_wake_up(struct task_struct *t, int resume_stopped);
+extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
+
+static inline void signal_wake_up(struct task_struct *t, bool resume)
+{
+ signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
+}
+static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
+{
+ signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
+}
/*
* Wrappers for p->thread_info->cpu access. No-op on UP.
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6cbe033..ea76c9c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2499,8 +2499,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
dentry = task_cs(tsk)->css.cgroup->dentry;
spin_lock(&cpuset_buffer_lock);
- snprintf(cpuset_name, CPUSET_NAME_LEN,
- dentry ? (const char *)dentry->d_name.name : "/");
+
+ if (!dentry) {
+ strcpy(cpuset_name, "/");
+ } else {
+ spin_lock(&dentry->d_lock);
+ strlcpy(cpuset_name, (const char *)dentry->d_name.name,
+ CPUSET_NAME_LEN);
+ spin_unlock(&dentry->d_lock);
+ }
+
nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
tsk->mems_allowed);
printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 4556182..d2da8ad 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -639,6 +639,13 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
{
struct k_itimer *timr;
+ /*
+ * timer_t could be any type >= int and we want to make sure any
+ * @timer_id outside positive int range fails lookup.
+ */
+ if ((unsigned long long)timer_id > INT_MAX)
+ return NULL;
+
rcu_read_lock();
timr = idr_find(&posix_timers_id, (int)timer_id);
if (timr) {
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2df1157..40581ee 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -38,6 +38,36 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
child->parent = new_parent;
}
+/* Ensure that nothing can wake it up, even SIGKILL */
+static bool ptrace_freeze_traced(struct task_struct *task)
+{
+ bool ret = false;
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+ task->state = __TASK_TRACED;
+ ret = true;
+ }
+ spin_unlock_irq(&task->sighand->siglock);
+
+ return ret;
+}
+
+static void ptrace_unfreeze_traced(struct task_struct *task)
+{
+ if (task->state != __TASK_TRACED)
+ return;
+
+ WARN_ON(!task->ptrace || task->parent != current);
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ spin_unlock_irq(&task->sighand->siglock);
+}
+
/**
* __ptrace_unlink - unlink ptracee and restore its execution state
* @child: ptracee to be unlinked
@@ -92,7 +122,7 @@ void __ptrace_unlink(struct task_struct *child)
* TASK_KILLABLE sleeps.
*/
if (child->group_stop & GROUP_STOP_PENDING || task_is_traced(child))
- signal_wake_up(child, task_is_traced(child));
+ ptrace_signal_wake_up(child, true);
spin_unlock(&child->sighand->siglock);
}
@@ -112,23 +142,29 @@ int ptrace_check_attach(struct task_struct *child, int kill)
* be changed by us so it's not changing right after this.
*/
read_lock(&tasklist_lock);
- if ((child->ptrace & PT_PTRACED) && child->parent == current) {
+ if (child->ptrace && child->parent == current) {
+ WARN_ON(child->state == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
*/
- spin_lock_irq(&child->sighand->siglock);
- WARN_ON_ONCE(task_is_stopped(child));
- if (task_is_traced(child) || kill)
+ if (kill || ptrace_freeze_traced(child))
ret = 0;
- spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
- if (!ret && !kill)
- ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
+ if (!ret && !kill) {
+ if (!wait_task_inactive(child, __TASK_TRACED)) {
+ /*
+ * This can only happen if may_ptrace_stop() fails and
+ * ptrace_stop() changes ->state back to TASK_RUNNING,
+ * so we should not worry about leaking __TASK_TRACED.
+ */
+ WARN_ON(child->state == __TASK_TRACED);
+ ret = -ESRCH;
+ }
+ }
- /* All systems go.. */
return ret;
}
@@ -245,7 +281,7 @@ static int ptrace_attach(struct task_struct *task)
*/
if (task_is_stopped(task)) {
task->group_stop |= GROUP_STOP_PENDING | GROUP_STOP_TRAPPING;
- signal_wake_up(task, 1);
+ signal_wake_up_state(task, __TASK_STOPPED);
wait_trap = true;
}
@@ -777,6 +813,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
goto out_put_task_struct;
ret = arch_ptrace(child, request, addr, data);
+ if (ret || request != PTRACE_DETACH)
+ ptrace_unfreeze_traced(child);
out_put_task_struct:
put_task_struct(child);
@@ -915,8 +953,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (!ret)
+ if (!ret) {
ret = compat_arch_ptrace(child, request, addr, data);
+ if (ret || request != PTRACE_DETACH)
+ ptrace_unfreeze_traced(child);
+ }
out_put_task_struct:
put_task_struct(child);
diff --git a/kernel/sched.c b/kernel/sched.c
index e788b66..89a9c34 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2779,7 +2779,8 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
- return try_to_wake_up(p, TASK_ALL, 0);
+ WARN_ON(task_is_stopped_or_traced(p));
+ return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);
diff --git a/kernel/signal.c b/kernel/signal.c
index 43fee1c..51f2e69 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -631,23 +631,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
* No need to set need_resched since signal event passing
* goes through ->blocked
*/
-void signal_wake_up(struct task_struct *t, int resume)
+void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
- unsigned int mask;
-
set_tsk_thread_flag(t, TIF_SIGPENDING);
-
/*
- * For SIGKILL, we want to wake it up in the stopped/traced/killable
+ * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
* case. We don't check t->state here because there is a race with it
* executing another processor and just now entering stopped state.
* By using wake_up_state, we ensure the process will wake up and
* handle its death signal.
*/
- mask = TASK_INTERRUPTIBLE;
- if (resume)
- mask |= TASK_WAKEKILL;
- if (!wake_up_state(t, mask))
+ if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
kick_process(t);
}
@@ -1675,6 +1669,10 @@ static inline int may_ptrace_stop(void)
* If SIGKILL was already sent before the caller unlocked
* ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
+ *
+ * This is almost outdated, a task with the pending SIGKILL can't
+ * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
+ * after SIGKILL was already dequeued.
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
@@ -1806,6 +1804,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
if (gstop_done)
do_notify_parent_cldstop(current, false, why);
+ /* tasklist protects us from ptrace_freeze_traced() */
__set_current_state(TASK_RUNNING);
if (clear_code)
current->exit_code = 0;
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index e055e8b..17c20c7 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1194,9 +1194,10 @@ static ssize_t bin_dn_node_address(struct file *file,
/* Convert the decnet address to binary */
result = -EIO;
- nodep = strchr(buf, '.') + 1;
+ nodep = strchr(buf, '.');
if (!nodep)
goto out;
+ ++nodep;
area = simple_strtoul(buf, NULL, 10);
node = simple_strtoul(nodep, NULL, 10);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e96eee3..86fd417 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3432,37 +3432,51 @@ static void ftrace_init_module(struct module *mod,
ftrace_process_locs(mod, start, end);
}
-static int ftrace_module_notify(struct notifier_block *self,
- unsigned long val, void *data)
+static int ftrace_module_notify_enter(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct module *mod = data;
- switch (val) {
- case MODULE_STATE_COMING:
+ if (val == MODULE_STATE_COMING)
ftrace_init_module(mod, mod->ftrace_callsites,
mod->ftrace_callsites +
mod->num_ftrace_callsites);
- break;
- case MODULE_STATE_GOING:
+ return 0;
+}
+
+static int ftrace_module_notify_exit(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
+ if (val == MODULE_STATE_GOING)
ftrace_release_mod(mod);
- break;
- }
return 0;
}
#else
-static int ftrace_module_notify(struct notifier_block *self,
- unsigned long val, void *data)
+static int ftrace_module_notify_enter(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return 0;
+}
+static int ftrace_module_notify_exit(struct notifier_block *self,
+ unsigned long val, void *data)
{
return 0;
}
#endif /* CONFIG_MODULES */
-struct notifier_block ftrace_module_nb = {
- .notifier_call = ftrace_module_notify,
+struct notifier_block ftrace_module_enter_nb = {
+ .notifier_call = ftrace_module_notify_enter,
.priority = INT_MAX, /* Run before anything that can use kprobes */
};
+struct notifier_block ftrace_module_exit_nb = {
+ .notifier_call = ftrace_module_notify_exit,
+ .priority = INT_MIN, /* Run after anything that can remove kprobes */
+};
+
extern unsigned long __start_mcount_loc[];
extern unsigned long __stop_mcount_loc[];
@@ -3494,9 +3508,13 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
- ret = register_module_notifier(&ftrace_module_nb);
+ ret = register_module_notifier(&ftrace_module_enter_nb);
+ if (ret)
+ pr_warning("Failed to register trace ftrace module enter notifier\n");
+
+ ret = register_module_notifier(&ftrace_module_exit_nb);
if (ret)
- pr_warning("Failed to register trace ftrace module notifier\n");
+ pr_warning("Failed to register trace ftrace module exit notifier\n");
set_ftrace_early_filters();
diff --git a/lib/idr.c b/lib/idr.c
index e15502e..b0540c6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -621,7 +621,14 @@ void *idr_get_next(struct idr *idp, int *nextidp)
return p;
}
- id += 1 << n;
+ /*
+ * Proceed to the next layer at the current level. Unlike
+ * idr_for_each(), @id isn't guaranteed to be aligned to
+ * layer boundary at this point and adding 1 << n may
+ * incorrectly skip IDs. Make sure we jump to the
+ * beginning of the next layer using round_up().
+ */
+ id = round_up(id + 1, 1 << n);
while (n < fls(id)) {
n += IDR_BITS;
p = *--paa;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 05dbccf..e47876c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -824,7 +824,6 @@ static void svc_age_temp_xprts(unsigned long closure)
struct svc_serv *serv = (struct svc_serv *)closure;
struct svc_xprt *xprt;
struct list_head *le, *next;
- LIST_HEAD(to_be_aged);
dprintk("svc_age_temp_xprts\n");
@@ -845,25 +844,15 @@ static void svc_age_temp_xprts(unsigned long closure)
if (atomic_read(&xprt->xpt_ref.refcount) > 1 ||
test_bit(XPT_BUSY, &xprt->xpt_flags))
continue;
- svc_xprt_get(xprt);
- list_move(le, &to_be_aged);
+ list_del_init(le);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
set_bit(XPT_DETACHED, &xprt->xpt_flags);
- }
- spin_unlock_bh(&serv->sv_lock);
-
- while (!list_empty(&to_be_aged)) {
- le = to_be_aged.next;
- /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
- list_del_init(le);
- xprt = list_entry(le, struct svc_xprt, xpt_list);
-
dprintk("queuing xprt %p for closing\n", xprt);
/* a thread will dequeue and close it soon */
svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
}
+ spin_unlock_bh(&serv->sv_lock);
mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}