diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 12 | ||||
-rw-r--r-- | kernel/audit_tree.c | 28 | ||||
-rw-r--r-- | kernel/auditfilter.c | 15 | ||||
-rw-r--r-- | kernel/auditsc.c | 33 | ||||
-rw-r--r-- | kernel/cgroup.c | 153 | ||||
-rw-r--r-- | kernel/exit.c | 12 | ||||
-rw-r--r-- | kernel/fork.c | 18 | ||||
-rw-r--r-- | kernel/futex.c | 52 | ||||
-rw-r--r-- | kernel/futex_compat.c | 11 | ||||
-rw-r--r-- | kernel/hrtimer.c | 48 | ||||
-rw-r--r-- | kernel/irq/chip.c | 20 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 3 | ||||
-rw-r--r-- | kernel/kmod.c | 5 | ||||
-rw-r--r-- | kernel/marker.c | 4 | ||||
-rw-r--r-- | kernel/module.c | 7 | ||||
-rw-r--r-- | kernel/posix-timers.c | 8 | ||||
-rw-r--r-- | kernel/power/disk.c | 4 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 42 | ||||
-rw-r--r-- | kernel/sched.c | 5 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 4 |
20 files changed, 291 insertions, 193 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index c8555b1..2eeea9a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1312,26 +1312,26 @@ void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) /* This is a helper-function to print the escaped d_path */ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, - struct dentry *dentry, struct vfsmount *vfsmnt) + struct path *path) { - char *p, *path; + char *p, *pathname; if (prefix) audit_log_format(ab, " %s", prefix); /* We will allow 11 spaces for ' (deleted)' to be appended */ - path = kmalloc(PATH_MAX+11, ab->gfp_mask); - if (!path) { + pathname = kmalloc(PATH_MAX+11, ab->gfp_mask); + if (!pathname) { audit_log_format(ab, "<no memory>"); return; } - p = d_path(dentry, vfsmnt, path, PATH_MAX+11); + p = d_path(path, pathname, PATH_MAX+11); if (IS_ERR(p)) { /* Should never happen since we send PATH_MAX */ /* FIXME: can we save some information here? */ audit_log_format(ab, "<too long>"); } else audit_log_untrustedstring(ab, p); - kfree(path); + kfree(pathname); } /** diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f4fcf58..9ef5e0a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -549,8 +549,8 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(nd.mnt, nd.dentry); - path_release(&nd); + root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry); + path_put(&nd.path); if (!root_mnt) goto skip_it; @@ -583,17 +583,17 @@ skip_it: static int is_under(struct vfsmount *mnt, struct dentry *dentry, struct nameidata *nd) { - if (mnt != nd->mnt) { + if (mnt != nd->path.mnt) { for (;;) { if (mnt->mnt_parent == mnt) return 0; - if (mnt->mnt_parent == nd->mnt) + if (mnt->mnt_parent == nd->path.mnt) break; mnt = mnt->mnt_parent; } dentry = mnt->mnt_mountpoint; } - return is_subdir(dentry, nd->dentry); + return is_subdir(dentry, nd->path.dentry); } int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) @@ -641,8 +641,8 @@ int audit_add_tree_rule(struct audit_krule *rule) err = path_lookup(tree->pathname, 0, &nd); if (err) goto Err; - mnt = collect_mounts(nd.mnt, nd.dentry); - path_release(&nd); + mnt = collect_mounts(nd.path.mnt, nd.path.dentry); + path_put(&nd.path); if (!mnt) { err = -ENOMEM; goto Err; @@ -701,8 +701,8 @@ int audit_tag_tree(char *old, char *new) err = path_lookup(new, 0, &nd); if (err) return err; - tagged = collect_mounts(nd.mnt, nd.dentry); - path_release(&nd); + tagged = collect_mounts(nd.path.mnt, nd.path.dentry); + path_put(&nd.path); if (!tagged) return -ENOMEM; @@ -711,9 +711,9 @@ int audit_tag_tree(char *old, char *new) drop_collected_mounts(tagged); return err; } - mnt = mntget(nd.mnt); - dentry = dget(nd.dentry); - path_release(&nd); + mnt = mntget(nd.path.mnt); + dentry = dget(nd.path.dentry); + path_put(&nd.path); if (dentry == tagged->mnt_root && dentry == mnt->mnt_root) follow_up(&mnt, &dentry); @@ -744,13 +744,13 @@ int audit_tag_tree(char *old, char *new) spin_lock(&vfsmount_lock); if (!is_under(mnt, dentry, &nd)) { spin_unlock(&vfsmount_lock); - path_release(&nd); + path_put(&nd.path); put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } spin_unlock(&vfsmount_lock); - path_release(&nd); + path_put(&nd.path); list_for_each_entry(p, &list, mnt_list) { failed = tag_chunk(p->mnt_root->d_inode, tree); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 6f19fd4..2f2914b 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -169,8 +169,8 @@ static struct audit_parent *audit_init_parent(struct nameidata *ndp) inotify_init_watch(&parent->wdata); /* grab a ref so inotify watch hangs around until we take audit_filter_mutex */ get_inotify_watch(&parent->wdata); - wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode, - AUDIT_IN_WATCH); + wd = inotify_add_watch(audit_ih, &parent->wdata, + ndp->path.dentry->d_inode, AUDIT_IN_WATCH); if (wd < 0) { audit_free_parent(&parent->wdata); return ERR_PTR(wd); @@ -1161,11 +1161,11 @@ static int audit_get_nd(char *path, struct nameidata **ndp, static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw) { if (ndp) { - path_release(ndp); + path_put(&ndp->path); kfree(ndp); } if (ndw) { - path_release(ndw); + path_put(&ndw->path); kfree(ndw); } } @@ -1214,8 +1214,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp, /* update watch filter fields */ if (ndw) { - watch->dev = ndw->dentry->d_inode->i_sb->s_dev; - watch->ino = ndw->dentry->d_inode->i_ino; + watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev; + watch->ino = ndw->path.dentry->d_inode->i_ino; } /* The audit_filter_mutex must not be held during inotify calls because @@ -1225,7 +1225,8 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp, */ mutex_unlock(&audit_filter_mutex); - if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) { + if (inotify_find_watch(audit_ih, ndp->path.dentry->d_inode, + &i_watch) < 0) { parent = audit_init_parent(ndp); if (IS_ERR(parent)) { /* caller expects mutex locked */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1c06ecf..2087d6d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -208,8 +208,7 @@ struct audit_context { int name_count; struct audit_names names[AUDIT_NAMES]; char * filterkey; /* key for rule that triggered record */ - struct dentry * pwd; - struct vfsmount * pwdmnt; + struct path pwd; struct audit_context *previous; /* For nested syscalls */ struct audit_aux_data *aux; struct audit_aux_data *aux_pids; @@ -786,12 +785,9 @@ static inline void audit_free_names(struct audit_context *context) __putname(context->names[i].name); } context->name_count = 0; - if (context->pwd) - dput(context->pwd); - if (context->pwdmnt) - mntput(context->pwdmnt); - context->pwd = NULL; - context->pwdmnt = NULL; + path_put(&context->pwd); + context->pwd.dentry = NULL; + context->pwd.mnt = NULL; } static inline void audit_free_aux(struct audit_context *context) @@ -930,8 +926,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { audit_log_d_path(ab, "exe=", - vma->vm_file->f_path.dentry, - vma->vm_file->f_path.mnt); + &vma->vm_file->f_path); break; } vma = vma->vm_next; @@ -1005,9 +1000,10 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len = -1) || len > MAX_ARG_STRLEN - 1)) { + if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) { WARN_ON(1); send_sig(SIGKILL, current, 0); + return -1; } /* walk the whole argument looking for non-ascii chars */ @@ -1025,6 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, if (ret) { WARN_ON(1); send_sig(SIGKILL, current, 0); + return -1; } buf[to_send] = '\0'; has_cntl = audit_string_contains_control(buf, to_send); @@ -1088,6 +1085,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, if (ret) { WARN_ON(1); send_sig(SIGKILL, current, 0); + return -1; } buf[to_send] = '\0'; @@ -1341,10 +1339,10 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->target_sid, context->target_comm)) call_panic = 1; - if (context->pwd && context->pwdmnt) { + if (context->pwd.dentry && context->pwd.mnt) { ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { - audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); + audit_log_d_path(ab, "cwd=", &context->pwd); audit_log_end(ab); } } @@ -1367,8 +1365,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts case 0: /* name was specified as a relative path and the * directory component is the cwd */ - audit_log_d_path(ab, " name=", context->pwd, - context->pwdmnt); + audit_log_d_path(ab, " name=", &context->pwd); break; default: /* log the name's directory component */ @@ -1695,10 +1692,10 @@ void __audit_getname(const char *name) context->names[context->name_count].ino = (unsigned long)-1; context->names[context->name_count].osid = 0; ++context->name_count; - if (!context->pwd) { + if (!context->pwd.dentry) { read_lock(¤t->fs->lock); - context->pwd = dget(current->fs->pwd); - context->pwdmnt = mntget(current->fs->pwdmnt); + context->pwd = current->fs->pwd; + path_get(¤t->fs->pwd); read_unlock(¤t->fs->lock); } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4766bb6..d8abe99 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -113,9 +113,9 @@ static int root_count; #define dummytop (&rootnode.top_cgroup) /* This flag indicates whether tasks in the fork and exit paths should - * take callback_mutex and check for fork/exit handlers to call. This - * avoids us having to do extra work in the fork/exit path if none of the - * subsystems need to be called. + * check for fork/exit handlers to call. This avoids us having to do + * extra work in the fork/exit path if none of the subsystems need to + * be called. */ static int need_forkexit_callback; @@ -307,7 +307,6 @@ static inline void put_css_set_taskexit(struct css_set *cg) * template: location in which to build the desired set of subsystem * state objects for the new cgroup group */ - static struct css_set *find_existing_css_set( struct css_set *oldcg, struct cgroup *cgrp, @@ -320,7 +319,7 @@ static struct css_set *find_existing_css_set( /* Built the set of subsystem state objects that we want to * see in the new css_set */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - if (root->subsys_bits & (1ull << i)) { + if (root->subsys_bits & (1UL << i)) { /* Subsystem is in this hierarchy. So we want * the subsystem state from the new * cgroup */ @@ -354,7 +353,6 @@ static struct css_set *find_existing_css_set( * and chains them on tmp through their cgrp_link_list fields. Returns 0 on * success or a negative error */ - static int allocate_cg_links(int count, struct list_head *tmp) { struct cg_cgroup_link *link; @@ -396,7 +394,6 @@ static void free_cg_links(struct list_head *tmp) * substituted into the appropriate hierarchy. Must be called with * cgroup_mutex held */ - static struct css_set *find_css_set( struct css_set *oldcg, struct cgroup *cgrp) { @@ -473,7 +470,6 @@ static struct css_set *find_css_set( /* Link this cgroup group into the list */ list_add(&res->list, &init_css_set.list); css_set_count++; - INIT_LIST_HEAD(&res->tasks); write_unlock(&css_set_lock); return res; @@ -507,8 +503,8 @@ static struct css_set *find_css_set( * critical pieces of code here. The exception occurs on cgroup_exit(), * when a task in a notify_on_release cgroup exits. Then cgroup_mutex * is taken, and if the cgroup count is zero, a usermode call made - * to /sbin/cgroup_release_agent with the name of the cgroup (path - * relative to the root of cgroup file system) as the argument. + * to the release agent with the name of the cgroup (path relative to + * the root of cgroup file system) as the argument. * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all @@ -521,7 +517,7 @@ static struct css_set *find_css_set( * * The need for this exception arises from the action of * cgroup_attach_task(), which overwrites one tasks cgroup pointer with - * another. It does so using cgroup_mutexe, however there are + * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference * task->cgroup without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as @@ -537,7 +533,6 @@ static struct css_set *find_css_set( * cgroup_lock - lock out any changes to cgroup structures * */ - void cgroup_lock(void) { mutex_lock(&cgroup_mutex); @@ -548,7 +543,6 @@ void cgroup_lock(void) * * Undo the lock taken in a previous cgroup_lock() call. */ - void cgroup_unlock(void) { mutex_unlock(&cgroup_mutex); @@ -590,7 +584,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) * Call subsys's pre_destroy handler. * This is called before css refcnt check. */ - static void cgroup_call_pre_destroy(struct cgroup *cgrp) { struct cgroup_subsys *ss; @@ -600,7 +593,6 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp) return; } - static void cgroup_diput(struct dentry *dentry, struct inode *inode) { /* is dentry a directory ? if so, kfree() associated cgroup */ @@ -696,7 +688,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, added_bits = final_bits & ~root->actual_subsys_bits; /* Check that any added subsystems are currently free */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - unsigned long long bit = 1ull << i; + unsigned long bit = 1UL << i; struct cgroup_subsys *ss = subsys[i]; if (!(bit & added_bits)) continue; @@ -927,7 +919,6 @@ static int cgroup_get_rootdir(struct super_block *sb) if (!inode) return -ENOMEM; - inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_op = &cgroup_dir_inode_operations; /* directories start off with i_nlink == 2 (for "." entry) */ @@ -961,8 +952,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, } root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) + if (!root) { + if (opts.release_agent) + kfree(opts.release_agent); return -ENOMEM; + } init_cgroup_root(root); root->subsys_bits = opts.subsys_bits; @@ -1129,8 +1123,13 @@ static inline struct cftype *__d_cft(struct dentry *dentry) return dentry->d_fsdata; } -/* - * Called with cgroup_mutex held. Writes path of cgroup into buf. +/** + * cgroup_path - generate the path of a cgroup + * @cgrp: the cgroup in question + * @buf: the buffer to write the path into + * @buflen: the length of the buffer + * + * Called with cgroup_mutex held. Writes path of cgroup into buf. * Returns 0 on success, -errno on error. */ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) @@ -1188,11 +1187,13 @@ static void get_first_subsys(const struct cgroup *cgrp, *subsys_id = test_ss->subsys_id; } -/* - * Attach task 'tsk' to cgroup 'cgrp' +/** + * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' + * @cgrp: the cgroup the task is attaching to + * @tsk: the task to be attached * - * Call holding cgroup_mutex. May take task_lock of - * the task 'pid' during call. + * Call holding cgroup_mutex. May take task_lock of + * the task 'tsk' during call. */ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { @@ -1293,7 +1294,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf) } /* The various types of files and directories in a cgroup file system */ - enum cgroup_filetype { FILE_ROOT, FILE_DIR, @@ -1584,12 +1584,11 @@ static int cgroup_create_file(struct dentry *dentry, int mode, } /* - * cgroup_create_dir - create a directory for an object. - * cgrp: the cgroup we create the directory for. - * It must have a valid ->parent field - * And we are going to fill its ->dentry field. - * dentry: dentry of the new cgroup - * mode: mode to set on new directory. + * cgroup_create_dir - create a directory for an object. + * @cgrp: the cgroup we create the directory for. It must have a valid + * ->parent field. And we are going to fill its ->dentry field. + * @dentry: dentry of the new cgroup + * @mode: mode to set on new directory. */ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, int mode) @@ -1651,8 +1650,12 @@ int cgroup_add_files(struct cgroup *cgrp, return 0; } -/* Count the number of tasks in a cgroup. */ - +/** + * cgroup_task_count - count the number of tasks in a cgroup. + * @cgrp: the cgroup in question + * + * Return the number of tasks in the cgroup. + */ int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; @@ -1962,12 +1965,13 @@ static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) } /** - * Build and fill cgroupstats so that taskstats can export it to user - * space. - * + * cgroupstats_build - build and fill cgroupstats * @stats: cgroupstats to fill information into * @dentry: A dentry entry belonging to the cgroup for which stats have * been requested. + * + * Build and fill cgroupstats so that taskstats can export it to user + * space. */ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { @@ -2199,14 +2203,13 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, } /* - * cgroup_create - create a cgroup - * parent: cgroup that will be parent of the new cgroup. - * name: name of the new cgroup. Will be strcpy'ed. - * mode: mode to set on new inode + * cgroup_create - create a cgroup + * @parent: cgroup that will be parent of the new cgroup + * @dentry: dentry of the new cgroup + * @mode: mode to set on new inode * - * Must be called with the mutex on the parent inode held + * Must be called with the mutex on the parent inode held */ - static long cgroup_create(struct cgroup *parent, struct dentry *dentry, int mode) { @@ -2349,13 +2352,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) parent = cgrp->parent; root = cgrp->root; sb = root->sb; + /* - * Call pre_destroy handlers of subsys + * Call pre_destroy handlers of subsys. Notify subsystems + * that rmdir() request comes. */ cgroup_call_pre_destroy(cgrp); - /* - * Notify subsyses that rmdir() request comes. - */ if (cgroup_has_css_refs(cgrp)) { mutex_unlock(&cgroup_mutex); @@ -2431,8 +2433,10 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) } /** - * cgroup_init_early - initialize cgroups at system boot, and - * initialize any subsystems that request early init. + * cgroup_init_early - cgroup initialization at system boot + * + * Initialize cgroups at system boot, and initialize any + * subsystems that request early init. */ int __init cgroup_init_early(void) { @@ -2474,8 +2478,10 @@ int __init cgroup_init_early(void) } /** - * cgroup_init - register cgroup filesystem and /proc file, and - * initialize any subsystems that didn't request early init. + * cgroup_init - cgroup initialization + * + * Register cgroup filesystem and /proc file, and initialize + * any subsystems that didn't request early init. */ int __init cgroup_init(void) { @@ -2618,7 +2624,7 @@ static struct file_operations proc_cgroupstats_operations = { /** * cgroup_fork - attach newly forked task to its parents cgroup. - * @tsk: pointer to task_struct of forking parent process. + * @child: pointer to task_struct of forking parent process. * * Description: A task inherits its parent's cgroup at fork(). * @@ -2642,9 +2648,12 @@ void cgroup_fork(struct task_struct *child) } /** - * cgroup_fork_callbacks - called on a new task very soon before - * adding it to the tasklist. No need to take any locks since no-one - * can be operating on this task + * cgroup_fork_callbacks - run fork callbacks + * @child: the new task + * + * Called on a new task very soon before adding it to the + * tasklist. No need to take any locks since no-one can + * be operating on this task. */ void cgroup_fork_callbacks(struct task_struct *child) { @@ -2659,11 +2668,14 @@ void cgroup_fork_callbacks(struct task_struct *child) } /** - * cgroup_post_fork - called on a new task after adding it to the - * task list. Adds the task to the list running through its css_set - * if necessary. Has to be after the task is visible on the task list - * in case we race with the first call to cgroup_iter_start() - to - * guarantee that the new task ends up on its list. */ + * cgroup_post_fork - called on a new task after adding it to the task list + * @child: the task in question + * + * Adds the task to the list running through its css_set if necessary. + * Has to be after the task is visible on the task list in case we race + * with the first call to cgroup_iter_start() - to guarantee that the + * new task ends up on its list. + */ void cgroup_post_fork(struct task_struct *child) { if (use_task_css_set_links) { @@ -2676,6 +2688,7 @@ void cgroup_post_fork(struct task_struct *child) /** * cgroup_exit - detach cgroup from exiting task * @tsk: pointer to task_struct of exiting process + * @run_callback: run exit callbacks? * * Description: Detach cgroup from @tsk and release it. * @@ -2706,7 +2719,6 @@ void cgroup_post_fork(struct task_struct *child) * top_cgroup isn't going away, and either task has PF_EXITING set, * which wards off any cgroup_attach_task() attempts, or task is a failed * fork, never visible to cgroup_attach_task. - * */ void cgroup_exit(struct task_struct *tsk, int run_callbacks) { @@ -2743,9 +2755,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) } /** - * cgroup_clone - duplicate the current cgroup in the hierarchy - * that the given subsystem is attached to, and move this task into - * the new child + * cgroup_clone - clone the cgroup the given subsystem is attached to + * @tsk: the task to be moved + * @subsys: the given subsystem + * + * Duplicate the current cgroup in the hierarchy that the given + * subsystem is attached to, and move this task into the new + * child. */ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) { @@ -2858,9 +2874,12 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) return ret; } -/* - * See if "cgrp" is a descendant of the current task's cgroup in - * the appropriate hierarchy +/** + * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp + * @cgrp: the cgroup in question + * + * See if @cgrp is a descendant of the current task's cgroup in + * the appropriate hierarchy. * * If we are sending in dummytop, then presumably we are creating * the top cgroup in the subsystem. @@ -2939,9 +2958,7 @@ void __css_put(struct cgroup_subsys_state *css) * release agent task. We don't bother to wait because the caller of * this routine has no use for the exit status of the release agent * task, so no sense holding our caller up for that. - * */ - static void cgroup_release_agent(struct work_struct *work) { BUG_ON(work != &release_agent_work); diff --git a/kernel/exit.c b/kernel/exit.c index 3b893e7..506a957 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -512,14 +512,10 @@ static void __put_fs_struct(struct fs_struct *fs) { /* No need to hold fs->lock if we are killing it */ if (atomic_dec_and_test(&fs->count)) { - dput(fs->root); - mntput(fs->rootmnt); - dput(fs->pwd); - mntput(fs->pwdmnt); - if (fs->altroot) { - dput(fs->altroot); - mntput(fs->altrootmnt); - } + path_put(&fs->root); + path_put(&fs->pwd); + if (fs->altroot.dentry) + path_put(&fs->altroot); kmem_cache_free(fs_cachep, fs); } } diff --git a/kernel/fork.c b/kernel/fork.c index 4363a4e..dd249c3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -600,16 +600,16 @@ static struct fs_struct *__copy_fs_struct(struct fs_struct *old) rwlock_init(&fs->lock); fs->umask = old->umask; read_lock(&old->lock); - fs->rootmnt = mntget(old->rootmnt); - fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); - if (old->altroot) { - fs->altrootmnt = mntget(old->altrootmnt); - fs->altroot = dget(old->altroot); + fs->root = old->root; + path_get(&old->root); + fs->pwd = old->pwd; + path_get(&old->pwd); + if (old->altroot.dentry) { + fs->altroot = old->altroot; + path_get(&old->altroot); } else { - fs->altrootmnt = NULL; - fs->altroot = NULL; + fs->altroot.mnt = NULL; + fs->altroot.dentry = NULL; } read_unlock(&old->lock); } diff --git a/kernel/futex.c b/kernel/futex.c index a6baaec..06968cd 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -60,6 +60,8 @@ #include "rtmutex_common.h" +int __read_mostly futex_cmpxchg_enabled; + #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) /* @@ -469,6 +471,8 @@ void exit_pi_state_list(struct task_struct *curr) struct futex_hash_bucket *hb; union futex_key key; + if (!futex_cmpxchg_enabled) + return; /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful @@ -1870,6 +1874,8 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len) { + if (!futex_cmpxchg_enabled) + return -ENOSYS; /* * The kernel knows only one size for now: */ @@ -1894,6 +1900,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, struct robust_list_head __user *head; unsigned long ret; + if (!futex_cmpxchg_enabled) + return -ENOSYS; + if (!pid) head = current->robust_list; else { @@ -1997,6 +2006,9 @@ void exit_robust_list(struct task_struct *curr) unsigned long futex_offset; int rc; + if (!futex_cmpxchg_enabled) + return; + /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): @@ -2051,7 +2063,7 @@ void exit_robust_list(struct task_struct *curr) long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { - int ret; + int ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; struct rw_semaphore *fshared = NULL; @@ -2083,13 +2095,16 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); break; case FUTEX_LOCK_PI: - ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); + if (futex_cmpxchg_enabled) + ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); break; case FUTEX_UNLOCK_PI: - ret = futex_unlock_pi(uaddr, fshared); + if (futex_cmpxchg_enabled) + ret = futex_unlock_pi(uaddr, fshared); break; case FUTEX_TRYLOCK_PI: - ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); + if (futex_cmpxchg_enabled) + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; default: ret = -ENOSYS; @@ -2116,7 +2131,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, t = timespec_to_ktime(ts); if (cmd == FUTEX_WAIT) - t = ktime_add(ktime_get(), t); + t = ktime_add_safe(ktime_get(), t); tp = &t; } /* @@ -2145,8 +2160,29 @@ static struct file_system_type futex_fs_type = { static int __init init(void) { - int i = register_filesystem(&futex_fs_type); + u32 curval; + int i; + + /* + * This will fail and we want it. Some arch implementations do + * runtime detection of the futex_atomic_cmpxchg_inatomic() + * functionality. We want to know that before we call in any + * of the complex code paths. Also we want to prevent + * registration of robust lists in that case. NULL is + * guaranteed to fault and we get -EFAULT on functional + * implementation, the non functional ones will return + * -ENOSYS. + */ + curval = cmpxchg_futex_value_locked(NULL, 0, 0); + if (curval == -EFAULT) + futex_cmpxchg_enabled = 1; + for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { + plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); + spin_lock_init(&futex_queues[i].lock); + } + + i = register_filesystem(&futex_fs_type); if (i) return i; @@ -2156,10 +2192,6 @@ static int __init init(void) return PTR_ERR(futex_mnt); } - for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { - plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); - spin_lock_init(&futex_queues[i].lock); - } return 0; } __initcall(init); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 133d558..ff90f04 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task_struct *curr) compat_long_t futex_offset; int rc; + if (!futex_cmpxchg_enabled) + return; + /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): @@ -115,6 +118,9 @@ asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len) { + if (!futex_cmpxchg_enabled) + return -ENOSYS; + if (unlikely(len != sizeof(*head))) return -EINVAL; @@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, struct compat_robust_list_head __user *head; unsigned long ret; + if (!futex_cmpxchg_enabled) + return -ENOSYS; + if (!pid) head = current->compat_robust_list; else { @@ -176,7 +185,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, t = timespec_to_ktime(ts); if (cmd == FUTEX_WAIT) - t = ktime_add(ktime_get(), t); + t = ktime_add_safe(ktime_get(), t); tp = &t; } if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 3f4a57c..98bee01 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -326,6 +326,23 @@ u64 ktime_divns(const ktime_t kt, s64 div) #endif /* BITS_PER_LONG >= 64 */ /* + * Add two ktime values and do a safety check for overflow: + */ +ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) +{ + ktime_t res = ktime_add(lhs, rhs); + + /* + * We use KTIME_SEC_MAX here, the maximum timeout which we can + * return to user space in a timespec: + */ + if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64) + res = ktime_set(KTIME_SEC_MAX, 0); + + return res; +} + +/* * Check, whether the timer is on the callback pending list */ static inline int hrtimer_cb_pending(const struct hrtimer *timer) @@ -425,6 +442,8 @@ static int hrtimer_reprogram(struct hrtimer *timer, ktime_t expires = ktime_sub(timer->expires, base->offset); int res; + WARN_ON_ONCE(timer->expires.tv64 < 0); + /* * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or @@ -435,6 +454,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, if (hrtimer_callback_running(timer)) return 0; + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Nothing wrong + * about that, just avoid to call into the tick code, which + * has now objections against negative expiry values. + */ + if (expires.tv64 < 0) + return -ETIME; + if (expires.tv64 >= expires_next->tv64) return 0; @@ -682,13 +710,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add(timer->expires, interval); - /* - * Make sure, that the result did not wrap with a very large - * interval. - */ - if (timer->expires.tv64 < 0) - timer->expires = ktime_set(KTIME_SEC_MAX, 0); + timer->expires = ktime_add_safe(timer->expires, interval); return orun; } @@ -839,7 +861,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) new_base = switch_hrtimer_base(timer, base); if (mode == HRTIMER_MODE_REL) { - tim = ktime_add(tim, new_base->get_time()); + tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in @@ -848,16 +870,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) * timeouts. This will go away with the GTOD framework. */ #ifdef CONFIG_TIME_LOW_RES - tim = ktime_add(tim, base->resolution); + tim = ktime_add_safe(tim, base->resolution); #endif - /* - * Careful here: User space might have asked for a - * very long sleep, so the add above might result in a - * negative number, which enqueues the timer in front - * of the queue. - */ - if (tim.tv64 < 0) - tim.tv64 = KTIME_MAX; } timer->expires = tim; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index cc54c62..fdb3fbe 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq) } /* + * default shutdown function + */ +static void default_shutdown(unsigned int irq) +{ + struct irq_desc *desc = irq_desc + irq; + + desc->chip->mask(irq); + desc->status |= IRQ_MASKED; +} + +/* * Fixup enable/disable function pointers */ void irq_chip_set_defaults(struct irq_chip *chip) @@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip) chip->disable = default_disable; if (!chip->startup) chip->startup = default_startup; + /* + * We use chip->disable, when the user provided its own. When + * we have default_disable set for chip->disable, then we need + * to use default_shutdown, otherwise the irq line is not + * disabled on free_irq(): + */ if (!chip->shutdown) - chip->shutdown = chip->disable; + chip->shutdown = chip->disable != default_disable ? + chip->disable : default_shutdown; if (!chip->name) chip->name = chip->typename; if (!chip->end) diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index a6b2bc8..088dabb 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -6,6 +6,7 @@ * This file contains spurious interrupt handling. */ +#include <linux/jiffies.h> #include <linux/irq.h> #include <linux/module.h> #include <linux/kallsyms.h> @@ -179,7 +180,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, * otherwise the couter becomes a doomsday timer for otherwise * working systems */ - if (jiffies - desc->last_unhandled > HZ/10) + if (time_after(jiffies, desc->last_unhandled + HZ/10)) desc->irqs_unhandled = 1; else desc->irqs_unhandled++; diff --git a/kernel/kmod.c b/kernel/kmod.c index bb7df2a..22be3ff 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -173,10 +173,7 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - retval = -EPERM; - if (current->fs->root) - retval = kernel_execve(sub_info->path, - sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); /* Exec failed? */ sub_info->retval = retval; diff --git a/kernel/marker.c b/kernel/marker.c index c4c2cd8..50effc0 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -61,8 +61,8 @@ struct marker_entry { int refcount; /* Number of times armed. 0 if disarmed. */ struct rcu_head rcu; void *oldptr; - char rcu_pending:1; - char ptype:1; + unsigned char rcu_pending:1; + unsigned char ptype:1; char name[0]; /* Contains name'\0'format'\0' */ }; diff --git a/kernel/module.c b/kernel/module.c index 92595ba..901cd6ac 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -987,12 +987,11 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, return ret; } - /* * /sys/module/foo/sections stuff * J. Corbet <corbet@lwn.net> */ -#ifdef CONFIG_KALLSYMS +#if defined(CONFIG_KALLSYMS) && defined(CONFIG_SYSFS) static ssize_t module_sect_show(struct module_attribute *mattr, struct module *mod, char *buf) { @@ -1188,7 +1187,7 @@ static inline void add_notes_attrs(struct module *mod, unsigned int nsect, static inline void remove_notes_attrs(struct module *mod) { } -#endif /* CONFIG_KALLSYMS */ +#endif #ifdef CONFIG_SYSFS int module_add_modinfo_attrs(struct module *mod) @@ -1231,9 +1230,7 @@ void module_remove_modinfo_attrs(struct module *mod) } kfree(mod->modinfo_attrs); } -#endif -#ifdef CONFIG_SYSFS int mod_sysfs_init(struct module *mod) { int err; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 022c9c3..a9b0420 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -767,9 +767,11 @@ common_timer_set(struct k_itimer *timr, int flags, /* SIGEV_NONE timers are not queued ! See common_timer_get */ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { /* Setup correct expiry time for relative timers */ - if (mode == HRTIMER_MODE_REL) - timer->expires = ktime_add(timer->expires, - timer->base->get_time()); + if (mode == HRTIMER_MODE_REL) { + timer->expires = + ktime_add_safe(timer->expires, + timer->base->get_time()); + } return 0; } diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 859a8e5..14a656c 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -391,7 +391,7 @@ int hibernation_platform_enter(void) goto Close; suspend_console(); - error = device_suspend(PMSG_SUSPEND); + error = device_suspend(PMSG_HIBERNATE); if (error) goto Resume_console; @@ -404,7 +404,7 @@ int hibernation_platform_enter(void) goto Finish; local_irq_disable(); - error = device_power_down(PMSG_SUSPEND); + error = device_power_down(PMSG_HIBERNATE); if (!error) { hibernation_ops->enter(); /* We should never get here */ diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 95250d7..72a020c 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -875,8 +875,8 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } #endif /* CONFIG_HIGHMEM */ /** - * saveable - Determine whether a non-highmem page should be included in - * the suspend image. + * saveable_page - Determine whether a non-highmem page should be included + * in the suspend image. * * We should save the page if it isn't Nosave, and is not in the range * of pages statically defined as 'unsaveable', and it isn't a part of @@ -897,7 +897,8 @@ static struct page *saveable_page(unsigned long pfn) if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) return NULL; - if (PageReserved(page) && pfn_is_nosave(pfn)) + if (PageReserved(page) + && (!kernel_page_present(page) || pfn_is_nosave(pfn))) return NULL; return page; @@ -938,6 +939,25 @@ static inline void do_copy_page(long *dst, long *src) *dst++ = *src++; } + +/** + * safe_copy_page - check if the page we are going to copy is marked as + * present in the kernel page tables (this always is the case if + * CONFIG_DEBUG_PAGEALLOC is not set and in that case + * kernel_page_present() always returns 'true'). + */ +static void safe_copy_page(void *dst, struct page *s_page) +{ + if (kernel_page_present(s_page)) { + do_copy_page(dst, page_address(s_page)); + } else { + kernel_map_pages(s_page, 1, 1); + do_copy_page(dst, page_address(s_page)); + kernel_map_pages(s_page, 1, 0); + } +} + + #ifdef CONFIG_HIGHMEM static inline struct page * page_is_saveable(struct zone *zone, unsigned long pfn) @@ -946,8 +966,7 @@ page_is_saveable(struct zone *zone, unsigned long pfn) saveable_highmem_page(pfn) : saveable_page(pfn); } -static inline void -copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { struct page *s_page, *d_page; void *src, *dst; @@ -961,29 +980,26 @@ copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) kunmap_atomic(src, KM_USER0); kunmap_atomic(dst, KM_USER1); } else { - src = page_address(s_page); if (PageHighMem(d_page)) { /* Page pointed to by src may contain some kernel * data modified by kmap_atomic() */ - do_copy_page(buffer, src); + safe_copy_page(buffer, s_page); dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); memcpy(dst, buffer, PAGE_SIZE); kunmap_atomic(dst, KM_USER0); } else { - dst = page_address(d_page); - do_copy_page(dst, src); + safe_copy_page(page_address(d_page), s_page); } } } #else #define page_is_saveable(zone, pfn) saveable_page(pfn) -static inline void -copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) { - do_copy_page(page_address(pfn_to_page(dst_pfn)), - page_address(pfn_to_page(src_pfn))); + safe_copy_page(page_address(pfn_to_page(dst_pfn)), + pfn_to_page(src_pfn)); } #endif /* CONFIG_HIGHMEM */ diff --git a/kernel/sched.c b/kernel/sched.c index f28f19e..b387a8d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1831,6 +1831,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) long old_state; struct rq *rq; + smp_wmb(); rq = task_rq_lock(p, &flags); old_state = p->state; if (!(old_state & state)) @@ -3766,7 +3767,7 @@ void scheduler_tick(void) #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) -void add_preempt_count(int val) +void __kprobes add_preempt_count(int val) { /* * Underflow? @@ -3782,7 +3783,7 @@ void add_preempt_count(int val) } EXPORT_SYMBOL(add_preempt_count); -void sub_preempt_count(int val) +void __kprobes sub_preempt_count(int val) { /* * Underflow? diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index d3d94c1..67fe8fc 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -65,9 +65,9 @@ print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); #endif SEQ_printf(m, "\n"); - SEQ_printf(m, " # expires at %Lu nsecs [in %Lu nsecs]\n", + SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n", (unsigned long long)ktime_to_ns(timer->expires), - (unsigned long long)(ktime_to_ns(timer->expires) - now)); + (long long)(ktime_to_ns(timer->expires) - now)); } static void |