From 30e49c263e36341b60b735cbef5ca37912549264 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 18 Oct 2007 23:40:10 -0700 Subject: pid namespaces: allow cloning of new namespace When clone() is invoked with CLONE_NEWPID, create a new pid namespace and then create a new struct pid for the new process. Allocate pid_t's for the new process in the new pid namespace and all ancestor pid namespaces. Make the newly cloned process the session and process group leader. Since the active pid namespace is special and expected to be the first entry in pid->upid_list, preserve the order of pid namespaces. The size of 'struct pid' is dependent on the the number of pid namespaces the process exists in, so we use multiple pid-caches'. Only one pid cache is created during system startup and this used by processes that exist only in init_pid_ns. When a process clones its pid namespace, we create additional pid caches as necessary and use the pid cache to allocate 'struct pids' for that depth. Note, that with this patch the newly created namespace won't work, since the rest of the kernel still uses global pids, but this is to be fixed soon. Init pid namespace still works. [oleg@tv-sign.ru: merge fix] Signed-off-by: Pavel Emelyanov Signed-off-by: Sukadev Bhattiprolu Cc: Paul Menage Cc: "Eric W. Biederman" Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index bab3419..f252784 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -973,7 +973,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, - int __user *parent_tidptr, int __user *child_tidptr, struct pid *pid) { @@ -1043,11 +1042,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); - retval = -EFAULT; - if (clone_flags & CLONE_PARENT_SETTID) - if (put_user(p->pid, parent_tidptr)) - goto bad_fork_cleanup_delays_binfmt; - INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; @@ -1289,11 +1283,22 @@ static struct task_struct *copy_process(unsigned long clone_flags, __ptrace_link(p, current->parent); if (thread_group_leader(p)) { - p->signal->tty = current->signal->tty; - p->signal->pgrp = task_pgrp_nr(current); - set_task_session(p, task_session_nr(current)); - attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); - attach_pid(p, PIDTYPE_SID, task_session(current)); + if (clone_flags & CLONE_NEWPID) { + p->nsproxy->pid_ns->child_reaper = p; + p->signal->tty = NULL; + p->signal->pgrp = p->pid; + set_task_session(p, p->pid); + attach_pid(p, PIDTYPE_PGID, pid); + attach_pid(p, PIDTYPE_SID, pid); + } else { + p->signal->tty = current->signal->tty; + p->signal->pgrp = task_pgrp_nr(current); + set_task_session(p, task_session_nr(current)); + attach_pid(p, PIDTYPE_PGID, + task_pgrp(current)); + attach_pid(p, PIDTYPE_SID, + task_session(current)); + } list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; @@ -1339,7 +1344,6 @@ bad_fork_cleanup_policy: bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); -bad_fork_cleanup_delays_binfmt: delayacct_tsk_free(p); if (p->binfmt) module_put(p->binfmt->module); @@ -1366,7 +1370,7 @@ struct task_struct * __cpuinit fork_idle(int cpu) struct task_struct *task; struct pt_regs regs; - task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, + task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, &init_struct_pid); if (!IS_ERR(task)) init_idle(task, cpu); @@ -1414,7 +1418,7 @@ long do_fork(unsigned long clone_flags, } p = copy_process(clone_flags, stack_start, regs, stack_size, - parent_tidptr, child_tidptr, NULL); + child_tidptr, NULL); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1422,7 +1426,16 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; - nr = pid_nr(task_pid(p)); + /* + * this is enough to call pid_nr_ns here, but this if + * improves optimisation of regular fork() + */ + nr = (clone_flags & CLONE_NEWPID) ? + task_pid_nr_ns(p, current->nsproxy->pid_ns) : + task_pid_vnr(p); + + if (clone_flags & CLONE_PARENT_SETTID) + put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; -- cgit v1.1