kernel/sysctl.c         | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
kernel/user_namespace.c | 16 ++++++++++++
2 files changed, 83 insertions(+)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 083be6af29d7..6f8920bd5bcf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -128,6 +128,11 @@ static int ten_thousand = 10000;
 #ifdef CONFIG_PERF_EVENTS
 static int six_hundred_forty_kb = 640 * 1024;
 #endif
+#ifdef CONFIG_USER_NS
+/* Defined in kernel/user_namespace.c */
+extern int sysctl_userns_restrict;
+#endif
+
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1657,6 +1662,38 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 	return err;
 }
 
+#ifdef CONFIG_USER_NS
+/*
+ * Handler for the "unprivileged_userns_clone" sysctl, which presents the
+ * logical inverse of sysctl_userns_restrict: a read reports
+ * !sysctl_userns_restrict and a successful write of v stores !v.
+ *
+ * The table entry carries no storage of its own (.data is NULL), so a copy
+ * of the entry is pointed at a stack temporary and passed to
+ * proc_dointvec_minmax().
+ */
+static int sysctl_unprivileged_userns_clone(struct ctl_table *table, int write,
+					    void *buffer, size_t *lenp,
+					    loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = !sysctl_userns_restrict;
+
+	t = *table;
+	t.data = &state;
+
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+
+	if (write)
+		sysctl_userns_restrict = !state;
+
+	return 0;
+}
+#endif
+
 #else /* CONFIG_PROC_SYSCTL */
 
 int proc_dostring(struct ctl_table *table, int write,
@@ -1737,6 +1774,16 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+#ifdef CONFIG_USER_NS
+/* Without CONFIG_PROC_SYSCTL the handler is a stub that returns -ENOSYS. */
+static int sysctl_unprivileged_userns_clone(struct ctl_table *table, int write,
+					    void *buffer, size_t *lenp,
+					    loff_t *ppos)
+{
+	return -ENOSYS;
+}
+#endif
+
 #endif /* CONFIG_PROC_SYSCTL */
 
 #if defined(CONFIG_SYSCTL)
@@ -2306,6 +2353,26 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
+#endif
+#ifdef CONFIG_USER_NS
+	{
+		.procname	= "userns_restrict",
+		.data		= &sysctl_userns_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "unprivileged_userns_clone",
+		.data		= NULL /* filled in by the handler */,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_unprivileged_userns_clone,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #endif
 	{
 		.procname	= "ngroups_max",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b2e3ca7ee99..8a3806a7fb67 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -23,6 +23,12 @@
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 static DEFINE_MUTEX(userns_state_mutex);
+
+/*
+ * When non-zero, create_user_ns() fails with -EPERM unless capable() grants
+ * the caller CAP_SYS_ADMIN, CAP_SETUID and CAP_SETGID.  Set by default.
+ */
+int sysctl_userns_restrict __read_mostly = 1;
 
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
@@ -74,6 +80,16 @@ int create_user_ns(struct cred *new)
 	struct ucounts *ucounts;
 	int ret, i;
 
+	/*
+	 * While sysctl_userns_restrict is set, only callers holding all of
+	 * CAP_SYS_ADMIN, CAP_SETUID and CAP_SETGID may create a namespace.
+	 */
+	ret = -EPERM;
+	if (sysctl_userns_restrict && !(capable(CAP_SYS_ADMIN) &&
+					capable(CAP_SETUID) &&
+					capable(CAP_SETGID)))
+		goto fail;
+
 	ret = -ENOSPC;
 	if (parent_ns->level > 32)
 		goto fail;