Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6:
Remove two unneeded exports and make two symbols static in fs/mpage.c
Cleanup after commit 585d3bc06f4ca57f975a5a1f698f65a45ea66225
Trim includes of fdtable.h
Don't crap into descriptor table in binfmt_som
Trim includes in binfmt_elf
Don't mess with descriptor table in load_elf_binary()
Get rid of indirect include of fs_struct.h
New helper - current_umask()
check_unsafe_exec() doesn't care about signal handlers sharing
New locking/refcounting for fs_struct
Take fs_struct handling to new file (fs/fs_struct.c)
Get rid of bumping fs_struct refcount in pivot_root(2)
Kill unsharing fs_struct in __set_personality()
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 60816e8..4df0b32 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -19,7 +19,6 @@
#include <asm/system.h>
#include <linux/module.h>
#include <linux/spinlock.h>
-#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <linux/sched.h>
#include <linux/fs.h>
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 64f0685..706eb5c 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -635,7 +635,7 @@
if (dentry->d_inode)
goto out_dput;
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
if (flags & SPU_CREATE_GANG)
ret = spufs_create_gang(nd->path.dentry->d_inode,
diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c
index 34ab6d7..55ba6f1 100644
--- a/drivers/char/tty_audit.c
+++ b/drivers/char/tty_audit.c
@@ -10,8 +10,6 @@
*/
#include <linux/audit.h>
-#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/tty.h>
struct tty_audit_buf {
diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c
index 7a84b40..f78f5b0 100644
--- a/drivers/char/tty_ldisc.c
+++ b/drivers/char/tty_ldisc.c
@@ -10,7 +10,6 @@
#include <linux/tty_flip.h>
#include <linux/devpts_fs.h>
#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/console.h>
#include <linux/timer.h>
#include <linux/ctype.h>
diff --git a/fs/Makefile b/fs/Makefile
index 6e82a30..b5cd8e1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o
+ stack.o fs_struct.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 33b7235..40381df 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -12,8 +12,6 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
@@ -21,20 +19,15 @@
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
#include <linux/slab.h>
-#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
-#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
-#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
@@ -576,7 +569,6 @@
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata;
unsigned long elf_bss, elf_brk;
- int elf_exec_fileno;
int retval, i;
unsigned int size;
unsigned long elf_entry;
@@ -631,12 +623,6 @@
goto out_free_ph;
}
- retval = get_unused_fd();
- if (retval < 0)
- goto out_free_ph;
- get_file(bprm->file);
- fd_install(elf_exec_fileno = retval, bprm->file);
-
elf_ppnt = elf_phdata;
elf_bss = 0;
elf_brk = 0;
@@ -655,13 +641,13 @@
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < 2)
- goto out_free_file;
+ goto out_free_ph;
retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
- goto out_free_file;
+ goto out_free_ph;
retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
@@ -956,8 +942,6 @@
kfree(elf_phdata);
- sys_close(elf_exec_fileno);
-
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
@@ -1028,8 +1012,6 @@
fput(interpreter);
out_free_interp:
kfree(elf_interpreter);
-out_free_file:
- sys_close(elf_exec_fileno);
out_free_ph:
kfree(elf_phdata);
goto out;
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 08644a6..eff74b9 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -188,7 +188,6 @@
static int
load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
{
- int som_exec_fileno;
int retval;
unsigned int size;
unsigned long som_entry;
@@ -220,12 +219,6 @@
goto out_free;
}
- retval = get_unused_fd();
- if (retval < 0)
- goto out_free;
- get_file(bprm->file);
- fd_install(som_exec_fileno = retval, bprm->file);
-
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8c3c689..f45dbc1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -204,6 +204,7 @@
}
return sync_blockdev(bdev);
}
+EXPORT_SYMBOL(fsync_bdev);
/**
* freeze_bdev -- lock a filesystem and force it into a consistent state
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1d53b62..7fdd184 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -256,7 +256,7 @@
}
if (!acl)
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
if (IS_POSIXACL(dir) && acl) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bca729f..7594bec 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -267,7 +267,7 @@
goto out_dput;
if (!IS_POSIXACL(parent->dentry->d_inode))
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
error = mnt_want_write(parent->mnt);
if (error)
diff --git a/fs/buffer.c b/fs/buffer.c
index 2963858..c2fa1be 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3315,7 +3315,6 @@
EXPORT_SYMBOL(end_buffer_read_sync);
EXPORT_SYMBOL(end_buffer_write_sync);
EXPORT_SYMBOL(file_fsync);
-EXPORT_SYMBOL(fsync_bdev);
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_cont_expand_simple);
EXPORT_SYMBOL(init_buffer);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 2f35cccf..54dce78 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -254,7 +254,7 @@
return -ENOMEM;
}
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
if (oplockEnabled)
oplock = REQ_OPLOCK;
@@ -479,7 +479,7 @@
rc = -ENOMEM;
else if (pTcon->unix_ext) {
struct cifs_unix_set_info_args args = {
- .mode = mode & ~current->fs->umask,
+ .mode = mode & ~current_umask(),
.ctime = NO_CHANGE_64,
.atime = NO_CHANGE_64,
.mtime = NO_CHANGE_64,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8797cc..f121a80 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1125,7 +1125,7 @@
goto mkdir_out;
}
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
mode, NULL /* netfid */, pInfo, &oplock,
full_path, cifs_sb->local_nls,
@@ -1204,7 +1204,7 @@
if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
direntry->d_inode->i_nlink = 2;
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
/* must turn on setgid bit if parent dir has it */
if (inode->i_mode & S_ISGID)
mode |= S_ISGID;
diff --git a/fs/compat.c b/fs/compat.c
index 440a019..1c859da 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -51,6 +51,7 @@
#include <linux/poll.h>
#include <linux/mm.h>
#include <linux/eventpoll.h>
+#include <linux/fs_struct.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -1502,12 +1503,15 @@
bprm->cred = prepare_exec_creds();
if (!bprm->cred)
goto out_unlock;
- check_unsafe_exec(bprm);
+
+ retval = check_unsafe_exec(bprm);
+ if (retval)
+ goto out_unlock;
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
- goto out_unlock;
+ goto out_unmark;
sched_exec();
@@ -1549,6 +1553,9 @@
goto out;
/* execve succeeded */
+ write_lock(¤t->fs->lock);
+ current->fs->in_exec = 0;
+ write_unlock(¤t->fs->lock);
current->in_execve = 0;
mutex_unlock(¤t->cred_exec_mutex);
acct_update_integrals(current);
@@ -1567,6 +1574,11 @@
fput(bprm->file);
}
+out_unmark:
+ write_lock(¤t->fs->lock);
+ current->fs->in_exec = 0;
+ write_unlock(¤t->fs->lock);
+
out_unlock:
current->in_execve = 0;
mutex_unlock(¤t->cred_exec_mutex);
diff --git a/fs/dcache.c b/fs/dcache.c
index 90bbd7e..761d30b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -17,7 +17,6 @@
#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
-#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
@@ -32,6 +31,7 @@
#include <linux/seqlock.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include <linux/fs_struct.h>
#include "internal.h"
int sysctl_vfs_cache_pressure __read_mostly = 100;
diff --git a/fs/exec.c b/fs/exec.c
index c5128fb..052a961 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -53,6 +53,7 @@
#include <linux/tracehook.h>
#include <linux/kmod.h>
#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -1056,28 +1057,35 @@
* - the caller must hold current->cred_exec_mutex to protect against
* PTRACE_ATTACH
*/
-void check_unsafe_exec(struct linux_binprm *bprm)
+int check_unsafe_exec(struct linux_binprm *bprm)
{
struct task_struct *p = current, *t;
unsigned long flags;
- unsigned n_fs, n_sighand;
+ unsigned n_fs;
+ int res = 0;
bprm->unsafe = tracehook_unsafe_exec(p);
n_fs = 1;
- n_sighand = 1;
+ write_lock(&p->fs->lock);
lock_task_sighand(p, &flags);
for (t = next_thread(p); t != p; t = next_thread(t)) {
if (t->fs == p->fs)
n_fs++;
- n_sighand++;
}
- if (atomic_read(&p->fs->count) > n_fs ||
- atomic_read(&p->sighand->count) > n_sighand)
+ if (p->fs->users > n_fs) {
bprm->unsafe |= LSM_UNSAFE_SHARE;
+ } else {
+ if (p->fs->in_exec)
+ res = -EAGAIN;
+ p->fs->in_exec = 1;
+ }
unlock_task_sighand(p, &flags);
+ write_unlock(&p->fs->lock);
+
+ return res;
}
/*
@@ -1296,12 +1304,15 @@
bprm->cred = prepare_exec_creds();
if (!bprm->cred)
goto out_unlock;
- check_unsafe_exec(bprm);
+
+ retval = check_unsafe_exec(bprm);
+ if (retval)
+ goto out_unlock;
file = open_exec(filename);
retval = PTR_ERR(file);
if (IS_ERR(file))
- goto out_unlock;
+ goto out_unmark;
sched_exec();
@@ -1344,6 +1355,9 @@
goto out;
/* execve succeeded */
+ write_lock(¤t->fs->lock);
+ current->fs->in_exec = 0;
+ write_unlock(¤t->fs->lock);
current->in_execve = 0;
mutex_unlock(¤t->cred_exec_mutex);
acct_update_integrals(current);
@@ -1362,6 +1376,11 @@
fput(bprm->file);
}
+out_unmark:
+ write_lock(¤t->fs->lock);
+ current->fs->in_exec = 0;
+ write_unlock(¤t->fs->lock);
+
out_unlock:
current->in_execve = 0;
mutex_unlock(¤t->cred_exec_mutex);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ae8c4f8..d46e38c 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -318,7 +318,7 @@
return PTR_ERR(acl);
}
if (!acl)
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
struct posix_acl *clone;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index b60bb24..d81ef2f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -323,7 +323,7 @@
return PTR_ERR(acl);
}
if (!acl)
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
struct posix_acl *clone;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 694ed6f..647e0d6 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -323,7 +323,7 @@
return PTR_ERR(acl);
}
if (!acl)
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
struct posix_acl *clone;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 2cc952e..296785a 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -934,7 +934,7 @@
opts->fs_uid = current_uid();
opts->fs_gid = current_gid();
- opts->fs_fmask = opts->fs_dmask = current->fs->umask;
+ opts->fs_fmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
opts->iocharset = fat_default_iocharset;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
new file mode 100644
index 0000000..eee0590
--- /dev/null
+++ b/fs/fs_struct.c
@@ -0,0 +1,177 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+#include <linux/fs_struct.h>
+
+/*
+ * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
+ * It can block.
+ */
+void set_fs_root(struct fs_struct *fs, struct path *path)
+{
+ struct path old_root;
+
+ write_lock(&fs->lock);
+ old_root = fs->root;
+ fs->root = *path;
+ path_get(path);
+ write_unlock(&fs->lock);
+ if (old_root.dentry)
+ path_put(&old_root);
+}
+
+/*
+ * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
+ * It can block.
+ */
+void set_fs_pwd(struct fs_struct *fs, struct path *path)
+{
+ struct path old_pwd;
+
+ write_lock(&fs->lock);
+ old_pwd = fs->pwd;
+ fs->pwd = *path;
+ path_get(path);
+ write_unlock(&fs->lock);
+
+ if (old_pwd.dentry)
+ path_put(&old_pwd);
+}
+
+void chroot_fs_refs(struct path *old_root, struct path *new_root)
+{
+ struct task_struct *g, *p;
+ struct fs_struct *fs;
+ int count = 0;
+
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p) {
+ task_lock(p);
+ fs = p->fs;
+ if (fs) {
+ write_lock(&fs->lock);
+ if (fs->root.dentry == old_root->dentry
+ && fs->root.mnt == old_root->mnt) {
+ path_get(new_root);
+ fs->root = *new_root;
+ count++;
+ }
+ if (fs->pwd.dentry == old_root->dentry
+ && fs->pwd.mnt == old_root->mnt) {
+ path_get(new_root);
+ fs->pwd = *new_root;
+ count++;
+ }
+ write_unlock(&fs->lock);
+ }
+ task_unlock(p);
+ } while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
+ while (count--)
+ path_put(old_root);
+}
+
+void free_fs_struct(struct fs_struct *fs)
+{
+ path_put(&fs->root);
+ path_put(&fs->pwd);
+ kmem_cache_free(fs_cachep, fs);
+}
+
+void exit_fs(struct task_struct *tsk)
+{
+ struct fs_struct *fs = tsk->fs;
+
+ if (fs) {
+ int kill;
+ task_lock(tsk);
+ write_lock(&fs->lock);
+ tsk->fs = NULL;
+ kill = !--fs->users;
+ write_unlock(&fs->lock);
+ task_unlock(tsk);
+ if (kill)
+ free_fs_struct(fs);
+ }
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+ fs->users = 1;
+ fs->in_exec = 0;
+ rwlock_init(&fs->lock);
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->root = old->root;
+ path_get(&old->root);
+ fs->pwd = old->pwd;
+ path_get(&old->pwd);
+ read_unlock(&old->lock);
+ }
+ return fs;
+}
+
+int unshare_fs_struct(void)
+{
+ struct fs_struct *fs = current->fs;
+ struct fs_struct *new_fs = copy_fs_struct(fs);
+ int kill;
+
+ if (!new_fs)
+ return -ENOMEM;
+
+ task_lock(current);
+ write_lock(&fs->lock);
+ kill = !--fs->users;
+ current->fs = new_fs;
+ write_unlock(&fs->lock);
+ task_unlock(current);
+
+ if (kill)
+ free_fs_struct(fs);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(unshare_fs_struct);
+
+int current_umask(void)
+{
+ return current->fs->umask;
+}
+EXPORT_SYMBOL(current_umask);
+
+/* to be mentioned only in INIT_TASK */
+struct fs_struct init_fs = {
+ .users = 1,
+ .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
+ .umask = 0022,
+};
+
+void daemonize_fs_struct(void)
+{
+ struct fs_struct *fs = current->fs;
+
+ if (fs) {
+ int kill;
+
+ task_lock(current);
+
+ write_lock(&init_fs.lock);
+ init_fs.users++;
+ write_unlock(&init_fs.lock);
+
+ write_lock(&fs->lock);
+ current->fs = &init_fs;
+ kill = !--fs->users;
+ write_unlock(&fs->lock);
+
+ task_unlock(current);
+ if (kill)
+ free_fs_struct(fs);
+ }
+}
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 995d63b..e0b53aa 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -134,7 +134,7 @@
mode_t mode = inode->i_mode;
int error;
- inode->i_mode = mode & ~current->fs->umask;
+ inode->i_mode = mode & ~current_umask();
if (!S_ISLNK(inode->i_mode))
acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
if (acl) {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 43764f4..fa881bd 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -215,7 +215,7 @@
if (error)
return error;
if (!acl) {
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
if (mode != ip->i_inode.i_mode)
error = munge_mode(ip, mode);
return error;
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index bab7f8d..3fcbb0e 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -48,7 +48,7 @@
opts->creator = HFSPLUS_DEF_CR_TYPE;
opts->type = HFSPLUS_DEF_CR_TYPE;
- opts->umask = current->fs->umask;
+ opts->umask = current_umask();
opts->uid = current_uid();
opts->gid = current_gid();
opts->part = -1;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c40f6e2..fecf402 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -480,7 +480,7 @@
uid = current_uid();
gid = current_gid();
- umask = current->fs->umask;
+ umask = current_umask();
lowercase = 0;
conv = CONV_BINARY;
eas = 2;
diff --git a/fs/internal.h b/fs/internal.h
index 53af885..b4dac4f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -11,6 +11,7 @@
struct super_block;
struct linux_binprm;
+struct path;
/*
* block_dev.c
@@ -43,7 +44,7 @@
/*
* exec.c
*/
-extern void check_unsafe_exec(struct linux_binprm *);
+extern int check_unsafe_exec(struct linux_binprm *);
/*
* namespace.c
@@ -60,3 +61,8 @@
extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern void __init mnt_init(void);
+
+/*
+ * fs_struct.c
+ */
+extern void chroot_fs_refs(struct path *, struct path *);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index d987137..77ccf8cb 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -336,7 +336,7 @@
return PTR_ERR(acl);
if (!acl) {
- *i_mode &= ~current->fs->umask;
+ *i_mode &= ~current_umask();
} else {
if (S_ISDIR(*i_mode))
jffs2_iset_acl(inode, &f->i_acl_default, acl);
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a166c166..06ca1b8 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -182,7 +182,7 @@
cleanup:
posix_acl_release(acl);
} else
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) |
inode->i_mode;
diff --git a/fs/mpage.c b/fs/mpage.c
index 16c3ef3..680ba60 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -82,7 +82,7 @@
bio_put(bio);
}
-struct bio *mpage_bio_submit(int rw, struct bio *bio)
+static struct bio *mpage_bio_submit(int rw, struct bio *bio)
{
bio->bi_end_io = mpage_end_io_read;
if (rw == WRITE)
@@ -90,7 +90,6 @@
submit_bio(rw, bio);
return NULL;
}
-EXPORT_SYMBOL(mpage_bio_submit);
static struct bio *
mpage_alloc(struct block_device *bdev,
@@ -439,7 +438,14 @@
* just allocate full-size (16-page) BIOs.
*/
-int __mpage_writepage(struct page *page, struct writeback_control *wbc,
+struct mpage_data {
+ struct bio *bio;
+ sector_t last_block_in_bio;
+ get_block_t *get_block;
+ unsigned use_writepage;
+};
+
+static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
struct mpage_data *mpd = data;
@@ -648,7 +654,6 @@
mpd->bio = bio;
return ret;
}
-EXPORT_SYMBOL(__mpage_writepage);
/**
* mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
diff --git a/fs/namei.c b/fs/namei.c
index d040ce1..b8433eb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -32,6 +32,7 @@
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
+#include <linux/fs_struct.h>
#include <asm/uaccess.h>
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -1578,7 +1579,7 @@
struct dentry *dir = nd->path.dentry;
if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
error = security_path_mknod(&nd->path, path->dentry, mode, 0);
if (error)
goto out_unlock;
@@ -1989,7 +1990,7 @@
goto out_unlock;
}
if (!IS_POSIXACL(nd.path.dentry->d_inode))
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
error = may_mknod(mode);
if (error)
goto out_dput;
@@ -2067,7 +2068,7 @@
goto out_unlock;
if (!IS_POSIXACL(nd.path.dentry->d_inode))
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
error = mnt_want_write(nd.path.mnt);
if (error)
goto out_dput;
@@ -2897,10 +2898,3 @@
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(generic_readlink);
-
-/* to be mentioned only in INIT_TASK */
-struct fs_struct init_fs = {
- .count = ATOMIC_INIT(1),
- .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
- .umask = 0022,
-};
diff --git a/fs/namespace.c b/fs/namespace.c
index 0a42e0e..c6f54e4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,7 @@
#include <linux/ramfs.h>
#include <linux/log2.h>
#include <linux/idr.h>
+#include <linux/fs_struct.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
@@ -2093,66 +2094,6 @@
}
/*
- * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
- * It can block. Requires the big lock held.
- */
-void set_fs_root(struct fs_struct *fs, struct path *path)
-{
- struct path old_root;
-
- write_lock(&fs->lock);
- old_root = fs->root;
- fs->root = *path;
- path_get(path);
- write_unlock(&fs->lock);
- if (old_root.dentry)
- path_put(&old_root);
-}
-
-/*
- * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
- * It can block. Requires the big lock held.
- */
-void set_fs_pwd(struct fs_struct *fs, struct path *path)
-{
- struct path old_pwd;
-
- write_lock(&fs->lock);
- old_pwd = fs->pwd;
- fs->pwd = *path;
- path_get(path);
- write_unlock(&fs->lock);
-
- if (old_pwd.dentry)
- path_put(&old_pwd);
-}
-
-static void chroot_fs_refs(struct path *old_root, struct path *new_root)
-{
- struct task_struct *g, *p;
- struct fs_struct *fs;
-
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- task_lock(p);
- fs = p->fs;
- if (fs) {
- atomic_inc(&fs->count);
- task_unlock(p);
- if (fs->root.dentry == old_root->dentry
- && fs->root.mnt == old_root->mnt)
- set_fs_root(fs, new_root);
- if (fs->pwd.dentry == old_root->dentry
- && fs->pwd.mnt == old_root->mnt)
- set_fs_pwd(fs, new_root);
- put_fs_struct(fs);
- } else
- task_unlock(p);
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
-}
-
-/*
* pivot_root Semantics:
* Moves the root file system of the current process to the directory put_old,
* makes new_root as the new root file system of the current process, and sets
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index b82fe68..d0cc5ce 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -328,7 +328,7 @@
data->arg.create.verifier[1] = current->pid;
}
- sattr->ia_mode &= ~current->fs->umask;
+ sattr->ia_mode &= ~current_umask();
for (;;) {
status = nfs3_do_create(dir, dentry, data);
@@ -528,7 +528,7 @@
dprintk("NFS call mkdir %s\n", dentry->d_name.name);
- sattr->ia_mode &= ~current->fs->umask;
+ sattr->ia_mode &= ~current_umask();
data = nfs3_alloc_createdata();
if (data == NULL)
@@ -639,7 +639,7 @@
dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name,
MAJOR(rdev), MINOR(rdev));
- sattr->ia_mode &= ~current->fs->umask;
+ sattr->ia_mode &= ~current_umask();
data = nfs3_alloc_createdata();
if (data == NULL)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 97baccc..a4d2426 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1501,7 +1501,7 @@
attr.ia_mode = nd->intent.open.create_mode;
attr.ia_valid = ATTR_MODE;
if (!IS_POSIXACL(dir))
- attr.ia_mode &= ~current->fs->umask;
+ attr.ia_mode &= ~current_umask();
} else {
attr.ia_valid = 0;
BUG_ON(nd->intent.open.flags & O_CREAT);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index bc3567b..7c09852 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -403,7 +403,6 @@
nfsd(void *vrqstp)
{
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
- struct fs_struct *fsp;
int err, preverr = 0;
/* Lock module and set up kernel thread */
@@ -412,13 +411,11 @@
/* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
- fsp = copy_fs_struct(current->fs);
- if (!fsp) {
+ if (unshare_fs_struct() < 0) {
printk("Unable to start nfsd thread: out of memory\n");
goto out;
}
- exit_fs(current);
- current->fs = fsp;
+
current->fs->umask = 0;
/*
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 12dfb44..fbeaec7 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -296,7 +296,7 @@
return PTR_ERR(acl);
}
if (!acl)
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
struct posix_acl *clone;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index d79e808..379ae5f 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -426,7 +426,7 @@
sbi->s_uid = current_uid();
sbi->s_gid = current_gid();
- sbi->s_dmask = sbi->s_fmask = current->fs->umask;
+ sbi->s_dmask = sbi->s_fmask = current_umask();
if (!parse_options((char *) data, sbi))
goto end;
diff --git a/fs/open.c b/fs/open.c
index 75b6167..377eb25 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,6 +29,7 @@
#include <linux/rcupdate.h>
#include <linux/audit.h>
#include <linux/falloc.h>
+#include <linux/fs_struct.h>
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e0afd32..f715597 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -80,6 +80,7 @@
#include <linux/oom.h>
#include <linux/elf.h>
#include <linux/pid_namespace.h>
+#include <linux/fs_struct.h>
#include "internal.h"
/* NOTE:
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 370be0a..863464d 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -2,6 +2,7 @@
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/fs_struct.h>
#include <linux/mount.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
@@ -49,7 +50,7 @@
else
bytes += kobjsize(mm);
- if (current->fs && atomic_read(¤t->fs->count) > 1)
+ if (current->fs && current->fs->users > 1)
sbytes += kobjsize(current->fs);
else
bytes += kobjsize(current->fs);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d423416..c303c42 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -428,7 +428,7 @@
} else {
apply_umask:
/* no ACL, apply umask */
- inode->i_mode &= ~current->fs->umask;
+ inode->i_mode &= ~current_umask();
}
return err;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 7aa53fe..2940612 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -227,7 +227,7 @@
xfs_dentry_to_name(&name, dentry);
if (IS_POSIXACL(dir) && !default_acl)
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
switch (mode & S_IFMT) {
case S_IFCHR:
@@ -416,7 +416,7 @@
mode_t mode;
mode = S_IFLNK |
- (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
+ (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
xfs_dentry_to_name(&name, dentry);
error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 3d7bcde..7b73bb8 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -332,22 +332,10 @@
static inline void buffer_init(void) {}
static inline int try_to_free_buffers(struct page *page) { return 1; }
-static inline int sync_blockdev(struct block_device *bdev) { return 0; }
static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
-static inline void invalidate_bdev(struct block_device *bdev) {}
-
-static inline struct super_block *freeze_bdev(struct block_device *sb)
-{
- return NULL;
-}
-
-static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
- return 0;
-}
#endif /* CONFIG_BLOCK */
#endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 61211ad..a09e17c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1741,6 +1741,8 @@
extern int vfs_statfs(struct dentry *, struct kstatfs *);
+extern int current_umask(void);
+
/* /sys/fs */
extern struct kobject *fs_kobj;
@@ -1885,6 +1887,18 @@
extern int fsync_no_super(struct block_device *);
#else
static inline void bd_forget(struct inode *inode) {}
+static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline void invalidate_bdev(struct block_device *bdev) {}
+
+static inline struct super_block *freeze_bdev(struct block_device *sb)
+{
+ return NULL;
+}
+
+static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
+{
+ return 0;
+}
#endif
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 18b467d..78a05bf 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -4,12 +4,10 @@
#include <linux/path.h>
struct fs_struct {
- atomic_t count; /* This usage count is used by check_unsafe_exec() for
- * security checking purposes - therefore it may not be
- * incremented, except by clone(CLONE_FS).
- */
+ int users;
rwlock_t lock;
int umask;
+ int in_exec;
struct path root, pwd;
};
@@ -19,6 +17,8 @@
extern void set_fs_root(struct fs_struct *, struct path *);
extern void set_fs_pwd(struct fs_struct *, struct path *);
extern struct fs_struct *copy_fs_struct(struct fs_struct *);
-extern void put_fs_struct(struct fs_struct *);
+extern void free_fs_struct(struct fs_struct *);
+extern void daemonize_fs_struct(void);
+extern int unshare_fs_struct(void);
#endif /* _LINUX_FS_STRUCT_H */
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 830bbcd..3a05929 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -22,6 +22,8 @@
int event;
};
+struct fs_struct;
+
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct fs_struct *);
extern void __put_mnt_ns(struct mnt_namespace *ns);
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 5c42821..068a0c9 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -11,21 +11,11 @@
*/
#ifdef CONFIG_BLOCK
-struct mpage_data {
- struct bio *bio;
- sector_t last_block_in_bio;
- get_block_t *get_block;
- unsigned use_writepage;
-};
-
struct writeback_control;
-struct bio *mpage_bio_submit(int rw, struct bio *bio);
int mpage_readpages(struct address_space *mapping, struct list_head *pages,
unsigned nr_pages, get_block_t get_block);
int mpage_readpage(struct page *page, get_block_t get_block);
-int __mpage_writepage(struct page *page, struct writeback_control *wbc,
- void *data);
int mpage_writepages(struct address_space *mapping,
struct writeback_control *wbc, get_block_t get_block);
int mpage_writepage(struct page *page, get_block_t *get_block,
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index afad7de..7b370c7 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -8,6 +8,7 @@
struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
+struct fs_struct;
/*
* A structure to contain pointers to all per-process
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206ac00..9da5aa0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -68,7 +68,7 @@
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/signal.h>
-#include <linux/fs_struct.h>
+#include <linux/path.h>
#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/pid.h>
@@ -97,6 +97,7 @@
struct robust_list_head;
struct bio;
struct bts_tracer;
+struct fs_struct;
/*
* List of flags we want to share for kernel threads,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 8d4ff5a..dd7ee5f 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/initrd.h>
#include <linux/async.h>
+#include <linux/fs_struct.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index a8ddadb..9167853 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -602,7 +602,7 @@
dentry->d_fsdata = attr;
}
- mode &= ~current->fs->umask;
+ mode &= ~current_umask();
ret = mnt_want_write(mqueue_mnt);
if (ret)
goto out;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 8cbddff..2bfc647 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -66,6 +66,7 @@
#include <linux/syscalls.h>
#include <linux/inotify.h>
#include <linux/capability.h>
+#include <linux/fs_struct.h>
#include "audit.h"
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 667c841..c35452c 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -18,6 +18,7 @@
#include <linux/syscalls.h>
#include <linux/sysctl.h>
#include <linux/types.h>
+#include <linux/fs_struct.h>
static void default_handler(int, struct pt_regs *);
@@ -145,28 +146,6 @@
return 0;
}
- if (atomic_read(¤t->fs->count) != 1) {
- struct fs_struct *fsp, *ofsp;
-
- fsp = copy_fs_struct(current->fs);
- if (fsp == NULL) {
- module_put(ep->module);
- return -ENOMEM;
- }
-
- task_lock(current);
- ofsp = current->fs;
- current->fs = fsp;
- task_unlock(current);
-
- put_fs_struct(ofsp);
- }
-
- /*
- * At that point we are guaranteed to be the sole owner of
- * current->fs.
- */
-
current->personality = personality;
oep = current_thread_info()->exec_domain;
current_thread_info()->exec_domain = ep;
diff --git a/kernel/exit.c b/kernel/exit.c
index 3bec141..6686ed1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -46,6 +46,7 @@
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <linux/fs_struct.h>
#include <linux/init_task.h>
#include <trace/sched.h>
@@ -420,7 +421,6 @@
void daemonize(const char *name, ...)
{
va_list args;
- struct fs_struct *fs;
sigset_t blocked;
va_start(args, name);
@@ -453,11 +453,7 @@
/* Become as one with the init task */
- exit_fs(current); /* current->fs->count--; */
- fs = init_task.fs;
- current->fs = fs;
- atomic_inc(&fs->count);
-
+ daemonize_fs_struct();
exit_files(current);
current->files = init_task.files;
atomic_inc(¤t->files->count);
@@ -556,30 +552,6 @@
}
}
-void put_fs_struct(struct fs_struct *fs)
-{
- /* No need to hold fs->lock if we are killing it */
- if (atomic_dec_and_test(&fs->count)) {
- path_put(&fs->root);
- path_put(&fs->pwd);
- kmem_cache_free(fs_cachep, fs);
- }
-}
-
-void exit_fs(struct task_struct *tsk)
-{
- struct fs_struct * fs = tsk->fs;
-
- if (fs) {
- task_lock(tsk);
- tsk->fs = NULL;
- task_unlock(tsk);
- put_fs_struct(fs);
- }
-}
-
-EXPORT_SYMBOL_GPL(exit_fs);
-
#ifdef CONFIG_MM_OWNER
/*
* Task p is exiting and it owned mm, lets find a new owner for it
diff --git a/kernel/fork.c b/kernel/fork.c
index f744582..660c2b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -60,6 +60,7 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <linux/fs_struct.h>
#include <trace/sched.h>
#include <linux/magic.h>
@@ -681,38 +682,21 @@
return retval;
}
-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
-{
- struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
- /* We don't need to lock fs - think why ;-) */
- if (fs) {
- atomic_set(&fs->count, 1);
- rwlock_init(&fs->lock);
- fs->umask = old->umask;
- read_lock(&old->lock);
- fs->root = old->root;
- path_get(&old->root);
- fs->pwd = old->pwd;
- path_get(&old->pwd);
- read_unlock(&old->lock);
- }
- return fs;
-}
-
-struct fs_struct *copy_fs_struct(struct fs_struct *old)
-{
- return __copy_fs_struct(old);
-}
-
-EXPORT_SYMBOL_GPL(copy_fs_struct);
-
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
+ struct fs_struct *fs = current->fs;
if (clone_flags & CLONE_FS) {
- atomic_inc(¤t->fs->count);
+ /* tsk->fs is already what we want */
+ write_lock(&fs->lock);
+ if (fs->in_exec) {
+ write_unlock(&fs->lock);
+ return -EAGAIN;
+ }
+ fs->users++;
+ write_unlock(&fs->lock);
return 0;
}
- tsk->fs = __copy_fs_struct(current->fs);
+ tsk->fs = copy_fs_struct(fs);
if (!tsk->fs)
return -ENOMEM;
return 0;
@@ -1544,12 +1528,16 @@
{
struct fs_struct *fs = current->fs;
- if ((unshare_flags & CLONE_FS) &&
- (fs && atomic_read(&fs->count) > 1)) {
- *new_fsp = __copy_fs_struct(current->fs);
- if (!*new_fsp)
- return -ENOMEM;
- }
+ if (!(unshare_flags & CLONE_FS) || !fs)
+ return 0;
+
+ /* don't need lock here; in the worst case we'll do useless copy */
+ if (fs->users == 1)
+ return 0;
+
+ *new_fsp = copy_fs_struct(fs);
+ if (!*new_fsp)
+ return -ENOMEM;
return 0;
}
@@ -1665,8 +1653,13 @@
if (new_fs) {
fs = current->fs;
+ write_lock(&fs->lock);
current->fs = new_fs;
- new_fs = fs;
+ if (--fs->users)
+ new_fs = NULL;
+ else
+ new_fs = fs;
+ write_unlock(&fs->lock);
}
if (new_mm) {
@@ -1705,7 +1698,7 @@
bad_unshare_cleanup_fs:
if (new_fs)
- put_fs_struct(new_fs);
+ free_fs_struct(new_fs);
bad_unshare_cleanup_thread:
bad_unshare_out:
diff --git a/kernel/sys.c b/kernel/sys.c
index 742cefa..51dbb55 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -34,6 +34,7 @@
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
+#include <linux/fs_struct.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index baac910..9dcc6e7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -832,7 +832,7 @@
* All right, let's create it.
*/
mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
+ (SOCK_INODE(sock)->i_mode & ~current_umask());
err = mnt_want_write(nd.path.mnt);
if (err)
goto out_mknod_dput;
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index d47f16b..3bbe01a 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/mount.h>
#include <linux/mnt_namespace.h>
+#include <linux/fs_struct.h>
#include "common.h"
#include "realpath.h"