Merge tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux
Pull file locking updates from Jeff Layton:
"File locking related changes for v4.5 (pile #1)
Highlights:
- new Kconfig option to allow disabling mandatory locking (which is
racy anyway)
- new tracepoints for setlk and close codepaths
- fix for a long-standing bug in code that handles races between
setting a POSIX lock and close()"
* tag 'locks-v4.5-1' of git://git.samba.org/jlayton/linux:
locks: rename __posix_lock_file to posix_lock_inode
locks: prink more detail when there are leaked locks
locks: pass inode pointer to locks_free_lock_context
locks: sprinkle some tracepoints around the file locking code
locks: don't check for race with close when setting OFD lock
locks: fix unlock when fcntl_setlk races with a close
fs: make locks.c explicitly non-modular
locks: use list_first_entry_or_null()
locks: Don't allow mounts in user namespaces to enable mandatory locking
locks: Allow disabling mandatory locking at compile time
diff --git a/fs/Kconfig b/fs/Kconfig
index 6ce72d8..2bb1ef8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -73,6 +73,16 @@
for filesystems like NFS and for the flock() system
call. Disabling this option saves about 11k.
+config MANDATORY_FILE_LOCKING
+ bool "Enable Mandatory file locking"
+ depends on FILE_LOCKING
+ default y
+ help
+ This option enables files appropriately marked files on appropriely
+ mounted filesystems to support mandatory locking.
+
+ To the best of my knowledge this is dead code that no one cares about.
+
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
diff --git a/fs/inode.c b/fs/inode.c
index 5bb85a0..4230f66 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -225,7 +225,7 @@
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
- locks_free_lock_context(inode->i_flctx);
+ locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);
diff --git a/fs/locks.c b/fs/locks.c
index 0d2b326..a91f4ab 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -119,7 +119,6 @@
#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
@@ -230,16 +229,44 @@
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
-void
-locks_free_lock_context(struct file_lock_context *ctx)
+static void
+locks_dump_ctx_list(struct list_head *list, char *list_type)
{
- if (ctx) {
- WARN_ON_ONCE(!list_empty(&ctx->flc_flock));
- WARN_ON_ONCE(!list_empty(&ctx->flc_posix));
- WARN_ON_ONCE(!list_empty(&ctx->flc_lease));
+ struct file_lock *fl;
+
+ list_for_each_entry(fl, list, fl_list) {
+ pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+ }
+}
+
+static void
+locks_check_ctx_lists(struct inode *inode)
+{
+ struct file_lock_context *ctx = inode->i_flctx;
+
+ if (unlikely(!list_empty(&ctx->flc_flock) ||
+ !list_empty(&ctx->flc_posix) ||
+ !list_empty(&ctx->flc_lease))) {
+ pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
+ MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+ locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
+ locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
+ locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
+ }
+}
+
+void
+locks_free_lock_context(struct inode *inode)
+{
+ struct file_lock_context *ctx = inode->i_flctx;
+
+ if (unlikely(ctx)) {
+ locks_check_ctx_lists(inode);
kmem_cache_free(flctx_cache, ctx);
}
}
@@ -934,7 +961,8 @@
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1170,8 @@
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1192,7 @@
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1208,7 @@
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1191,6 +1221,7 @@
return error;
}
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
/**
* locks_mandatory_locked - Check for an active lock
* @file: the file to check
@@ -1260,7 +1291,7 @@
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1299,7 @@
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -1289,6 +1320,7 @@
}
EXPORT_SYMBOL(locks_mandatory_area);
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
static void lease_clear_pending(struct file_lock *fl, int arg)
{
@@ -1503,12 +1535,10 @@
ctx = smp_load_acquire(&inode->i_flctx);
if (ctx && !list_empty_careful(&ctx->flc_lease)) {
spin_lock(&ctx->flc_lock);
- if (!list_empty(&ctx->flc_lease)) {
- fl = list_first_entry(&ctx->flc_lease,
- struct file_lock, fl_list);
- if (fl->fl_type == F_WRLCK)
- has_lease = true;
- }
+ fl = list_first_entry_or_null(&ctx->flc_lease,
+ struct file_lock, fl_list);
+ if (fl && (fl->fl_type == F_WRLCK))
+ has_lease = true;
spin_unlock(&ctx->flc_lock);
}
@@ -2165,6 +2195,8 @@
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2204,6 @@
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2182,7 +2212,6 @@
goto out;
}
-again:
error = flock_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@@ -2221,23 +2250,29 @@
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- /*
- * we need that spin_lock here - it prevents reordering between
- * update of i_flctx->flc_posix and check for it done in close().
- * rcu_read_lock() wouldn't do.
- */
- spin_lock(¤t->files->file_lock);
- f = fcheck(fd);
- spin_unlock(¤t->files->file_lock);
- if (!error && f != filp && flock.l_type != F_UNLCK) {
- flock.l_type = F_UNLCK;
- goto again;
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
+ /*
+ * We need that spin_lock here - it prevents reordering between
+ * update of i_flctx->flc_posix and check for it done in
+ * close(). rcu_read_lock() wouldn't do.
+ */
+ spin_lock(¤t->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(¤t->files->file_lock);
+ if (f != filp) {
+ file_lock->fl_type = F_UNLCK;
+ error = do_lock_file_wait(filp, cmd, file_lock);
+ WARN_ON_ONCE(error);
+ error = -EBADF;
+ }
}
-
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2322,7 +2357,6 @@
goto out;
}
-again:
error = flock64_to_posix_lock(filp, file_lock, &flock);
if (error)
goto out;
@@ -2361,17 +2395,27 @@
error = do_lock_file_wait(filp, cmd, file_lock);
/*
- * Attempt to detect a close/fcntl race and recover by
- * releasing the lock that was just acquired.
+ * Attempt to detect a close/fcntl race and recover by releasing the
+ * lock that was just acquired. There is no need to do that when we're
+ * unlocking though, or for OFD locks.
*/
- spin_lock(¤t->files->file_lock);
- f = fcheck(fd);
- spin_unlock(¤t->files->file_lock);
- if (!error && f != filp && flock.l_type != F_UNLCK) {
- flock.l_type = F_UNLCK;
- goto again;
+ if (!error && file_lock->fl_type != F_UNLCK &&
+ !(file_lock->fl_flags & FL_OFDLCK)) {
+ /*
+ * We need that spin_lock here - it prevents reordering between
+ * update of i_flctx->flc_posix and check for it done in
+ * close(). rcu_read_lock() wouldn't do.
+ */
+ spin_lock(¤t->files->file_lock);
+ f = fcheck(fd);
+ spin_unlock(¤t->files->file_lock);
+ if (f != filp) {
+ file_lock->fl_type = F_UNLCK;
+ error = do_lock_file_wait(filp, cmd, file_lock);
+ WARN_ON_ONCE(error);
+ error = -EBADF;
+ }
}
-
out:
locks_free_lock(file_lock);
return error;
@@ -2385,6 +2429,7 @@
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2407,10 +2452,11 @@
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
@@ -2706,7 +2752,7 @@
proc_create("locks", 0, NULL, &proc_locks_operations);
return 0;
}
-module_init(proc_locks_init);
+fs_initcall(proc_locks_init);
#endif
static int __init filelock_init(void)
diff --git a/fs/namespace.c b/fs/namespace.c
index 0570729..4d2c8f64 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1584,6 +1584,14 @@
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
+static inline bool may_mandlock(void)
+{
+#ifndef CONFIG_MANDATORY_FILE_LOCKING
+ return false;
+#endif
+ return capable(CAP_SYS_ADMIN);
+}
+
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
@@ -2677,6 +2685,8 @@
type_page, flags, data_page);
if (!retval && !may_mount())
retval = -EPERM;
+ if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
+ retval = -EPERM;
if (retval)
goto dput_out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ef3cd36..566f8e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1043,7 +1043,7 @@
extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */
-void locks_free_lock_context(struct file_lock_context *ctx);
+void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
@@ -1104,7 +1104,7 @@
}
static inline void
-locks_free_lock_context(struct file_lock_context *ctx)
+locks_free_lock_context(struct inode *inode)
{
}
@@ -2030,7 +2030,7 @@
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
-#ifdef CONFIG_FILE_LOCKING
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
extern int locks_mandatory_locked(struct file *);
extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
@@ -2075,6 +2075,45 @@
return 0;
}
+#else /* !CONFIG_MANDATORY_FILE_LOCKING */
+
+static inline int locks_mandatory_locked(struct file *file)
+{
+ return 0;
+}
+
+static inline int locks_mandatory_area(int rw, struct inode *inode,
+ struct file *filp, loff_t offset,
+ size_t count)
+{
+ return 0;
+}
+
+static inline int __mandatory_lock(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int mandatory_lock(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int locks_verify_locked(struct file *file)
+{
+ return 0;
+}
+
+static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
+ size_t size)
+{
+ return 0;
+}
+
+#endif /* CONFIG_MANDATORY_FILE_LOCKING */
+
+
+#ifdef CONFIG_FILE_LOCKING
static inline int break_lease(struct inode *inode, unsigned int mode)
{
/*
@@ -2136,39 +2175,6 @@
}
#else /* !CONFIG_FILE_LOCKING */
-static inline int locks_mandatory_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_mandatory_area(int rw, struct inode *inode,
- struct file *filp, loff_t offset,
- size_t count)
-{
- return 0;
-}
-
-static inline int __mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int mandatory_lock(struct inode *inode)
-{
- return 0;
-}
-
-static inline int locks_verify_locked(struct file *file)
-{
- return 0;
-}
-
-static inline int locks_verify_truncate(struct inode *inode, struct file *filp,
- size_t size)
-{
- return 0;
-}
-
static inline int break_lease(struct inode *inode, unsigned int mode)
{
return 0;
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index c72f2dc..63a7680 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
+TRACE_EVENT(locks_get_lock_context,
+ TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+ TP_ARGS(inode, type, ctx),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(unsigned char, type)
+ __field(struct file_lock_context *, ctx)
+ ),
+
+ TP_fast_assign(
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->type = type;
+ __entry->ctx = ctx;
+ ),
+
+ TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+ TP_ARGS(inode, fl, ret),
+
+ TP_STRUCT__entry(
+ __field(struct file_lock *, fl)
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(struct file_lock *, fl_next)
+ __field(fl_owner_t, fl_owner)
+ __field(unsigned int, fl_pid)
+ __field(unsigned int, fl_flags)
+ __field(unsigned char, fl_type)
+ __field(loff_t, fl_start)
+ __field(loff_t, fl_end)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->fl = fl ? fl : NULL;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->fl_next = fl ? fl->fl_next : NULL;
+ __entry->fl_owner = fl ? fl->fl_owner : NULL;
+ __entry->fl_pid = fl ? fl->fl_pid : 0;
+ __entry->fl_flags = fl ? fl->fl_flags : 0;
+ __entry->fl_type = fl ? fl->fl_type : 0;
+ __entry->fl_start = fl ? fl->fl_start : 0;
+ __entry->fl_end = fl ? fl->fl_end : 0;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+ __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, __entry->fl_next, __entry->fl_owner,
+ __entry->fl_pid, show_fl_flags(__entry->fl_flags),
+ show_fl_type(__entry->fl_type),
+ __entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),