vfs: Commit to never having executables on proc and sysfs.

Today proc and sysfs do not contain any executable files.  Several
applications today mount proc or sysfs without noexec and nosuid and
then depend on there being no executable files on proc or sysfs.
Having any executable files show up on proc or sysfs would cause
a user space visible regression, and most likely security problems.

Therefore commit to never allowing executables on proc and sysfs by
adding a new flag to mark them as filesystems without executables and
enforce that flag.

Test the flag where MNT_NOEXEC is tested today, so that the only user
visible effect will be that executables will be treated as if the
execute bit is cleared.

The filesystems proc and sysfs do not currently incorporate any
executable files so this does not result in any user visible effects.

This makes it unnecessary to vet changes to proc and sysfs tightly for
adding executable files or changes to chattr that would modify
existing files, as no matter what the individual files say they will
not be treated as executable files by the vfs.

Not having to vet changes too closely is important as without this we
are only one proc_create call (or another goof up in the
implementation of notify_change) from having problematic executables
on proc.  Those mistakes are all too easy to make and would create
a situation where there are security issues or the assumptions of
some program have to be broken (causing userspace regressions).

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
diff --git a/fs/exec.c b/fs/exec.c
index 1977c2a..b06623a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -98,6 +98,12 @@
 	module_put(fmt->module);
 }
 
+bool path_noexec(const struct path *path)
+{
+	return (path->mnt->mnt_flags & MNT_NOEXEC) ||
+	       (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
+}
+
 #ifdef CONFIG_USELIB
 /*
  * Note that a shared library must be both readable and executable due to
@@ -132,7 +138,7 @@
 		goto exit;
 
 	error = -EACCES;
-	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+	if (path_noexec(&file->f_path))
 		goto exit;
 
 	fsnotify_open(file);
@@ -777,7 +783,7 @@
 	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
-	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+	if (path_noexec(&file->f_path))
 		goto exit;
 
 	err = deny_write_access(file);
diff --git a/fs/open.c b/fs/open.c
index e33dab2..b6f1e96 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -377,7 +377,7 @@
 		 * with the "noexec" flag.
 		 */
 		res = -EACCES;
-		if (path.mnt->mnt_flags & MNT_NOEXEC)
+		if (path_noexec(&path))
 			goto out_path_release;
 	}
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 68feb0f..361ab4e 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -134,6 +134,8 @@
 		}
 
 		sb->s_flags |= MS_ACTIVE;
+		/* User space would break if executables appear on proc */
+		sb->s_iflags |= SB_I_NOEXEC;
 	}
 
 	return dget(sb->s_root);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 1c6ac6f..f3db820 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -40,6 +40,10 @@
 				SYSFS_MAGIC, &new_sb, ns);
 	if (IS_ERR(root) || !new_sb)
 		kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
+	else if (new_sb)
+		/* Userspace would break if executables appear on sysfs */
+		root->d_sb->s_iflags |= SB_I_NOEXEC;
+
 	return root;
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0653e5..42912f8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1244,6 +1244,7 @@
 
 /* sb->s_iflags */
 #define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
+#define SB_I_NOEXEC	0x00000002	/* Ignore executables on this fs */
 
 /* Possible states of 'frozen' field */
 enum {
@@ -3030,4 +3031,6 @@
 	return !IS_DEADDIR(inode);
 }
 
+extern bool path_noexec(const struct path *path);
+
 #endif /* _LINUX_FS_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 259fda2..fa2f2f6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1668,8 +1668,7 @@
 	 * overall picture.
 	 */
 	err = -EACCES;
-	if (!S_ISREG(inode->i_mode)	||
-	    exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+	if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
 		goto exit;
 
 	err = inode_permission(inode, MAY_EXEC);
diff --git a/mm/mmap.c b/mm/mmap.c
index aa632ad..f126923 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1268,7 +1268,7 @@
 	 *  mounted, in which case we dont add PROT_EXEC.)
 	 */
 	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
-		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
+		if (!(file && path_noexec(&file->f_path)))
 			prot |= PROT_EXEC;
 
 	if (!(flags & MAP_FIXED))
@@ -1337,7 +1337,7 @@
 		case MAP_PRIVATE:
 			if (!(file->f_mode & FMODE_READ))
 				return -EACCES;
-			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+			if (path_noexec(&file->f_path)) {
 				if (vm_flags & VM_EXEC)
 					return -EPERM;
 				vm_flags &= ~VM_MAYEXEC;
diff --git a/mm/nommu.c b/mm/nommu.c
index 58ea364..ce17abf 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1035,7 +1035,7 @@
 
 		/* handle executable mappings and implied executable
 		 * mappings */
-		if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
+		if (path_noexec(&file->f_path)) {
 			if (prot & PROT_EXEC)
 				return -EPERM;
 		} else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) {
diff --git a/security/security.c b/security/security.c
index 595fffa..062f3c99 100644
--- a/security/security.c
+++ b/security/security.c
@@ -776,7 +776,7 @@
 	 * ditto if it's not on noexec mount, except that on !MMU we need
 	 * NOMMU_MAP_EXEC (== VM_MAYEXEC) in this case
 	 */
-	if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) {
+	if (!path_noexec(&file->f_path)) {
 #ifndef CONFIG_MMU
 		if (file->f_op->mmap_capabilities) {
 			unsigned caps = file->f_op->mmap_capabilities(file);