| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * (C) 2001 Clemson University and The University of Chicago |
| * |
| * See COPYING in top-level directory. |
| */ |
| #include <linux/kernel.h> |
| #include "protocol.h" |
| #include "orangefs-kernel.h" |
| #include "orangefs-dev-proto.h" |
| #include "orangefs-bufmap.h" |
| |
| __s32 fsid_of_op(struct orangefs_kernel_op_s *op) |
| { |
| __s32 fsid = ORANGEFS_FS_ID_NULL; |
| |
| if (op) { |
| switch (op->upcall.type) { |
| case ORANGEFS_VFS_OP_FILE_IO: |
| fsid = op->upcall.req.io.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_LOOKUP: |
| fsid = op->upcall.req.lookup.parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_CREATE: |
| fsid = op->upcall.req.create.parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_GETATTR: |
| fsid = op->upcall.req.getattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_REMOVE: |
| fsid = op->upcall.req.remove.parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_MKDIR: |
| fsid = op->upcall.req.mkdir.parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_READDIR: |
| fsid = op->upcall.req.readdir.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_SETATTR: |
| fsid = op->upcall.req.setattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_SYMLINK: |
| fsid = op->upcall.req.sym.parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_RENAME: |
| fsid = op->upcall.req.rename.old_parent_refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_STATFS: |
| fsid = op->upcall.req.statfs.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_TRUNCATE: |
| fsid = op->upcall.req.truncate.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_RA_FLUSH: |
| fsid = op->upcall.req.ra_cache_flush.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_FS_UMOUNT: |
| fsid = op->upcall.req.fs_umount.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_GETXATTR: |
| fsid = op->upcall.req.getxattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_SETXATTR: |
| fsid = op->upcall.req.setxattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_LISTXATTR: |
| fsid = op->upcall.req.listxattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_REMOVEXATTR: |
| fsid = op->upcall.req.removexattr.refn.fs_id; |
| break; |
| case ORANGEFS_VFS_OP_FSYNC: |
| fsid = op->upcall.req.fsync.refn.fs_id; |
| break; |
| default: |
| break; |
| } |
| } |
| return fsid; |
| } |
| |
| static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs) |
| { |
| int flags = 0; |
| if (attrs->flags & ORANGEFS_IMMUTABLE_FL) |
| flags |= S_IMMUTABLE; |
| else |
| flags &= ~S_IMMUTABLE; |
| if (attrs->flags & ORANGEFS_APPEND_FL) |
| flags |= S_APPEND; |
| else |
| flags &= ~S_APPEND; |
| if (attrs->flags & ORANGEFS_NOATIME_FL) |
| flags |= S_NOATIME; |
| else |
| flags &= ~S_NOATIME; |
| return flags; |
| } |
| |
| static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs) |
| { |
| int perm_mode = 0; |
| |
| if (attrs->perms & ORANGEFS_O_EXECUTE) |
| perm_mode |= S_IXOTH; |
| if (attrs->perms & ORANGEFS_O_WRITE) |
| perm_mode |= S_IWOTH; |
| if (attrs->perms & ORANGEFS_O_READ) |
| perm_mode |= S_IROTH; |
| |
| if (attrs->perms & ORANGEFS_G_EXECUTE) |
| perm_mode |= S_IXGRP; |
| if (attrs->perms & ORANGEFS_G_WRITE) |
| perm_mode |= S_IWGRP; |
| if (attrs->perms & ORANGEFS_G_READ) |
| perm_mode |= S_IRGRP; |
| |
| if (attrs->perms & ORANGEFS_U_EXECUTE) |
| perm_mode |= S_IXUSR; |
| if (attrs->perms & ORANGEFS_U_WRITE) |
| perm_mode |= S_IWUSR; |
| if (attrs->perms & ORANGEFS_U_READ) |
| perm_mode |= S_IRUSR; |
| |
| if (attrs->perms & ORANGEFS_G_SGID) |
| perm_mode |= S_ISGID; |
| if (attrs->perms & ORANGEFS_U_SUID) |
| perm_mode |= S_ISUID; |
| |
| return perm_mode; |
| } |
| |
| /* |
| * NOTE: in kernel land, we never use the sys_attr->link_target for |
| * anything, so don't bother copying it into the sys_attr object here. |
| */ |
| static inline int copy_attributes_from_inode(struct inode *inode, |
| struct ORANGEFS_sys_attr_s *attrs, |
| struct iattr *iattr) |
| { |
| umode_t tmp_mode; |
| |
| if (!iattr || !inode || !attrs) { |
| gossip_err("NULL iattr (%p), inode (%p), attrs (%p) " |
| "in copy_attributes_from_inode!\n", |
| iattr, |
| inode, |
| attrs); |
| return -EINVAL; |
| } |
| /* |
| * We need to be careful to only copy the attributes out of the |
| * iattr object that we know are valid. |
| */ |
| attrs->mask = 0; |
| if (iattr->ia_valid & ATTR_UID) { |
| attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid); |
| attrs->mask |= ORANGEFS_ATTR_SYS_UID; |
| gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner); |
| } |
| if (iattr->ia_valid & ATTR_GID) { |
| attrs->group = from_kgid(&init_user_ns, iattr->ia_gid); |
| attrs->mask |= ORANGEFS_ATTR_SYS_GID; |
| gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group); |
| } |
| |
| if (iattr->ia_valid & ATTR_ATIME) { |
| attrs->mask |= ORANGEFS_ATTR_SYS_ATIME; |
| if (iattr->ia_valid & ATTR_ATIME_SET) { |
| attrs->atime = (time64_t)iattr->ia_atime.tv_sec; |
| attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET; |
| } |
| } |
| if (iattr->ia_valid & ATTR_MTIME) { |
| attrs->mask |= ORANGEFS_ATTR_SYS_MTIME; |
| if (iattr->ia_valid & ATTR_MTIME_SET) { |
| attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec; |
| attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET; |
| } |
| } |
| if (iattr->ia_valid & ATTR_CTIME) |
| attrs->mask |= ORANGEFS_ATTR_SYS_CTIME; |
| |
| /* |
| * ORANGEFS cannot set size with a setattr operation. Probably not likely |
| * to be requested through the VFS, but just in case, don't worry about |
| * ATTR_SIZE |
| */ |
| |
| if (iattr->ia_valid & ATTR_MODE) { |
| tmp_mode = iattr->ia_mode; |
| if (tmp_mode & (S_ISVTX)) { |
| if (is_root_handle(inode)) { |
| /* |
| * allow sticky bit to be set on root (since |
| * it shows up that way by default anyhow), |
| * but don't show it to the server |
| */ |
| tmp_mode -= S_ISVTX; |
| } else { |
| gossip_debug(GOSSIP_UTILS_DEBUG, |
| "User attempted to set sticky bit on non-root directory; returning EINVAL.\n"); |
| return -EINVAL; |
| } |
| } |
| |
| if (tmp_mode & (S_ISUID)) { |
| gossip_debug(GOSSIP_UTILS_DEBUG, |
| "Attempting to set setuid bit (not supported); returning EINVAL.\n"); |
| return -EINVAL; |
| } |
| |
| attrs->perms = ORANGEFS_util_translate_mode(tmp_mode); |
| attrs->mask |= ORANGEFS_ATTR_SYS_PERM; |
| } |
| |
| return 0; |
| } |
| |
| static int orangefs_inode_type(enum orangefs_ds_type objtype) |
| { |
| if (objtype == ORANGEFS_TYPE_METAFILE) |
| return S_IFREG; |
| else if (objtype == ORANGEFS_TYPE_DIRECTORY) |
| return S_IFDIR; |
| else if (objtype == ORANGEFS_TYPE_SYMLINK) |
| return S_IFLNK; |
| else |
| return -1; |
| } |
| |
| static void orangefs_make_bad_inode(struct inode *inode) |
| { |
| if (is_root_handle(inode)) { |
| /* |
| * if this occurs, the pvfs2-client-core was killed but we |
| * can't afford to lose the inode operations and such |
| * associated with the root handle in any case. |
| */ |
| gossip_debug(GOSSIP_UTILS_DEBUG, |
| "*** NOT making bad root inode %pU\n", |
| get_khandle_from_ino(inode)); |
| } else { |
| gossip_debug(GOSSIP_UTILS_DEBUG, |
| "*** making bad inode %pU\n", |
| get_khandle_from_ino(inode)); |
| make_bad_inode(inode); |
| } |
| } |
| |
| static int orangefs_inode_is_stale(struct inode *inode, |
| struct ORANGEFS_sys_attr_s *attrs, char *link_target) |
| { |
| struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| int type = orangefs_inode_type(attrs->objtype); |
| /* |
| * If the inode type or symlink target have changed then this |
| * inode is stale. |
| */ |
| if (type == -1 || !(inode->i_mode & type)) { |
| orangefs_make_bad_inode(inode); |
| return 1; |
| } |
| if (type == S_IFLNK && strncmp(orangefs_inode->link_target, |
| link_target, ORANGEFS_NAME_MAX)) { |
| orangefs_make_bad_inode(inode); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int orangefs_inode_getattr(struct inode *inode, int new, int bypass, |
| u32 request_mask) |
| { |
| struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| struct orangefs_kernel_op_s *new_op; |
| loff_t inode_size, rounded_up_size; |
| int ret, type; |
| |
| gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, |
| get_khandle_from_ino(inode)); |
| |
| if (!new && !bypass) { |
| /* |
| * Must have all the attributes in the mask and be within cache |
| * time. |
| */ |
| if ((request_mask & orangefs_inode->getattr_mask) == |
| request_mask && |
| time_before(jiffies, orangefs_inode->getattr_time)) |
| return 0; |
| } |
| |
| new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); |
| if (!new_op) |
| return -ENOMEM; |
| new_op->upcall.req.getattr.refn = orangefs_inode->refn; |
| /* |
| * Size is the hardest attribute to get. The incremental cost of any |
| * other attribute is essentially zero. |
| */ |
| if (request_mask & STATX_SIZE || new) |
| new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT; |
| else |
| new_op->upcall.req.getattr.mask = |
| ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE; |
| |
| ret = service_operation(new_op, __func__, |
| get_interruptible_flag(inode)); |
| if (ret != 0) |
| goto out; |
| |
| if (!new) { |
| ret = orangefs_inode_is_stale(inode, |
| &new_op->downcall.resp.getattr.attributes, |
| new_op->downcall.resp.getattr.link_target); |
| if (ret) { |
| ret = -ESTALE; |
| goto out; |
| } |
| } |
| |
| type = orangefs_inode_type(new_op-> |
| downcall.resp.getattr.attributes.objtype); |
| switch (type) { |
| case S_IFREG: |
| inode->i_flags = orangefs_inode_flags(&new_op-> |
| downcall.resp.getattr.attributes); |
| if (request_mask & STATX_SIZE || new) { |
| inode_size = (loff_t)new_op-> |
| downcall.resp.getattr.attributes.size; |
| rounded_up_size = |
| (inode_size + (4096 - (inode_size % 4096))); |
| inode->i_size = inode_size; |
| orangefs_inode->blksize = |
| new_op->downcall.resp.getattr.attributes.blksize; |
| spin_lock(&inode->i_lock); |
| inode->i_bytes = inode_size; |
| inode->i_blocks = |
| (unsigned long)(rounded_up_size / 512); |
| spin_unlock(&inode->i_lock); |
| } |
| break; |
| case S_IFDIR: |
| if (request_mask & STATX_SIZE || new) { |
| inode->i_size = PAGE_SIZE; |
| orangefs_inode->blksize = i_blocksize(inode); |
| spin_lock(&inode->i_lock); |
| inode_set_bytes(inode, inode->i_size); |
| spin_unlock(&inode->i_lock); |
| } |
| set_nlink(inode, 1); |
| break; |
| case S_IFLNK: |
| if (new) { |
| inode->i_size = (loff_t)strlen(new_op-> |
| downcall.resp.getattr.link_target); |
| orangefs_inode->blksize = i_blocksize(inode); |
| ret = strscpy(orangefs_inode->link_target, |
| new_op->downcall.resp.getattr.link_target, |
| ORANGEFS_NAME_MAX); |
| if (ret == -E2BIG) { |
| ret = -EIO; |
| goto out; |
| } |
| inode->i_link = orangefs_inode->link_target; |
| } |
| break; |
| /* i.e. -1 */ |
| default: |
| /* XXX: ESTALE? This is what is done if it is not new. */ |
| orangefs_make_bad_inode(inode); |
| ret = -ESTALE; |
| goto out; |
| } |
| |
| inode->i_uid = make_kuid(&init_user_ns, new_op-> |
| downcall.resp.getattr.attributes.owner); |
| inode->i_gid = make_kgid(&init_user_ns, new_op-> |
| downcall.resp.getattr.attributes.group); |
| inode->i_atime.tv_sec = (time64_t)new_op-> |
| downcall.resp.getattr.attributes.atime; |
| inode->i_mtime.tv_sec = (time64_t)new_op-> |
| downcall.resp.getattr.attributes.mtime; |
| inode->i_ctime.tv_sec = (time64_t)new_op-> |
| downcall.resp.getattr.attributes.ctime; |
| inode->i_atime.tv_nsec = 0; |
| inode->i_mtime.tv_nsec = 0; |
| inode->i_ctime.tv_nsec = 0; |
| |
| /* special case: mark the root inode as sticky */ |
| inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) | |
| orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes); |
| |
| orangefs_inode->getattr_time = jiffies + |
| orangefs_getattr_timeout_msecs*HZ/1000; |
| if (request_mask & STATX_SIZE || new) |
| orangefs_inode->getattr_mask = STATX_BASIC_STATS; |
| else |
| orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE; |
| ret = 0; |
| out: |
| op_release(new_op); |
| return ret; |
| } |
| |
| int orangefs_inode_check_changed(struct inode *inode) |
| { |
| struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| struct orangefs_kernel_op_s *new_op; |
| int ret; |
| |
| gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__, |
| get_khandle_from_ino(inode)); |
| |
| new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR); |
| if (!new_op) |
| return -ENOMEM; |
| new_op->upcall.req.getattr.refn = orangefs_inode->refn; |
| new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE | |
| ORANGEFS_ATTR_SYS_LNK_TARGET; |
| |
| ret = service_operation(new_op, __func__, |
| get_interruptible_flag(inode)); |
| if (ret != 0) |
| goto out; |
| |
| ret = orangefs_inode_is_stale(inode, |
| &new_op->downcall.resp.getattr.attributes, |
| new_op->downcall.resp.getattr.link_target); |
| out: |
| op_release(new_op); |
| return ret; |
| } |
| |
| /* |
| * issues a orangefs setattr request to make sure the new attribute values |
| * take effect if successful. returns 0 on success; -errno otherwise |
| */ |
| int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr) |
| { |
| struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| struct orangefs_kernel_op_s *new_op; |
| int ret; |
| |
| new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR); |
| if (!new_op) |
| return -ENOMEM; |
| |
| new_op->upcall.req.setattr.refn = orangefs_inode->refn; |
| ret = copy_attributes_from_inode(inode, |
| &new_op->upcall.req.setattr.attributes, |
| iattr); |
| if (ret >= 0) { |
| ret = service_operation(new_op, __func__, |
| get_interruptible_flag(inode)); |
| |
| gossip_debug(GOSSIP_UTILS_DEBUG, |
| "orangefs_inode_setattr: returning %d\n", |
| ret); |
| } |
| |
| op_release(new_op); |
| |
| if (ret == 0) |
| orangefs_inode->getattr_time = jiffies - 1; |
| |
| return ret; |
| } |
| |
| /* |
| * The following is a very dirty hack that is now a permanent part of the |
| * ORANGEFS protocol. See protocol.h for more error definitions. |
| */ |
| |
/*
 * Lookup table from an ORANGEFS error number (the array index) to the
 * matching errno value. The order matches include/orangefs-types.h in the
 * OrangeFS source and must not be changed. The table is only ever read, so
 * it is const.
 */
static const int PINT_errno_mapping[] = {
	/*  0 */ 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	/*  9 */ EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	/* 17 */ EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	/* 24 */ ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	/* 31 */ EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	/* 38 */ EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	/* 44 */ ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	/* 48 */ EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	/* 53 */ ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	/* 58 */ EACCES, ECONNRESET, ERANGE
};
| |
| int orangefs_normalize_to_errno(__s32 error_code) |
| { |
| __u32 i; |
| |
| /* Success */ |
| if (error_code == 0) { |
| return 0; |
| /* |
| * This shouldn't ever happen. If it does it should be fixed on the |
| * server. |
| */ |
| } else if (error_code > 0) { |
| gossip_err("orangefs: error status received.\n"); |
| gossip_err("orangefs: assuming error code is inverted.\n"); |
| error_code = -error_code; |
| } |
| |
| /* |
| * XXX: This is very bad since error codes from ORANGEFS may not be |
| * suitable for return into userspace. |
| */ |
| |
| /* |
| * Convert ORANGEFS error values into errno values suitable for return |
| * from the kernel. |
| */ |
| if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) { |
| if (((-error_code) & |
| (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT| |
| ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) { |
| /* |
| * cancellation error codes generally correspond to |
| * a timeout from the client's perspective |
| */ |
| error_code = -ETIMEDOUT; |
| } else { |
| /* assume a default error code */ |
| gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code); |
| error_code = -EINVAL; |
| } |
| |
| /* Convert ORANGEFS encoded errno values into regular errno values. */ |
| } else if ((-error_code) & ORANGEFS_ERROR_BIT) { |
| i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS); |
| if (i < ARRAY_SIZE(PINT_errno_mapping)) |
| error_code = -PINT_errno_mapping[i]; |
| else |
| error_code = -EINVAL; |
| |
| /* |
| * Only ORANGEFS protocol error codes should ever come here. Otherwise |
| * there is a bug somewhere. |
| */ |
| } else { |
| gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n"); |
| error_code = -EINVAL; |
| } |
| return error_code; |
| } |
| |
| #define NUM_MODES 11 |
| __s32 ORANGEFS_util_translate_mode(int mode) |
| { |
| int ret = 0; |
| int i = 0; |
| static int modes[NUM_MODES] = { |
| S_IXOTH, S_IWOTH, S_IROTH, |
| S_IXGRP, S_IWGRP, S_IRGRP, |
| S_IXUSR, S_IWUSR, S_IRUSR, |
| S_ISGID, S_ISUID |
| }; |
| static int orangefs_modes[NUM_MODES] = { |
| ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ, |
| ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ, |
| ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ, |
| ORANGEFS_G_SGID, ORANGEFS_U_SUID |
| }; |
| |
| for (i = 0; i < NUM_MODES; i++) |
| if (mode & modes[i]) |
| ret |= orangefs_modes[i]; |
| |
| return ret; |
| } |
| #undef NUM_MODES |