ocfs2: Handle errors while setting external xattr values.
ocfs2 can store extended attribute values as large as a single file. It
does this using a standard ocfs2 btree for the large value. However,
the previous code did not handle all error cases cleanly.
There are multiple problems to have.
1) We have trouble allocating space for a new xattr. This leaves us
with an empty xattr.
2) We overwrote an existing local xattr with a value root, and now we
have an error allocating the storage. This leaves us an empty xattr.
where there used to be a value. The value is lost.
3) We have trouble truncating a reused value. This leaves us with the
original entry pointing to the truncated original value. The value
is lost.
4) We have trouble extending the storage on a reused value. This leaves
us with the original value safely in place, but with more storage
allocated when needed.
This doesn't consider storing local xattrs (values that don't require a
btree). Those only fail when the journal fails.
Case (1) is easy. We just remove the xattr we added. We leak the
storage because we can't safely remove it, but otherwise everything is
happy. We'll print a warning about the leak.
Case (4) is easy. We still have the original value in place. We can
just leave the extra storage attached to this xattr. We return the
error, but the old value is untouched. We print a warning about the
storage.
Case (2) and (3) are hard because we've lost the original values. In
the old code, we ended up with values that could be partially read.
That's not good. Instead, we just wipe the xattr entry and leak the
storage. It stinks that the original value is lost, but now there isn't
a partial value to be read. We'll print a big fat warning.
Signed-off-by: Joel Becker <joel.becker@oracle.com>
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index a2d912a..d1b0d38 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1869,6 +1869,17 @@
.xlo_fill_value_buf = ocfs2_xa_bucket_fill_value_buf,
};
+static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
+{
+ struct ocfs2_xattr_value_buf vb;
+
+ if (ocfs2_xattr_is_local(loc->xl_entry))
+ return 0;
+
+ ocfs2_xa_fill_value_buf(loc, &vb);
+ return le32_to_cpu(vb.vb_xv->xr_clusters);
+}
+
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
struct ocfs2_xattr_set_ctxt *ctxt)
{
@@ -1923,16 +1934,85 @@
}
}
+/*
+ * If we have a problem adjusting the size of an external value during
+ * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
+ * in an intermediate state. For example, the value may be partially
+ * truncated.
+ *
+ * If the value tree hasn't changed, the extend/truncate went nowhere.
+ * We have nothing to do. The caller can treat it as a straight error.
+ *
+ * If the value tree got partially truncated, we now have a corrupted
+ * extended attribute. We're going to wipe its entry and leak the
+ * clusters. Better to leak some storage than leave a corrupt entry.
+ *
+ * If the value tree grew, it obviously didn't grow enough for the
+ * new entry. We're not going to try and reclaim those clusters either.
+ * If there was already an external value there (orig_clusters != 0),
+ * the new clusters are attached safely and we can just leave the old
+ * value in place. If there was no external value there, we remove
+ * the entry.
+ *
+ * This way, the xattr block we store in the journal will be consistent.
+ * If the size change broke because of the journal, no changes will hit
+ * disk anyway.
+ */
+static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
+ const char *what,
+ unsigned int orig_clusters)
+{
+ unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
+ char *nameval_buf = ocfs2_xa_offset_pointer(loc,
+ le16_to_cpu(loc->xl_entry->xe_name_offset));
+
+ if (new_clusters < orig_clusters) {
+ mlog(ML_ERROR,
+ "Partial truncate while %s xattr %.*s. Leaking "
+ "%u clusters and removing the entry\n",
+ what, loc->xl_entry->xe_name_len, nameval_buf,
+ orig_clusters - new_clusters);
+ ocfs2_xa_remove_entry(loc);
+ } else if (!orig_clusters) {
+ mlog(ML_ERROR,
+ "Unable to allocate an external value for xattr "
+ "%.*s safely. Leaking %u clusters and removing the "
+ "entry\n",
+ loc->xl_entry->xe_name_len, nameval_buf,
+ new_clusters - orig_clusters);
+ ocfs2_xa_remove_entry(loc);
+ } else if (new_clusters > orig_clusters)
+ mlog(ML_ERROR,
+ "Unable to grow xattr %.*s safely. %u new clusters "
+ "have been added, but the value will not be "
+ "modified\n",
+ loc->xl_entry->xe_name_len, nameval_buf,
+ new_clusters - orig_clusters);
+}
+
static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
struct ocfs2_xattr_set_ctxt *ctxt)
{
int rc = 0;
+ unsigned int orig_clusters;
if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
if (rc) {
mlog_errno(rc);
- goto out;
+ /*
+ * Since this is remove, we can return 0 if
+ * ocfs2_xa_cleanup_value_truncate() is going to
+ * wipe the entry anyway. So we check the
+ * cluster count as well.
+ */
+ if (orig_clusters != ocfs2_xa_value_clusters(loc))
+ rc = 0;
+ ocfs2_xa_cleanup_value_truncate(loc, "removing",
+ orig_clusters);
+ if (rc)
+ goto out;
}
}
@@ -1963,6 +2043,7 @@
{
int rc = 0;
int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
+ unsigned int orig_clusters;
char *nameval_buf;
int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
@@ -1978,23 +2059,27 @@
if (!xi_local)
ocfs2_xa_install_value_root(loc);
} else {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
if (xi_local) {
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
- if (rc < 0) {
+ if (rc < 0)
mlog_errno(rc);
- goto out;
- }
- memset(nameval_buf + name_size, 0,
- namevalue_size_xe(loc->xl_entry) -
- name_size);
+ else
+ memset(nameval_buf + name_size, 0,
+ namevalue_size_xe(loc->xl_entry) -
+ name_size);
} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
xi->xi_value_len) {
rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
ctxt);
- if (rc < 0) {
+ if (rc < 0)
mlog_errno(rc);
- goto out;
- }
+ }
+
+ if (rc) {
+ ocfs2_xa_cleanup_value_truncate(loc, "reusing",
+ orig_clusters);
+ goto out;
}
}
@@ -2019,6 +2104,8 @@
struct ocfs2_xattr_set_ctxt *ctxt)
{
int rc = 0;
+ unsigned int orig_clusters;
+ __le64 orig_value_size = 0;
rc = ocfs2_xa_check_space(loc, xi);
if (rc)
@@ -2026,6 +2113,7 @@
if (loc->xl_entry) {
if (ocfs2_xa_can_reuse_entry(loc, xi)) {
+ orig_value_size = loc->xl_entry->xe_value_size;
rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
if (rc)
goto out;
@@ -2033,9 +2121,13 @@
}
if (!ocfs2_xattr_is_local(loc->xl_entry)) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
if (rc) {
mlog_errno(rc);
+ ocfs2_xa_cleanup_value_truncate(loc,
+ "overwriting",
+ orig_clusters);
goto out;
}
}
@@ -2053,9 +2145,20 @@
alloc_value:
if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
+ orig_clusters = ocfs2_xa_value_clusters(loc);
rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
- if (rc < 0)
+ if (rc < 0) {
+ /*
+ * If we tried to grow an existing external value,
+ * ocfs2_xa_cleanuP-value_truncate() is going to
+ * let it stand. We have to restore its original
+ * value size.
+ */
+ loc->xl_entry->xe_value_size = orig_value_size;
+ ocfs2_xa_cleanup_value_truncate(loc, "growing",
+ orig_clusters);
mlog_errno(rc);
+ }
}
out:
@@ -2105,25 +2208,30 @@
goto out;
}
+ /*
+ * From here on out, everything is going to modify the buffer a
+ * little. Errors are going to leave the xattr header in a
+ * sane state. Thus, even with errors we dirty the sucker.
+ */
+
/* Don't worry, we are never called with !xi_value and !xl_entry */
if (!xi->xi_value) {
ret = ocfs2_xa_remove(loc, ctxt);
- goto out;
+ goto out_dirty;
}
ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
if (ret) {
if (ret != -ENOSPC)
mlog_errno(ret);
- goto out;
+ goto out_dirty;
}
ret = ocfs2_xa_store_value(loc, xi, ctxt);
- if (ret) {
+ if (ret)
mlog_errno(ret);
- goto out;
- }
+out_dirty:
ocfs2_xa_journal_dirty(ctxt->handle, loc);
out: