btrfs: extended inode refs
This patch adds basic support for extended inode refs. This includes support
for link and unlink of the refs, which basically gets us support for rename
as well.
Inode creation does not need changing - extended refs are only added after
the ref array is full.
Signed-off-by: Mark Fasheh <mfasheh@suse.de>
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7084431b..dc96312 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1109,6 +1109,74 @@
found_key);
}
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+ u64 start_off, struct btrfs_path *path,
+ struct btrfs_inode_extref **ret_extref,
+ u64 *found_off)
+{
+ int ret, slot;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
+ unsigned long ptr;
+
+ key.objectid = inode_objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+ key.offset = start_off;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ while (1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ /*
+ * If the item at offset is not found,
+ * btrfs_search_slot will point us to the slot
+ * where it should be inserted. In our case
+ * that will be the slot directly before the
+ * next INODE_REF_KEY_V2 item. In the case
+ * that we're pointing to the last slot in a
+ * leaf, we must move one leaf over.
+ */
+ ret = btrfs_next_leaf(root, path);
+ if (ret) {
+ if (ret >= 1)
+ ret = -ENOENT;
+ break;
+ }
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ /*
+ * Check that we're still looking at an extended ref key for
+ * this particular objectid. If we have different
+ * objectid or type then there are no more to be found
+ * in the tree and we can exit.
+ */
+ ret = -ENOENT;
+ if (found_key.objectid != inode_objectid)
+ break;
+ if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY)
+ break;
+
+ ret = 0;
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ extref = (struct btrfs_inode_extref *)ptr;
+ *ret_extref = extref;
+ if (found_off)
+ *found_off = found_key.offset;
+ break;
+ }
+
+ return ret;
+}
+
/*
* this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
* of the path are separated by '/' and the path is guaranteed to be
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 4fda5d8..0b920c1 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -70,4 +70,9 @@
struct btrfs_path *path);
void free_ipath(struct inode_fs_paths *ipath);
+int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
+ u64 start_off, struct btrfs_path *path,
+ struct btrfs_inode_extref **ret_extref,
+ u64 *found_off);
+
#endif
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 399521a..50dcd0f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -154,6 +154,13 @@
*/
#define BTRFS_NAME_LEN 255
+/*
+ * Theoretical limit is larger, but we keep this down to a sane
+ * value. That should limit greatly the possibility of collisions on
+ * inode ref items.
+ */
+#define BTRFS_LINK_MAX 65535U
+
/* 32 bytes in various csum fields */
#define BTRFS_CSUM_SIZE 32
@@ -489,6 +496,8 @@
*/
#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
+
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
@@ -496,7 +505,8 @@
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
- BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -643,6 +653,14 @@
/* name goes here */
} __attribute__ ((__packed__));
+struct btrfs_inode_extref {
+ __le64 parent_objectid;
+ __le64 index;
+ __le16 name_len;
+ __u8 name[0];
+ /* name goes here */
+} __attribute__ ((__packed__));
+
struct btrfs_timespec {
__le64 sec;
__le32 nsec;
@@ -1601,6 +1619,7 @@
*/
#define BTRFS_INODE_ITEM_KEY 1
#define BTRFS_INODE_REF_KEY 12
+#define BTRFS_INODE_EXTREF_KEY 13
#define BTRFS_XATTR_ITEM_KEY 24
#define BTRFS_ORPHAN_ITEM_KEY 48
/* reserve 2-15 close to the inode for later flexibility */
@@ -1987,6 +2006,13 @@
BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+/* struct btrfs_inode_extref */
+BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref,
+ parent_objectid, 64);
+BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref,
+ name_len, 16);
+BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64);
+
/* struct btrfs_inode_item */
BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
@@ -3184,12 +3210,12 @@
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index);
-struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod);
+int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, int mod,
+ u64 *ret_index);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
@@ -3197,6 +3223,19 @@
*root, struct btrfs_path *path,
struct btrfs_key *location, int mod);
+struct btrfs_inode_extref *
+btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, int ins_len,
+ int cow);
+
+int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
+ u64 ref_objectid, const char *name,
+ int name_len,
+ struct btrfs_inode_extref **extref_ret);
+
/* file-item.c */
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index db2ff97..1d98281 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -24,4 +24,14 @@
{
return crc32c((u32)~1, name, len);
}
+
+/*
+ * Figure the key offset of an extended inode ref
+ */
+static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
+ int len)
+{
+ return (u64) crc32c(parent_objectid, name, len);
+}
+
#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index a13cf1a..48b8fda 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -18,6 +18,7 @@
#include "ctree.h"
#include "disk-io.h"
+#include "hash.h"
#include "transaction.h"
#include "print-tree.h"
@@ -50,18 +51,57 @@
return 0;
}
-struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod)
+int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
+ const char *name, int name_len,
+ struct btrfs_inode_extref **extref_ret)
{
+ struct extent_buffer *leaf;
+ struct btrfs_inode_extref *extref;
+ unsigned long ptr;
+ unsigned long name_ptr;
+ u32 item_size;
+ u32 cur_offset = 0;
+ int ref_name_len;
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ /*
+ * Search all extended backrefs in this item. We're only
+ * looking through any collisions so most of the time this is
+ * just going to compare against one buffer. If all is well,
+ * we'll return success and the inode ref object.
+ */
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
+ name_ptr = (unsigned long)(&extref->name);
+ ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+
+ if (ref_name_len == name_len &&
+ btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
+ (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)) {
+ if (extref_ret)
+ *extref_ret = extref;
+ return 1;
+ }
+
+ cur_offset += ref_name_len + sizeof(*extref);
+ }
+ return 0;
+}
+
+static struct btrfs_inode_ref *
+btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, int ins_len,
+ int cow)
+{
+ int ret;
struct btrfs_key key;
struct btrfs_inode_ref *ref;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
- int ret;
key.objectid = inode_objectid;
key.type = BTRFS_INODE_REF_KEY;
@@ -77,13 +117,150 @@
return ref;
}
-int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+/* Returns NULL if no extref found */
+struct btrfs_inode_extref *
+btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, int ins_len,
+ int cow)
+{
+ int ret;
+ struct btrfs_key key;
+ struct btrfs_inode_extref *extref;
+
+ key.objectid = inode_objectid;
+ key.type = BTRFS_INODE_EXTREF_KEY;
+ key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ return NULL;
+ if (!btrfs_find_name_in_ext_backref(path, ref_objectid, name, name_len, &extref))
+ return NULL;
+ return extref;
+}
+
+int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, int mod,
+ u64 *ret_index)
+{
+ struct btrfs_inode_ref *ref;
+ struct btrfs_inode_extref *extref;
+ int ins_len = mod < 0 ? -1 : 0;
+ int cow = mod != 0;
+
+ ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len,
+ inode_objectid, ref_objectid, ins_len,
+ cow);
+ if (IS_ERR(ref))
+ return PTR_ERR(ref);
+
+ if (ref != NULL) {
+ *ret_index = btrfs_inode_ref_index(path->nodes[0], ref);
+ return 0;
+ }
+
+ btrfs_release_path(path);
+
+ extref = btrfs_lookup_inode_extref(trans, root, path, name,
+ name_len, inode_objectid,
+ ref_objectid, ins_len, cow);
+ if (IS_ERR(extref))
+ return PTR_ERR(extref);
+
+ if (extref) {
+ *ret_index = btrfs_inode_extref_index(path->nodes[0], extref);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
u64 inode_objectid, u64 ref_objectid, u64 *index)
{
struct btrfs_path *path;
struct btrfs_key key;
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
+ int ret;
+ int del_len = name_len + sizeof(*extref);
+ unsigned long ptr;
+ unsigned long item_start;
+ u32 item_size;
+
+ key.objectid = inode_objectid;
+ btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+ key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->leave_spinning = 1;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Sanity check - did we find the right item for this name?
+ * This should always succeed so error here will make the FS
+ * readonly.
+ */
+ if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
+ name, name_len, &extref)) {
+ btrfs_std_error(root->fs_info, -ENOENT);
+ ret = -EROFS;
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (index)
+ *index = btrfs_inode_extref_index(leaf, extref);
+
+ if (del_len == item_size) {
+ /*
+ * Common case only one ref in the item, remove the
+ * whole item.
+ */
+ ret = btrfs_del_item(trans, root, path);
+ goto out;
+ }
+
+ ptr = (unsigned long)extref;
+ item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ memmove_extent_buffer(leaf, ptr, ptr + del_len,
+ item_size - (ptr + del_len - item_start));
+
+ btrfs_truncate_item(trans, root, path, item_size - del_len, 1);
+
+out:
+ btrfs_free_path(path);
+
+ return ret;
+}
+
+int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 *index)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
struct btrfs_inode_ref *ref;
struct extent_buffer *leaf;
unsigned long ptr;
@@ -91,6 +268,7 @@
u32 item_size;
u32 sub_item_len;
int ret;
+ int search_ext_refs = 0;
int del_len = name_len + sizeof(*ref);
key.objectid = inode_objectid;
@@ -106,12 +284,14 @@
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
ret = -ENOENT;
+ search_ext_refs = 1;
goto out;
} else if (ret < 0) {
goto out;
}
if (!find_name_in_backref(path, name, name_len, &ref)) {
ret = -ENOENT;
+ search_ext_refs = 1;
goto out;
}
leaf = path->nodes[0];
@@ -129,8 +309,78 @@
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
- btrfs_truncate_item(trans, root, path,
- item_size - sub_item_len, 1);
+ btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1);
+out:
+ btrfs_free_path(path);
+
+ if (search_ext_refs) {
+ /*
+ * No refs were found, or we could not find the
+ * name in our ref array. Find and remove the extended
+ * inode ref then.
+ */
+ return btrfs_del_inode_extref(trans, root, name, name_len,
+ inode_objectid, ref_objectid, index);
+ }
+
+ return ret;
+}
+
+/*
+ * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree.
+ *
+ * The caller must have checked against BTRFS_LINK_MAX already.
+ */
+static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid, u64 index)
+{
+ struct btrfs_inode_extref *extref;
+ int ret;
+ int ins_len = name_len + sizeof(*extref);
+ unsigned long ptr;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+
+ key.objectid = inode_objectid;
+ key.type = BTRFS_INODE_EXTREF_KEY;
+ key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ path->leave_spinning = 1;
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ ins_len);
+ if (ret == -EEXIST) {
+ if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ name, name_len, NULL))
+ goto out;
+
+ btrfs_extend_item(trans, root, path, ins_len);
+ ret = 0;
+ }
+ if (ret < 0)
+ goto out;
+
+ leaf = path->nodes[0];
+ item = btrfs_item_nr(leaf, path->slots[0]);
+ ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
+ ptr += btrfs_item_size(leaf, item) - ins_len;
+ extref = (struct btrfs_inode_extref *)ptr;
+
+ btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
+ btrfs_set_inode_extref_index(path->nodes[0], extref, index);
+ btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
+
+ ptr = (unsigned long)&extref->name;
+ write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
out:
btrfs_free_path(path);
return ret;
@@ -191,6 +441,19 @@
out:
btrfs_free_path(path);
+
+ if (ret == -EMLINK) {
+ struct btrfs_super_block *disk_super = root->fs_info->super_copy;
+ /* We ran out of space in the ref array. Need to
+ * add an extended ref. */
+ if (btrfs_super_incompat_flags(disk_super)
+ & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+ ret = btrfs_insert_inode_extref(trans, root, name,
+ name_len,
+ inode_objectid,
+ ref_objectid, index);
+ }
+
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c50f7c..596305e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2903,7 +2903,6 @@
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
struct btrfs_dir_item *di;
struct inode *inode = dentry->d_inode;
u64 index;
@@ -3017,17 +3016,17 @@
}
btrfs_release_path(path);
- ref = btrfs_lookup_inode_ref(trans, root, path,
- dentry->d_name.name, dentry->d_name.len,
- ino, dir_ino, 0);
- if (IS_ERR(ref)) {
- err = PTR_ERR(ref);
+ ret = btrfs_get_inode_ref_index(trans, root, path, dentry->d_name.name,
+ dentry->d_name.len, ino, dir_ino, 0,
+ &index);
+ if (ret) {
+ err = ret;
goto out;
}
- BUG_ON(!ref); /* Logic error */
+
if (check_path_shared(root, path))
goto out;
- index = btrfs_inode_ref_index(path->nodes[0], ref);
+
btrfs_release_path(path);
/*
@@ -4743,6 +4742,12 @@
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
key[0].offset = 0;
+ /*
+ * Start new inodes with an inode_ref. This is slightly more
+ * efficient for small numbers of hard links since they will
+ * be packed into one item. Extended refs will kick in if we
+ * add more hard links than can fit in the ref item.
+ */
key[1].objectid = objectid;
btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
key[1].offset = ref_objectid;
@@ -5049,7 +5054,7 @@
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EXDEV;
- if (inode->i_nlink == ~0U)
+ if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;
err = btrfs_set_inode_index(dir, &index);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 15dae58..1d7b348 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -24,8 +24,10 @@
#include "disk-io.h"
#include "locking.h"
#include "print-tree.h"
+#include "backref.h"
#include "compat.h"
#include "tree-log.h"
+#include "hash.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -743,6 +745,7 @@
*/
static noinline int backref_in_log(struct btrfs_root *log,
struct btrfs_key *key,
+ u64 ref_objectid,
char *name, int namelen)
{
struct btrfs_path *path;
@@ -763,8 +766,17 @@
if (ret != 0)
goto out;
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ name, namelen, NULL))
+ match = 1;
+
+ goto out;
+ }
+
+ item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
ref = (struct btrfs_inode_ref *)ptr;
@@ -790,27 +802,36 @@
struct btrfs_path *path,
struct btrfs_root *log_root,
struct inode *dir, struct inode *inode,
- struct btrfs_key *key,
struct extent_buffer *eb,
- struct btrfs_inode_ref *ref,
- char *name, int namelen, int *search_done)
+ u64 inode_objectid, u64 parent_objectid,
+ u64 ref_index, char *name, int namelen,
+ int *search_done)
{
int ret;
+ char *victim_name;
+ int victim_name_len;
+ struct extent_buffer *leaf;
struct btrfs_dir_item *di;
+ struct btrfs_key search_key;
+ struct btrfs_inode_extref *extref;
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+again:
+ /* Search old style refs */
+ search_key.objectid = inode_objectid;
+ search_key.type = BTRFS_INODE_REF_KEY;
+ search_key.offset = parent_objectid;
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret == 0) {
- char *victim_name;
- int victim_name_len;
struct btrfs_inode_ref *victim_ref;
unsigned long ptr;
unsigned long ptr_end;
- struct extent_buffer *leaf = path->nodes[0];
+
+ leaf = path->nodes[0];
/* are we trying to overwrite a back ref for the root directory
* if so, just jump out, we're done
*/
- if (key->objectid == key->offset)
+ if (search_key.objectid == search_key.offset)
return 1;
/* check all the names in this back reference to see
@@ -830,7 +851,9 @@
(unsigned long)(victim_ref + 1),
victim_name_len);
- if (!backref_in_log(log_root, key, victim_name,
+ if (!backref_in_log(log_root, &search_key,
+ parent_objectid,
+ victim_name,
victim_name_len)) {
btrfs_inc_nlink(inode);
btrfs_release_path(path);
@@ -838,9 +861,14 @@
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
+ BUG_ON(ret);
btrfs_run_delayed_items(trans, root);
+ kfree(victim_name);
+ *search_done = 1;
+ goto again;
}
kfree(victim_name);
+
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
}
BUG_ON(ret);
@@ -853,10 +881,74 @@
}
btrfs_release_path(path);
+ /* Same search but for extended refs */
+ extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
+ inode_objectid, parent_objectid, 0,
+ 0);
+ if (!IS_ERR_OR_NULL(extref)) {
+ u32 item_size;
+ u32 cur_offset = 0;
+ unsigned long base;
+ struct inode *victim_parent;
+
+ leaf = path->nodes[0];
+
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ base = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *)base + cur_offset;
+
+ victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
+
+ if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
+ goto next;
+
+ victim_name = kmalloc(victim_name_len, GFP_NOFS);
+ read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
+ victim_name_len);
+
+ search_key.objectid = inode_objectid;
+ search_key.type = BTRFS_INODE_EXTREF_KEY;
+ search_key.offset = btrfs_extref_hash(parent_objectid,
+ victim_name,
+ victim_name_len);
+ ret = 0;
+ if (!backref_in_log(log_root, &search_key,
+ parent_objectid, victim_name,
+ victim_name_len)) {
+ ret = -ENOENT;
+ victim_parent = read_one_inode(root,
+ parent_objectid);
+ if (victim_parent) {
+ btrfs_inc_nlink(inode);
+ btrfs_release_path(path);
+
+ ret = btrfs_unlink_inode(trans, root,
+ victim_parent,
+ inode,
+ victim_name,
+ victim_name_len);
+ btrfs_run_delayed_items(trans, root);
+ }
+ BUG_ON(ret);
+ iput(victim_parent);
+ kfree(victim_name);
+ *search_done = 1;
+ goto again;
+ }
+ kfree(victim_name);
+ BUG_ON(ret);
+next:
+ cur_offset += victim_name_len + sizeof(*extref);
+ }
+ *search_done = 1;
+ }
+ btrfs_release_path(path);
+
/* look for a conflicting sequence number */
di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
- btrfs_inode_ref_index(eb, ref),
- name, namelen, 0);
+ ref_index, name, namelen, 0);
if (di && !IS_ERR(di)) {
ret = drop_one_dir_item(trans, root, path, dir, di);
BUG_ON(ret);
@@ -875,6 +967,48 @@
return 0;
}
+static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index,
+ u64 *parent_objectid)
+{
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)ref_ptr;
+
+ *namelen = btrfs_inode_extref_name_len(eb, extref);
+ *name = kmalloc(*namelen, GFP_NOFS);
+ if (*name == NULL)
+ return -ENOMEM;
+
+ read_extent_buffer(eb, *name, (unsigned long)&extref->name,
+ *namelen);
+
+ *index = btrfs_inode_extref_index(eb, extref);
+ if (parent_objectid)
+ *parent_objectid = btrfs_inode_extref_parent(eb, extref);
+
+ return 0;
+}
+
+static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index)
+{
+ struct btrfs_inode_ref *ref;
+
+ ref = (struct btrfs_inode_ref *)ref_ptr;
+
+ *namelen = btrfs_inode_ref_name_len(eb, ref);
+ *name = kmalloc(*namelen, GFP_NOFS);
+ if (*name == NULL)
+ return -ENOMEM;
+
+ read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
+
+ *index = btrfs_inode_ref_index(eb, ref);
+
+ return 0;
+}
+
/*
* replay one inode back reference item found in the log tree.
* eb, slot and key refer to the buffer and key found in the log tree.
@@ -888,7 +1022,6 @@
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct btrfs_inode_ref *ref;
struct inode *dir;
struct inode *inode;
unsigned long ref_ptr;
@@ -897,6 +1030,27 @@
int namelen;
int ret;
int search_done = 0;
+ int log_ref_ver = 0;
+ u64 parent_objectid;
+ u64 inode_objectid;
+ u64 ref_index;
+ int ref_struct_size;
+
+ ref_ptr = btrfs_item_ptr_offset(eb, slot);
+ ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ struct btrfs_inode_extref *r;
+
+ ref_struct_size = sizeof(struct btrfs_inode_extref);
+ log_ref_ver = 1;
+ r = (struct btrfs_inode_extref *)ref_ptr;
+ parent_objectid = btrfs_inode_extref_parent(eb, r);
+ } else {
+ ref_struct_size = sizeof(struct btrfs_inode_ref);
+ parent_objectid = key->offset;
+ }
+ inode_objectid = key->objectid;
/*
* it is possible that we didn't log all the parent directories
@@ -904,32 +1058,38 @@
* copy the back ref in. The link count fixup code will take
* care of the rest
*/
- dir = read_one_inode(root, key->offset);
+ dir = read_one_inode(root, parent_objectid);
if (!dir)
return -ENOENT;
- inode = read_one_inode(root, key->objectid);
+ inode = read_one_inode(root, inode_objectid);
if (!inode) {
iput(dir);
return -EIO;
}
- ref_ptr = btrfs_item_ptr_offset(eb, slot);
- ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
-
while (ref_ptr < ref_end) {
- ref = (struct btrfs_inode_ref *)ref_ptr;
-
- namelen = btrfs_inode_ref_name_len(eb, ref);
- name = kmalloc(namelen, GFP_NOFS);
- BUG_ON(!name);
-
- read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
+ if (log_ref_ver) {
+ ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index, &parent_objectid);
+ /*
+ * parent object can change from one array
+ * item to another.
+ */
+ if (!dir)
+ dir = read_one_inode(root, parent_objectid);
+ if (!dir)
+ return -ENOENT;
+ } else {
+ ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index);
+ }
+ if (ret)
+ return ret;
/* if we already have a perfect match, we're done */
if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
- btrfs_inode_ref_index(eb, ref),
- name, namelen)) {
+ ref_index, name, namelen)) {
/*
* look for a conflicting back reference in the
* metadata. if we find one we have to unlink that name
@@ -940,8 +1100,10 @@
if (!search_done) {
ret = __add_inode_ref(trans, root, path, log,
- dir, inode, key, eb, ref,
- name, namelen,
+ dir, inode, eb,
+ inode_objectid,
+ parent_objectid,
+ ref_index, name, namelen,
&search_done);
if (ret == 1)
goto out;
@@ -950,14 +1112,18 @@
/* insert our name */
ret = btrfs_add_link(trans, dir, inode, name, namelen,
- 0, btrfs_inode_ref_index(eb, ref));
+ 0, ref_index);
BUG_ON(ret);
btrfs_update_inode(trans, root, inode);
}
- ref_ptr = (unsigned long)(ref + 1) + namelen;
+ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
+ if (log_ref_ver) {
+ iput(dir);
+ dir = NULL;
+ }
}
/* finally write the back reference in the inode */
@@ -981,25 +1147,55 @@
return ret;
}
-
-/*
- * There are a few corners where the link count of the file can't
- * be properly maintained during replay. So, instead of adding
- * lots of complexity to the log code, we just scan the backrefs
- * for any file that has been through replay.
- *
- * The scan will update the link count on the inode to reflect the
- * number of back refs found. If it goes down to zero, the iput
- * will free the inode.
- */
-static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
+static int count_inode_extrefs(struct btrfs_root *root,
+ struct inode *inode, struct btrfs_path *path)
{
- struct btrfs_path *path;
+ int ret = 0;
+ int name_len;
+ unsigned int nlink = 0;
+ u32 item_size;
+ u32 cur_offset = 0;
+ u64 inode_objectid = btrfs_ino(inode);
+ u64 offset = 0;
+ unsigned long ptr;
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
+
+ while (1) {
+ ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
+ &extref, &offset);
+ if (ret)
+ break;
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
+ name_len = btrfs_inode_extref_name_len(leaf, extref);
+
+ nlink++;
+
+ cur_offset += name_len + sizeof(*extref);
+ }
+
+ offset++;
+ btrfs_release_path(path);
+ }
+ btrfs_release_path(path);
+
+ if (ret < 0)
+ return ret;
+ return nlink;
+}
+
+static int count_inode_refs(struct btrfs_root *root,
+ struct inode *inode, struct btrfs_path *path)
+{
int ret;
struct btrfs_key key;
- u64 nlink = 0;
+ unsigned int nlink = 0;
unsigned long ptr;
unsigned long ptr_end;
int name_len;
@@ -1009,10 +1205,6 @@
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
@@ -1046,6 +1238,50 @@
btrfs_release_path(path);
}
btrfs_release_path(path);
+
+ return nlink;
+}
+
+/*
+ * There are a few corners where the link count of the file can't
+ * be properly maintained during replay. So, instead of adding
+ * lots of complexity to the log code, we just scan the backrefs
+ * for any file that has been through replay.
+ *
+ * The scan will update the link count on the inode to reflect the
+ * number of back refs found. If it goes down to zero, the iput
+ * will free the inode.
+ */
+static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode)
+{
+ struct btrfs_path *path;
+ int ret;
+ u64 nlink = 0;
+ u64 ino = btrfs_ino(inode);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = count_inode_refs(root, inode, path);
+ if (ret < 0)
+ goto out;
+
+ nlink = ret;
+
+ ret = count_inode_extrefs(root, inode, path);
+ if (ret == -ENOENT)
+ ret = 0;
+
+ if (ret < 0)
+ goto out;
+
+ nlink += ret;
+
+ ret = 0;
+
if (nlink != inode->i_nlink) {
set_nlink(inode, nlink);
btrfs_update_inode(trans, root, inode);
@@ -1061,9 +1297,10 @@
ret = insert_orphan_item(trans, root, ino);
BUG_ON(ret);
}
- btrfs_free_path(path);
- return 0;
+out:
+ btrfs_free_path(path);
+ return ret;
}
static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
@@ -1710,6 +1947,10 @@
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
BUG_ON(ret && ret != -ENOENT);
+ } else if (key.type == BTRFS_INODE_EXTREF_KEY) {
+ ret = add_inode_ref(wc->trans, root, log, path,
+ eb, i, &key);
+ BUG_ON(ret && ret != -ENOENT);
} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);