NTFS: - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
This allows us to map runlist fragments with the runlist lock already
held without having to drop and reacquire it around the call. Adapt
all callers.
- Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
runlist. This allows us to find runlist elements with the runlist
lock already held without having to drop and reacquire it around the
call. Adapt all callers.
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index b40c334..9d42393 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -63,6 +63,15 @@
- Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in
the creation of the unmapped runlist element for the base attribute
extent.
+ - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking
+ helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist().
+ This allows us to map runlist fragments with the runlist lock already
+ held without having to drop and reacquire it around the call. Adapt
+ all callers.
+ - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked
+ runlist. This allows us to find runlist elements with the runlist
+ lock already held without having to drop and reacquire it around the
+ call. Adapt all callers.
2.1.22 - Many bug and race fixes and error handling improvements.
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 812d53e..2b4b8b9 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
* aops.c - NTFS kernel address space operations and page cache handling.
* Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -135,7 +135,7 @@
i * rec_size), rec_size);
flush_dcache_page(page);
kunmap_atomic(addr, KM_BIO_SRC_IRQ);
- if (likely(!PageError(page) && page_uptodate))
+ if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);
}
unlock_page(page);
@@ -347,11 +347,11 @@
*/
static int ntfs_readpage(struct file *file, struct page *page)
{
- loff_t i_size;
ntfs_inode *ni, *base_ni;
u8 *kaddr;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
+ unsigned long flags;
u32 attr_len;
int err = 0;
@@ -389,9 +389,9 @@
* Attribute is resident, implying it is not compressed or encrypted.
* This also means the attribute is smaller than an mft record and
* hence smaller than a page, so can simply zero out any pages with
- * index above 0. We can also do this if the file size is 0.
+ * index above 0.
*/
- if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
+ if (unlikely(page->index > 0)) {
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
flush_dcache_page(page);
@@ -418,9 +418,10 @@
if (unlikely(err))
goto put_unm_err_out;
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
- i_size = i_size_read(VFS_I(ni));
- if (unlikely(attr_len > i_size))
- attr_len = i_size;
+ read_lock_irqsave(&ni->size_lock, flags);
+ if (unlikely(attr_len > ni->initialized_size))
+ attr_len = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
kaddr = kmap_atomic(page, KM_USER0);
/* Copy the data to the page. */
memcpy(kaddr, (u8*)ctx->attr +
@@ -1247,20 +1248,6 @@
int err;
BUG_ON(!PageLocked(page));
- /*
- * If a previous ntfs_truncate() failed, repeat it and abort if it
- * fails again.
- */
- if (unlikely(NInoTruncateFailed(ni))) {
- down_write(&vi->i_alloc_sem);
- err = ntfs_truncate(vi);
- up_write(&vi->i_alloc_sem);
- if (err || NInoTruncateFailed(ni)) {
- if (!err)
- err = -EIO;
- goto err_out;
- }
- }
i_size = i_size_read(vi);
/* Is the page fully outside i_size? (truncate in progress) */
if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
@@ -1490,13 +1477,12 @@
read_lock_irqsave(&ni->size_lock, flags);
/*
- * The first out of bounds block for the allocated size. No need to
+ * The first out of bounds block for the allocated size. No need to
* round up as allocated_size is in multiples of cluster size and the
* minimum cluster size is 512 bytes, which is equal to the smallest
* blocksize.
*/
ablock = ni->allocated_size >> blocksize_bits;
-
i_size = i_size_read(vi);
initialized_size = ni->initialized_size;
read_unlock_irqrestore(&ni->size_lock, flags);
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 7d66846..7a16f7c 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
/**
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -30,7 +30,7 @@
#include "types.h"
/**
- * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
* @ni: ntfs inode for which to map (part of) a runlist
* @vcn: map runlist part containing this vcn
*
@@ -38,24 +38,23 @@
*
* Return 0 on success and -errno on error.
*
- * Locking: - The runlist must be unlocked on entry and is unlocked on return.
- * - This function takes the lock for writing and modifies the runlist.
+ * Locking: - The runlist must be locked for writing.
+ * - This function modifies the runlist.
*/
-int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
{
ntfs_inode *base_ni;
- ntfs_attr_search_ctx *ctx;
MFT_RECORD *mrec;
+ ntfs_attr_search_ctx *ctx;
+ runlist_element *rl;
int err = 0;
ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
(unsigned long long)vcn);
-
if (!NInoAttr(ni))
base_ni = ni;
else
base_ni = ni->ext.base_ntfs_ino;
-
mrec = map_mft_record(base_ni);
if (IS_ERR(mrec))
return PTR_ERR(mrec);
@@ -66,15 +65,7 @@
}
err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
CASE_SENSITIVE, vcn, NULL, 0, ctx);
- if (unlikely(err))
- goto put_err_out;
-
- down_write(&ni->runlist.lock);
- /* Make sure someone else didn't do the work while we were sleeping. */
- if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
- LCN_RL_NOT_MAPPED)) {
- runlist_element *rl;
-
+ if (likely(!err)) {
rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr,
ni->runlist.rl);
if (IS_ERR(rl))
@@ -82,9 +73,6 @@
else
ni->runlist.rl = rl;
}
- up_write(&ni->runlist.lock);
-
-put_err_out:
ntfs_attr_put_search_ctx(ctx);
err_out:
unmap_mft_record(base_ni);
@@ -92,17 +80,45 @@
}
/**
- * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode
- * @ni: ntfs inode describing the runlist to search
- * @vcn: vcn to find
- * @need_write: if false, lock for reading and if true, lock for writing
+ * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode
+ * @ni: ntfs inode for which to map (part of) a runlist
+ * @vcn: map runlist part containing this vcn
+ *
+ * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
+ *
+ * Return 0 on success and -errno on error.
+ *
+ * Locking: - The runlist must be unlocked on entry and is unlocked on return.
+ * - This function takes the runlist lock for writing and modifies the
+ * runlist.
+ */
+int ntfs_map_runlist(ntfs_inode *ni, VCN vcn)
+{
+ int err = 0;
+
+ down_write(&ni->runlist.lock);
+ /* Make sure someone else didn't do the work while we were sleeping. */
+ if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <=
+ LCN_RL_NOT_MAPPED))
+ err = ntfs_map_runlist_nolock(ni, vcn);
+ up_write(&ni->runlist.lock);
+ return err;
+}
+
+/**
+ * ntfs_find_vcn_nolock - find a vcn in the runlist described by an ntfs inode
+ * @ni: ntfs inode describing the runlist to search
+ * @vcn: vcn to find
+ * @write_locked: true if the runlist is locked for writing
*
* Find the virtual cluster number @vcn in the runlist described by the ntfs
* inode @ni and return the address of the runlist element containing the @vcn.
- * The runlist is left locked and the caller has to unlock it. If @need_write
- * is true, the runlist is locked for writing and if @need_write is false, the
- * runlist is locked for reading. In the error case, the runlist is not left
- * locked.
+ * The runlist is left locked and the caller has to unlock it. In the error
+ * case, the runlist is left in the same locking state as on entry.
+ *
+ * Note if @write_locked is FALSE the lock may be dropped inside the function
+ * so you cannot rely on the runlist still being the same when this function
+ * returns.
*
* Note you need to distinguish between the lcn of the returned runlist element
* being >= 0 and LCN_HOLE. In the later case you have to return zeroes on
@@ -124,28 +140,24 @@
* true, it is locked for writing. Otherwise is is locked for
* reading.
*/
-runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
- const BOOL need_write)
+runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+ const BOOL write_locked)
{
runlist_element *rl;
int err = 0;
BOOL is_retry = FALSE;
- ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.",
+ ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
ni->mft_no, (unsigned long long)vcn,
- !need_write ? "read" : "writ");
+ write_locked ? "write" : "read");
BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
-lock_retry_remap:
- if (!need_write)
- down_read(&ni->runlist.lock);
- else
- down_write(&ni->runlist.lock);
+retry_remap:
rl = ni->runlist.rl;
if (likely(rl && vcn >= rl[0].vcn)) {
while (likely(rl->length)) {
- if (likely(vcn < rl[1].vcn)) {
+ if (unlikely(vcn < rl[1].vcn)) {
if (likely(rl->lcn >= LCN_HOLE)) {
ntfs_debug("Done.");
return rl;
@@ -161,19 +173,23 @@
err = -EIO;
}
}
- if (!need_write)
- up_read(&ni->runlist.lock);
- else
- up_write(&ni->runlist.lock);
if (!err && !is_retry) {
/*
* The @vcn is in an unmapped region, map the runlist and
* retry.
*/
- err = ntfs_map_runlist(ni, vcn);
+ if (!write_locked) {
+ up_read(&ni->runlist.lock);
+ down_write(&ni->runlist.lock);
+ }
+ err = ntfs_map_runlist_nolock(ni, vcn);
+ if (!write_locked) {
+ up_write(&ni->runlist.lock);
+ down_read(&ni->runlist.lock);
+ }
if (likely(!err)) {
is_retry = TRUE;
- goto lock_retry_remap;
+ goto retry_remap;
}
/*
* -EINVAL and -ENOENT coming from a failed mapping attempt are
@@ -184,7 +200,8 @@
err = -EIO;
} else if (!err)
err = -EIO;
- ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
+ if (err != -ENOENT)
+ ntfs_error(ni->vol->sb, "Failed with error code %i.", err);
return ERR_PTR(err);
}
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index e0c2c6c..3eb4516 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -60,10 +60,11 @@
ATTR_RECORD *base_attr;
} ntfs_attr_search_ctx;
+extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn);
extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn);
-extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn,
- const BOOL need_write);
+extern runlist_element *ntfs_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
+ const BOOL write_locked);
int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
const u32 name_len, const IGNORE_CASE_BOOL ic,
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 5346596..8db4492 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -849,7 +849,8 @@
total_freed = real_freed = 0;
/* This returns with ni->runlist locked for reading on success. */
- rl = ntfs_find_vcn(ni, start_vcn, FALSE);
+ down_read(&ni->runlist.lock);
+ rl = ntfs_find_vcn_nolock(ni, start_vcn, FALSE);
if (IS_ERR(rl)) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find first runlist "
@@ -863,7 +864,7 @@
ntfs_error(vol->sb, "First runlist element has "
"invalid lcn, aborting.");
err = -EIO;
- goto unl_err_out;
+ goto err_out;
}
/* Find the starting cluster inside the run that needs freeing. */
delta = start_vcn - rl->vcn;
@@ -881,7 +882,7 @@
if (!is_rollback)
ntfs_error(vol->sb, "Failed to clear first run "
"(error %i), aborting.", err);
- goto unl_err_out;
+ goto err_out;
}
/* We have freed @to_free real clusters. */
real_freed = to_free;
@@ -901,30 +902,15 @@
if (unlikely(rl->lcn < LCN_HOLE)) {
VCN vcn;
- /*
- * Attempt to map runlist, dropping runlist lock for
- * the duration.
- */
+ /* Attempt to map runlist. */
vcn = rl->vcn;
- up_read(&ni->runlist.lock);
- err = ntfs_map_runlist(ni, vcn);
- if (err) {
- if (!is_rollback)
- ntfs_error(vol->sb, "Failed to map "
- "runlist fragment.");
- if (err == -EINVAL || err == -ENOENT)
- err = -EIO;
- goto err_out;
- }
- /*
- * This returns with ni->runlist locked for reading on
- * success.
- */
- rl = ntfs_find_vcn(ni, vcn, FALSE);
+ rl = ntfs_find_vcn_nolock(ni, vcn, FALSE);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
if (!is_rollback)
- ntfs_error(vol->sb, "Failed to find "
+ ntfs_error(vol->sb, "Failed to map "
+ "runlist fragment or "
+ "failed to find "
"subsequent runlist "
"element.");
goto err_out;
@@ -937,7 +923,7 @@
(unsigned long long)
rl->lcn);
err = -EIO;
- goto unl_err_out;
+ goto err_out;
}
}
/* The number of clusters in this run that need freeing. */
@@ -953,7 +939,7 @@
if (!is_rollback)
ntfs_error(vol->sb, "Failed to clear "
"subsequent run.");
- goto unl_err_out;
+ goto err_out;
}
/* We have freed @to_free real clusters. */
real_freed += to_free;
@@ -974,9 +960,8 @@
/* We are done. Return the number of actually freed clusters. */
ntfs_debug("Done.");
return real_freed;
-unl_err_out:
- up_read(&ni->runlist.lock);
err_out:
+ up_read(&ni->runlist.lock);
if (is_rollback)
return err;
/* If no real clusters were freed, no need to rollback. */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 4e0bf39..0975d73 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
/**
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -287,7 +287,7 @@
}
unmap_mft_record(ni);
ntfs_error(base_ni->vol->sb, "Found stale extent mft "
- "reference! Corrupt file system. "
+ "reference! Corrupt filesystem. "
"Run chkdsk.");
return ERR_PTR(-EIO);
}
@@ -318,7 +318,7 @@
/* Verify the sequence number if it is present. */
if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) {
ntfs_error(base_ni->vol->sb, "Found stale extent mft "
- "reference! Corrupt file system. Run chkdsk.");
+ "reference! Corrupt filesystem. Run chkdsk.");
destroy_ni = TRUE;
m = ERR_PTR(-EIO);
goto unm_err_out;
@@ -1292,19 +1292,20 @@
/*
* Determine the last lcn of the mft bitmap. The allocated size of the
* mft bitmap cannot be zero so we are ok to do this.
- * ntfs_find_vcn() returns the runlist locked on success.
*/
+ down_write(&mftbmp_ni->runlist.lock);
read_lock_irqsave(&mftbmp_ni->size_lock, flags);
ll = mftbmp_ni->allocated_size;
read_unlock_irqrestore(&mftbmp_ni->size_lock, flags);
- rl = ntfs_find_vcn(mftbmp_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+ rl = ntfs_find_vcn_nolock(mftbmp_ni,
+ (ll - 1) >> vol->cluster_size_bits, TRUE);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+ up_write(&mftbmp_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
"cluster of mft bitmap attribute.");
- if (!IS_ERR(rl)) {
- up_write(&mftbmp_ni->runlist.lock);
+ if (!IS_ERR(rl))
ret = -EIO;
- } else
+ else
ret = PTR_ERR(rl);
return ret;
}
@@ -1428,6 +1429,8 @@
// TODO: Deal with this by moving this extent to a new mft
// record or by starting a new extent in a new mft record or by
// moving other attributes out of this mft record.
+ // Note: It will need to be a special mft record and if none of
+ // those are available it gets rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
"accomodate extended mft bitmap attribute "
"extent. Cannot handle this yet.");
@@ -1719,19 +1722,20 @@
* Determine the preferred allocation location, i.e. the last lcn of
* the mft data attribute. The allocated size of the mft data
* attribute cannot be zero so we are ok to do this.
- * ntfs_find_vcn() returns the runlist locked on success.
*/
+ down_write(&mft_ni->runlist.lock);
read_lock_irqsave(&mft_ni->size_lock, flags);
ll = mft_ni->allocated_size;
read_unlock_irqrestore(&mft_ni->size_lock, flags);
- rl = ntfs_find_vcn(mft_ni, (ll - 1) >> vol->cluster_size_bits, TRUE);
+ rl = ntfs_find_vcn_nolock(mft_ni, (ll - 1) >> vol->cluster_size_bits,
+ TRUE);
if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) {
+ up_write(&mft_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to determine last allocated "
"cluster of mft data attribute.");
- if (!IS_ERR(rl)) {
- up_write(&mft_ni->runlist.lock);
+ if (!IS_ERR(rl))
ret = -EIO;
- } else
+ else
ret = PTR_ERR(rl);
return ret;
}
@@ -1858,7 +1862,11 @@
// moving other attributes out of this mft record.
// Note: Use the special reserved mft records and ensure that
// this extent is not required to find the mft record in
- // question.
+ // question. If no free special records left we would need to
+ // move an existing record away, insert ours in its place, and
+ // then place the moved record into the newly allocated space
+ // and we would then need to update all references to this mft
+ // record appropriately. This is rather complicated...
ntfs_error(vol->sb, "Not enough space in this mft record to "
"accomodate extended mft data attribute "
"extent. Cannot handle this yet.");
@@ -2021,7 +2029,7 @@
"reports this as corruption, please email "
"linux-ntfs-dev@lists.sourceforge.net stating "
"that you saw this message and that the "
- "modified file system created was corrupt. "
+ "modified filesystem created was corrupt. "
"Thank you.");
}
/* Set the update sequence number to 1. */