Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block bits from Jens Axboe:
"As vacation is coming up, thought I'd better get rid of my pending
changes in my for-linus branch for this iteration. It contains:
- Two patches for mtip32xx. Killing a non-compliant sysfs interface
and moving it to debugfs, where it belongs.
- A few patches from Asias. Two legit bug fixes, and one killing an
interface that is no longer in use.
- A patch from Jan, making the annoying partition ioctl warning a bit
less annoying, by restricting it to !CAP_SYS_RAWIO only.
- Three bug fixes for drbd from Lars Ellenberg.
- A fix for an old regression for umem, it hasn't really worked since
the plugging scheme was changed in 3.0.
- A few fixes from Tejun.
- A splice fix from Eric Dumazet, fixing an issue with pipe
resizing."
* 'for-linus' of git://git.kernel.dk/linux-block:
scsi: Silence unnecessary warnings about ioctl to partition
block: Drop dead function blk_abort_queue()
block: Mitigate lock unbalance caused by lock switching
block: Avoid missed wakeup in request waitqueue
umem: fix up unplugging
splice: fix racy pipe->buffers uses
drbd: fix null pointer dereference with on-congestion policy when diskless
drbd: fix list corruption by failing but already aborted reads
drbd: fix access of unallocated pages and kernel panic
xen/blkfront: Add WARN to deal with misbehaving backends.
blkcg: drop local variable @q from blkg_destroy()
mtip32xx: Create debugfs entries for troubleshooting
mtip32xx: Remove 'registers' and 'flags' from sysfs
blkcg: fix blkg_alloc() failure path
block: blkcg_policy_cfq shouldn't be used if !CONFIG_CFQ_GROUP_IOSCHED
block: fix return value on cfq_init() failure
mtip32xx: Remove version.h header file inclusion
xen/blkback: Copy id field when doing BLKIF_DISCARD.
diff --git a/Documentation/ABI/testing/sysfs-block-rssd b/Documentation/ABI/testing/sysfs-block-rssd
index 679ce35..beef30c 100644
--- a/Documentation/ABI/testing/sysfs-block-rssd
+++ b/Documentation/ABI/testing/sysfs-block-rssd
@@ -1,26 +1,5 @@
-What: /sys/block/rssd*/registers
-Date: March 2012
-KernelVersion: 3.3
-Contact: Asai Thambi S P <asamymuthupa@micron.com>
-Description: This is a read-only file. Dumps below driver information and
- hardware registers.
- - S ACTive
- - Command Issue
- - Completed
- - PORT IRQ STAT
- - HOST IRQ STAT
- - Allocated
- - Commands in Q
-
What: /sys/block/rssd*/status
Date: April 2012
KernelVersion: 3.4
Contact: Asai Thambi S P <asamymuthupa@micron.com>
Description: This is a read-only file. Indicates the status of the device.
-
-What: /sys/block/rssd*/flags
-Date: May 2012
-KernelVersion: 3.5
-Contact: Asai Thambi S P <asamymuthupa@micron.com>
-Description: This is a read-only file. Dumps the flags in port and driver
- data structure
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 02cf633..e7dee61 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -125,12 +125,8 @@
blkg->pd[i] = pd;
pd->blkg = blkg;
- }
- /* invoke per-policy init */
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
-
+ /* invoke per-policy init */
if (blkcg_policy_enabled(blkg->q, pol))
pol->pd_init_fn(blkg);
}
@@ -245,10 +241,9 @@
static void blkg_destroy(struct blkcg_gq *blkg)
{
- struct request_queue *q = blkg->q;
struct blkcg *blkcg = blkg->blkcg;
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
/* Something wrong if we are trying to remove same group twice */
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c923a7..93eb3e4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -361,9 +361,10 @@
*/
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
+ int i;
+
while (true) {
bool drain = false;
- int i;
spin_lock_irq(q->queue_lock);
@@ -408,6 +409,18 @@
break;
msleep(10);
}
+
+ /*
+ * With queue marked dead, any woken up waiter will fail the
+ * allocation path, so the wakeup chaining is lost and we're
+ * left with hung waiters. We need to wake up those waiters.
+ */
+ if (q->request_fn) {
+ spin_lock_irq(q->queue_lock);
+ for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
+ wake_up_all(&q->rq.wait[i]);
+ spin_unlock_irq(q->queue_lock);
+ }
}
/**
@@ -467,7 +480,6 @@
/* mark @q DEAD, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
-
spin_lock_irq(lock);
/*
@@ -485,10 +497,6 @@
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
queue_flag_set(QUEUE_FLAG_DEAD, q);
-
- if (q->queue_lock != &q->__queue_lock)
- q->queue_lock = &q->__queue_lock;
-
spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
@@ -499,6 +507,11 @@
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
blk_sync_queue(q);
+ spin_lock_irq(lock);
+ if (q->queue_lock != &q->__queue_lock)
+ q->queue_lock = &q->__queue_lock;
+ spin_unlock_irq(lock);
+
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 7803548..6e4744c 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -197,44 +197,3 @@
mod_timer(&q->timeout, expiry);
}
-/**
- * blk_abort_queue -- Abort all request on given queue
- * @queue: pointer to queue
- *
- */
-void blk_abort_queue(struct request_queue *q)
-{
- unsigned long flags;
- struct request *rq, *tmp;
- LIST_HEAD(list);
-
- /*
- * Not a request based block device, nothing to abort
- */
- if (!q->request_fn)
- return;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- elv_abort_queue(q);
-
- /*
- * Splice entries to local list, to avoid deadlocking if entries
- * get readded to the timeout list by error handling
- */
- list_splice_init(&q->timeout_list, &list);
-
- list_for_each_entry_safe(rq, tmp, &list, timeout_list)
- blk_abort_request(rq);
-
- /*
- * Occasionally, blk_abort_request() will return without
- * deleting the element from the list. Make sure we add those back
- * instead of leaving them on the local stack list.
- */
- list_splice(&list, &q->timeout_list);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
-}
-EXPORT_SYMBOL_GPL(blk_abort_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 673c977..fb52df97 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,8 +17,6 @@
#include "blk.h"
#include "blk-cgroup.h"
-static struct blkcg_policy blkcg_policy_cfq __maybe_unused;
-
/*
* tunables
*/
@@ -418,11 +416,6 @@
return pd ? container_of(pd, struct cfq_group, pd) : NULL;
}
-static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
-{
- return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
-}
-
static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
{
return pd_to_blkg(&cfqg->pd);
@@ -572,6 +565,13 @@
#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static struct blkcg_policy blkcg_policy_cfq;
+
+static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
+{
+ return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
+}
+
static inline void cfqg_get(struct cfq_group *cfqg)
{
return blkg_get(cfqg_to_blkg(cfqg));
@@ -3951,10 +3951,11 @@
cfq_shutdown_timer_wq(cfqd);
-#ifndef CONFIG_CFQ_GROUP_IOSCHED
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ blkcg_deactivate_policy(q, &blkcg_policy_cfq);
+#else
kfree(cfqd->root_group);
#endif
- blkcg_deactivate_policy(q, &blkcg_policy_cfq);
kfree(cfqd);
}
@@ -4194,14 +4195,15 @@
#ifdef CONFIG_CFQ_GROUP_IOSCHED
if (!cfq_group_idle)
cfq_group_idle = 1;
-#else
- cfq_group_idle = 0;
-#endif
ret = blkcg_policy_register(&blkcg_policy_cfq);
if (ret)
return ret;
+#else
+ cfq_group_idle = 0;
+#endif
+ ret = -ENOMEM;
cfq_pool = KMEM_CACHE(cfq_queue, 0);
if (!cfq_pool)
goto err_pol_unreg;
@@ -4215,13 +4217,17 @@
err_free_pool:
kmem_cache_destroy(cfq_pool);
err_pol_unreg:
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
return ret;
}
static void __exit cfq_exit(void)
{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
elv_unregister(&iosched_cfq);
kmem_cache_destroy(cfq_pool);
}
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 260fa80e..9a87daa 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -721,11 +721,14 @@
break;
}
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
/* In particular, rule out all resets and host-specific ioctls. */
printk_ratelimited(KERN_WARNING
"%s: sending ioctl %x to a partition!\n", current->comm, cmd);
- return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+ return -ENOIOCTLCMD;
}
EXPORT_SYMBOL(scsi_verify_blk_ioctl);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b5c5ff5..fcb956b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1475,10 +1475,17 @@
first_word = 0;
spin_lock_irq(&b->bm_lock);
}
-
/* last page (respectively only page, for first page == last page) */
last_word = MLPP(el >> LN2_BPL);
- bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples).
+ * ==> e = 32767, el = 32768, last_page = 2,
+ * and now last_word = 0.
+ * We do not want to touch last_page in this case,
+ * as we did not allocate it, it is not present in bitmap->bm_pages.
+ */
+ if (last_word)
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9c5c849..8e93a6a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -472,12 +472,17 @@
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ if (req->rq_state & RQ_LOCAL_ABORTED) {
+ _req_may_be_done(req, m);
+ break;
+ }
__drbd_chk_io_error(mdev, false);
goto_queue_for_net_read:
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -765,6 +770,40 @@
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
+static void maybe_pull_ahead(struct drbd_conf *mdev)
+{
+ int congested = 0;
+
+ /* If I don't even have good local storage, we can not reasonably try
+ * to pull ahead of the peer. We also need the local reference to make
+ * sure mdev->act_log is there.
+ * Note: caller has to make sure that net_conf is there.
+ */
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+ return;
+
+ if (mdev->net_conf->cong_fill &&
+ atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+ dev_info(DEV, "Congestion-fill threshold reached\n");
+ congested = 1;
+ }
+
+ if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+ dev_info(DEV, "Congestion-extents threshold reached\n");
+ congested = 1;
+ }
+
+ if (congested) {
+ queue_barrier(mdev); /* last barrier, after mirrored writes */
+
+ if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+ _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
+ else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
+ }
+ put_ldev(mdev);
+}
+
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
const int rw = bio_rw(bio);
@@ -972,29 +1011,8 @@
_req_mod(req, queue_for_send_oos);
if (remote &&
- mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
- int congested = 0;
-
- if (mdev->net_conf->cong_fill &&
- atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
- dev_info(DEV, "Congestion-fill threshold reached\n");
- congested = 1;
- }
-
- if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
- dev_info(DEV, "Congestion-extents threshold reached\n");
- congested = 1;
- }
-
- if (congested) {
- queue_barrier(mdev); /* last barrier, after mirrored writes */
-
- if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
- _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
- else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
- _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
- }
- }
+ mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
+ maybe_pull_ahead(mdev);
spin_unlock_irq(&mdev->req_lock);
kfree(b); /* if someone else has beaten us to it... */
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 264bc77..a8fddeb 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -37,6 +37,7 @@
#include <linux/kthread.h>
#include <../drivers/ata/ahci.h>
#include <linux/export.h>
+#include <linux/debugfs.h>
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
@@ -85,6 +86,7 @@
* allocated in mtip_init().
*/
static int mtip_major;
+static struct dentry *dfs_parent;
static DEFINE_SPINLOCK(rssd_index_lock);
static DEFINE_IDA(rssd_index_ida);
@@ -2546,7 +2548,7 @@
}
/*
- * Sysfs register/status dump.
+ * Sysfs status dump.
*
* @dev Pointer to the device structure, passed by the kernrel.
* @attr Pointer to the device_attribute structure passed by the kernel.
@@ -2555,71 +2557,6 @@
* return value
* The size, in bytes, of the data copied into buf.
*/
-static ssize_t mtip_hw_show_registers(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- u32 group_allocated;
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
- int n;
-
- size += sprintf(&buf[size], "Hardware\n--------\n");
- size += sprintf(&buf[size], "S ACTive : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--)
- size += sprintf(&buf[size], "%08X ",
- readl(dd->port->s_active[n]));
-
- size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Command Issue : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--)
- size += sprintf(&buf[size], "%08X ",
- readl(dd->port->cmd_issue[n]));
-
- size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Completed : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--)
- size += sprintf(&buf[size], "%08X ",
- readl(dd->port->completed[n]));
-
- size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "PORT IRQ STAT : [ 0x%08X ]\n",
- readl(dd->port->mmio + PORT_IRQ_STAT));
- size += sprintf(&buf[size], "HOST IRQ STAT : [ 0x%08X ]\n",
- readl(dd->mmio + HOST_IRQ_STAT));
- size += sprintf(&buf[size], "\n");
-
- size += sprintf(&buf[size], "Local\n-----\n");
- size += sprintf(&buf[size], "Allocated : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--) {
- if (sizeof(long) > sizeof(u32))
- group_allocated =
- dd->port->allocated[n/2] >> (32*(n&1));
- else
- group_allocated = dd->port->allocated[n];
- size += sprintf(&buf[size], "%08X ", group_allocated);
- }
- size += sprintf(&buf[size], "]\n");
-
- size += sprintf(&buf[size], "Commands in Q: [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--) {
- if (sizeof(long) > sizeof(u32))
- group_allocated =
- dd->port->cmds_to_issue[n/2] >> (32*(n&1));
- else
- group_allocated = dd->port->cmds_to_issue[n];
- size += sprintf(&buf[size], "%08X ", group_allocated);
- }
- size += sprintf(&buf[size], "]\n");
-
- return size;
-}
-
static ssize_t mtip_hw_show_status(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -2637,24 +2574,121 @@
return size;
}
-static ssize_t mtip_hw_show_flags(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+
+static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
{
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ u32 group_allocated;
+ int size = *offset;
+ int n;
- size += sprintf(&buf[size], "Flag in port struct : [ %08lX ]\n",
- dd->port->flags);
- size += sprintf(&buf[size], "Flag in dd struct : [ %08lX ]\n",
- dd->dd_flag);
+ if (!len || size)
+ return 0;
- return size;
+ if (size < 0)
+ return -EINVAL;
+
+ size += sprintf(&buf[size], "H/ S ACTive : [ 0x");
+
+ for (n = dd->slot_groups-1; n >= 0; n--)
+ size += sprintf(&buf[size], "%08X ",
+ readl(dd->port->s_active[n]));
+
+ size += sprintf(&buf[size], "]\n");
+ size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
+
+ for (n = dd->slot_groups-1; n >= 0; n--)
+ size += sprintf(&buf[size], "%08X ",
+ readl(dd->port->cmd_issue[n]));
+
+ size += sprintf(&buf[size], "]\n");
+ size += sprintf(&buf[size], "H/ Completed : [ 0x");
+
+ for (n = dd->slot_groups-1; n >= 0; n--)
+ size += sprintf(&buf[size], "%08X ",
+ readl(dd->port->completed[n]));
+
+ size += sprintf(&buf[size], "]\n");
+ size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
+ readl(dd->port->mmio + PORT_IRQ_STAT));
+ size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
+ readl(dd->mmio + HOST_IRQ_STAT));
+ size += sprintf(&buf[size], "\n");
+
+ size += sprintf(&buf[size], "L/ Allocated : [ 0x");
+
+ for (n = dd->slot_groups-1; n >= 0; n--) {
+ if (sizeof(long) > sizeof(u32))
+ group_allocated =
+ dd->port->allocated[n/2] >> (32*(n&1));
+ else
+ group_allocated = dd->port->allocated[n];
+ size += sprintf(&buf[size], "%08X ", group_allocated);
+ }
+ size += sprintf(&buf[size], "]\n");
+
+ size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
+
+ for (n = dd->slot_groups-1; n >= 0; n--) {
+ if (sizeof(long) > sizeof(u32))
+ group_allocated =
+ dd->port->cmds_to_issue[n/2] >> (32*(n&1));
+ else
+ group_allocated = dd->port->cmds_to_issue[n];
+ size += sprintf(&buf[size], "%08X ", group_allocated);
+ }
+ size += sprintf(&buf[size], "]\n");
+
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
}
-static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL);
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
-static DEVICE_ATTR(flags, S_IRUGO, mtip_hw_show_flags, NULL);
+static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
+{
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ int size = *offset;
+
+ if (!len || size)
+ return 0;
+
+ if (size < 0)
+ return -EINVAL;
+
+ size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
+ dd->port->flags);
+ size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n",
+ dd->dd_flag);
+
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
+}
+
+static const struct file_operations mtip_regs_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_registers,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations mtip_flags_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_flags,
+ .llseek = no_llseek,
+};
/*
* Create the sysfs related attributes.
@@ -2671,15 +2705,9 @@
if (!kobj || !dd)
return -EINVAL;
- if (sysfs_create_file(kobj, &dev_attr_registers.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'registers' sysfs entry\n");
if (sysfs_create_file(kobj, &dev_attr_status.attr))
dev_warn(&dd->pdev->dev,
"Error creating 'status' sysfs entry\n");
- if (sysfs_create_file(kobj, &dev_attr_flags.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'flags' sysfs entry\n");
return 0;
}
@@ -2698,13 +2726,39 @@
if (!kobj || !dd)
return -EINVAL;
- sysfs_remove_file(kobj, &dev_attr_registers.attr);
sysfs_remove_file(kobj, &dev_attr_status.attr);
- sysfs_remove_file(kobj, &dev_attr_flags.attr);
return 0;
}
+static int mtip_hw_debugfs_init(struct driver_data *dd)
+{
+ if (!dfs_parent)
+ return -1;
+
+ dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
+ if (IS_ERR_OR_NULL(dd->dfs_node)) {
+ dev_warn(&dd->pdev->dev,
+ "Error creating node %s under debugfs\n",
+ dd->disk->disk_name);
+ dd->dfs_node = NULL;
+ return -1;
+ }
+
+ debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
+ &mtip_flags_fops);
+ debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
+ &mtip_regs_fops);
+
+ return 0;
+}
+
+static void mtip_hw_debugfs_exit(struct driver_data *dd)
+{
+ debugfs_remove_recursive(dd->dfs_node);
+}
+
+
/*
* Perform any init/resume time hardware setup
*
@@ -3730,6 +3784,7 @@
mtip_hw_sysfs_init(dd, kobj);
kobject_put(kobj);
}
+ mtip_hw_debugfs_init(dd);
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
@@ -3755,6 +3810,8 @@
return rv;
kthread_run_error:
+ mtip_hw_debugfs_exit(dd);
+
/* Delete our gendisk. This also removes the device from /dev */
del_gendisk(dd->disk);
@@ -3805,6 +3862,7 @@
kobject_put(kobj);
}
}
+ mtip_hw_debugfs_exit(dd);
/*
* Delete our gendisk structure. This also removes the device
@@ -4152,10 +4210,20 @@
}
mtip_major = error;
+ if (!dfs_parent) {
+ dfs_parent = debugfs_create_dir("rssd", NULL);
+ if (IS_ERR_OR_NULL(dfs_parent)) {
+ printk(KERN_WARNING "Error creating debugfs parent\n");
+ dfs_parent = NULL;
+ }
+ }
+
/* Register our PCI operations. */
error = pci_register_driver(&mtip_pci_driver);
- if (error)
+ if (error) {
+ debugfs_remove(dfs_parent);
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
+ }
return error;
}
@@ -4172,6 +4240,8 @@
*/
static void __exit mtip_exit(void)
{
+ debugfs_remove_recursive(dfs_parent);
+
/* Release the allocated major block device number. */
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index b2c88da..f51fc23 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -26,7 +26,6 @@
#include <linux/ata.h>
#include <linux/interrupt.h>
#include <linux/genhd.h>
-#include <linux/version.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
@@ -111,6 +110,8 @@
#define dbg_printk(format, arg...)
#endif
+#define MTIP_DFS_MAX_BUF_SIZE 1024
+
#define __force_bit2int (unsigned int __force)
enum {
@@ -447,6 +448,8 @@
unsigned long dd_flag; /* NOTE: use atomic bit operations on this */
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
+
+ struct dentry *dfs_node;
};
#endif
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index aa27120..9a72277 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,6 +513,44 @@
}
}
+struct mm_plug_cb {
+ struct blk_plug_cb cb;
+ struct cardinfo *card;
+};
+
+static void mm_unplug(struct blk_plug_cb *cb)
+{
+ struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb);
+
+ spin_lock_irq(&mmcb->card->lock);
+ activate(mmcb->card);
+ spin_unlock_irq(&mmcb->card->lock);
+ kfree(mmcb);
+}
+
+static int mm_check_plugged(struct cardinfo *card)
+{
+ struct blk_plug *plug = current->plug;
+ struct mm_plug_cb *mmcb;
+
+ if (!plug)
+ return 0;
+
+ list_for_each_entry(mmcb, &plug->cb_list, cb.list) {
+ if (mmcb->cb.callback == mm_unplug && mmcb->card == card)
+ return 1;
+ }
+ /* Not currently on the callback list */
+ mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC);
+ if (!mmcb)
+ return 0;
+
+ mmcb->card = card;
+ mmcb->cb.callback = mm_unplug;
+ list_add(&mmcb->cb.list, &plug->cb_list);
+ return 1;
+}
+
static void mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
@@ -523,6 +561,8 @@
*card->biotail = bio;
bio->bi_next = NULL;
card->biotail = &bio->bi_next;
+ if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card))
+ activate(card);
spin_unlock_irq(&card->lock);
return;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 773cf27..9ad3b5e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -257,6 +257,7 @@
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -287,6 +288,7 @@
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60eed4b..e4fb337 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -141,14 +141,36 @@
return free;
}
-static void add_id_to_freelist(struct blkfront_info *info,
+static int add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
+ if (info->shadow[id].req.u.rw.id != id)
+ return -EINVAL;
+ if (info->shadow[id].request == NULL)
+ return -EINVAL;
info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
+ return 0;
}
+static const char *op_name(int op)
+{
+ static const char *const names[] = {
+ [BLKIF_OP_READ] = "read",
+ [BLKIF_OP_WRITE] = "write",
+ [BLKIF_OP_WRITE_BARRIER] = "barrier",
+ [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
+ [BLKIF_OP_DISCARD] = "discard" };
+
+ if (op < 0 || op >= ARRAY_SIZE(names))
+ return "unknown";
+
+ if (!names[op])
+ return "reserved";
+
+ return names[op];
+}
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
unsigned int end = minor + nr;
@@ -746,20 +768,36 @@
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
+ /*
+ * The backend has messed up and given us an id that we would
+ * never have given to it (we stamp it up to BLK_RING_SIZE -
+ * look in get_id_from_freelist.
+ */
+ if (id >= BLK_RING_SIZE) {
+ WARN(1, "%s: response to %s has incorrect id (%ld)\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ /* We can't safely get the 'struct request' as
+ * the id is busted. */
+ continue;
+ }
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
blkif_completion(&info->shadow[id]);
- add_id_to_freelist(info, id);
+ if (add_id_to_freelist(info, id)) {
+ WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ continue;
+ }
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: discard op failed\n",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
@@ -771,18 +809,14 @@
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(error)) {
diff --git a/fs/splice.c b/fs/splice.c
index c9f1318..7bf08fa 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -273,13 +273,16 @@
* Check if we need to grow the arrays holding pages and partial page
* descriptions.
*/
-int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{
- if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ unsigned int buffers = ACCESS_ONCE(pipe->buffers);
+
+ spd->nr_pages_max = buffers;
+ if (buffers <= PIPE_DEF_BUFFERS)
return 0;
- spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
- spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+ spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL);
+ spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);
if (spd->pages && spd->partial)
return 0;
@@ -289,10 +292,9 @@
return -ENOMEM;
}
-void splice_shrink_spd(struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
+void splice_shrink_spd(struct splice_pipe_desc *spd)
{
- if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
return;
kfree(spd->pages);
@@ -315,6 +317,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -326,7 +329,7 @@
index = *ppos >> PAGE_CACHE_SHIFT;
loff = *ppos & ~PAGE_CACHE_MASK;
req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- nr_pages = min(req_pages, pipe->buffers);
+ nr_pages = min(req_pages, spd.nr_pages_max);
/*
* Lookup the (hopefully) full range of pages we need.
@@ -497,7 +500,7 @@
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return error;
}
@@ -598,6 +601,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &default_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -608,8 +612,8 @@
res = -ENOMEM;
vec = __vec;
- if (pipe->buffers > PIPE_DEF_BUFFERS) {
- vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+ if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
+ vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
if (!vec)
goto shrink_ret;
}
@@ -617,7 +621,7 @@
offset = *ppos & ~PAGE_CACHE_MASK;
nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
+ for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
struct page *page;
page = alloc_page(GFP_USER);
@@ -665,7 +669,7 @@
shrink_ret:
if (vec != __vec)
kfree(vec);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return res;
err:
@@ -1614,6 +1618,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &user_page_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -1629,13 +1634,13 @@
spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
spd.partial, false,
- pipe->buffers);
+ spd.nr_pages_max);
if (spd.nr_pages <= 0)
ret = spd.nr_pages;
else
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba43f40..07954b0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -827,7 +827,6 @@
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
-extern void blk_abort_queue(struct request_queue *);
extern void blk_unprep_request(struct request *);
/*
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 26e5b61..09a545a 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -51,7 +51,8 @@
struct splice_pipe_desc {
struct page **pages; /* page map */
struct partial_page *partial; /* pages[] may not be contig */
- int nr_pages; /* number of pages in map */
+ int nr_pages; /* number of populated pages in map */
+ unsigned int nr_pages_max; /* pages[] & partial[] arrays size */
unsigned int flags; /* splice flags */
const struct pipe_buf_operations *ops;/* ops associated with output pipe */
void (*spd_release)(struct splice_pipe_desc *, unsigned int);
@@ -85,9 +86,8 @@
/*
* for dynamic pipe sizing
*/
-extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
-extern void splice_shrink_spd(struct pipe_inode_info *,
- struct splice_pipe_desc *);
+extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *);
+extern void splice_shrink_spd(struct splice_pipe_desc *);
extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
diff --git a/kernel/relay.c b/kernel/relay.c
index ab56a17..e8cd202 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.nr_pages = 0,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.partial = partial,
.flags = flags,
.ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@
ret += padding;
out:
- splice_shrink_spd(pipe, &spd);
- return ret;
+ splice_shrink_spd(&spd);
+ return ret;
}
static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 49249c2..a7fa070 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3609,6 +3609,7 @@
.pages = pages_def,
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
@@ -3680,7 +3681,7 @@
ret = splice_to_pipe(pipe, &spd);
out:
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
out_err:
@@ -4231,6 +4232,7 @@
struct splice_pipe_desc spd = {
.pages = pages_def,
.partial = partial_def,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
@@ -4318,7 +4320,7 @@
}
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
out:
return ret;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index a15a466..4ce02e0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1594,6 +1594,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -1682,7 +1683,7 @@
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
if (error > 0) {
*ppos += error;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d78671e..46a3d23 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1755,6 +1755,7 @@
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = MAX_SKB_FRAGS,
.flags = flags,
.ops = &sock_pipe_buf_ops,
.spd_release = sock_spd_release,