Diffstat (limited to 'drivers/block')
31 files changed, 1283 insertions, 1480 deletions
diff --git a/drivers/block/acsi.c b/drivers/block/acsi.c
index ce933de..0e1f34f 100644
--- a/drivers/block/acsi.c
+++ b/drivers/block/acsi.c
@@ -371,7 +371,7 @@ static int acsi_revalidate (struct gendisk *disk);

 /************************* End of Prototypes **************************/

-struct timer_list acsi_timer = TIMER_INITIALIZER(acsi_times_out, 0, 0);
+DEFINE_TIMER(acsi_timer, acsi_times_out, 0, 0);

 #ifdef CONFIG_ATARI_SLM
diff --git a/drivers/block/acsi_slm.c b/drivers/block/acsi_slm.c
index e3be8c3..a5c1c8e 100644
--- a/drivers/block/acsi_slm.c
+++ b/drivers/block/acsi_slm.c
@@ -268,7 +268,7 @@ static int slm_get_pagesize( int device, int *w, int *h );

 /************************* End of Prototypes **************************/

-static struct timer_list slm_timer = TIMER_INITIALIZER(slm_test_ready, 0, 0);
+static DEFINE_TIMER(slm_timer, slm_test_ready, 0, 0);

 static struct file_operations slm_fops = {
 	.owner = THIS_MODULE,
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 721ba80..0e9e586 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "10"
+#define VERSION "12"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"

@@ -7,12 +7,12 @@
  * default is 16, which is 15 partitions plus the whole disk
  */
 #ifndef AOE_PARTITIONS
-#define AOE_PARTITIONS 16
+#define AOE_PARTITIONS (16)
 #endif

-#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * 10 + (aoeminor))
-#define AOEMAJOR(sysminor) ((sysminor) / 10)
-#define AOEMINOR(sysminor) ((sysminor) % 10)
+#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor))
+#define AOEMAJOR(sysminor) ((sysminor) / NPERSHELF)
+#define AOEMINOR(sysminor) ((sysminor) % NPERSHELF)
 #define WHITESPACE " \t\v\f\n"

 enum {
@@ -83,7 +83,7 @@ enum {
 enum {
 	MAXATADATA = 1024,
-	NPERSHELF = 10,
+	NPERSHELF = 16,	/* number of slots per shelf address */
 	FREETAG = -1,
 	MIN_BUFS = 8,
 };
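Reviewer note: the aoe.h hunk above replaces the literal 10 with NPERSHELF in all three macros and bumps NPERSHELF to 16, so a shelf's minor-number range now matches the default AOE_PARTITIONS. A minimal standalone sanity check of the new packing arithmetic (hypothetical userspace snippet, not part of the patch):

#include <assert.h>

/* Mirrors the new aoe.h macros: 16 device slots per shelf address. */
#define NPERSHELF 16
#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * NPERSHELF + (aoeminor))
#define AOEMAJOR(sysminor)           ((sysminor) / NPERSHELF)
#define AOEMINOR(sysminor)           ((sysminor) % NPERSHELF)

int main(void)
{
	/* shelf 2, slot 5 packs to system minor 37, and unpacks again */
	int sysminor = SYSMINOR(2, 5);
	assert(sysminor == 37);
	assert(AOEMAJOR(sysminor) == 2);
	assert(AOEMINOR(sysminor) == 5);
	return 0;
}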
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 95c0a36..4081c36 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -98,7 +98,6 @@ struct as_data {
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */

 	unsigned long exit_prob;	/* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
 	return ioc;
 }

+static void as_put_io_context(struct as_rq *arq)
+{
+	struct as_io_context *aic;
+
+	if (unlikely(!arq->io_context))
+		return;
+
+	aic = arq->io_context->aic;
+
+	if (arq->is_sync == REQ_SYNC && aic) {
+		spin_lock(&aic->lock);
+		set_bit(AS_TASK_IORUNNING, &aic->state);
+		aic->last_end_request = jiffies;
+		spin_unlock(&aic->lock);
+	}
+
+	put_io_context(arq->io_context);
+}
+
 /*
  * the back merge hash support functions
  */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
 		__as_del_arq_hash(arq);
 }

-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
-	as_del_arq_hash(arq);
-
-	if (q->last_merge == arq->request)
-		q->last_merge = NULL;
-}
-
 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
 			BUG_ON(!arq->on_hash);

 			if (!rq_mergeable(__rq)) {
-				as_remove_merge_hints(ad->q, arq);
+				as_del_arq_hash(arq);
 				continue;
 			}
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)

 	WARN_ON(!list_empty(&rq->queuelist));

-	if (arq->state == AS_RQ_PRESCHED) {
-		WARN_ON(arq->io_context);
-		goto out;
-	}
-
-	if (arq->state == AS_RQ_MERGED)
-		goto out_ioc;
-
 	if (arq->state != AS_RQ_REMOVED) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 		goto out;
 	}

-	if (!blk_fs_request(rq))
-		goto out;
-
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		kblockd_schedule_work(&ad->antic_work);
 		ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 		}
 	}

-out_ioc:
-	if (!arq->io_context)
-		goto out;
-
-	if (arq->is_sync == REQ_SYNC) {
-		struct as_io_context *aic = arq->io_context->aic;
-		if (aic) {
-			spin_lock(&aic->lock);
-			set_bit(AS_TASK_IORUNNING, &aic->state);
-			aic->last_end_request = jiffies;
-			spin_unlock(&aic->lock);
-		}
-	}
-
-	put_io_context(arq->io_context);
+	as_put_io_context(arq);
 out:
 	arq->state = AS_RQ_POSTSCHED;
 }
@@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);

 	list_del_init(&arq->fifo);
-	as_remove_merge_hints(q, arq);
+	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }

 /*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct as_io_context *aic;
-
-	if (!arq) {
-		WARN_ON(1);
-		return;
-	}
-
-	WARN_ON(arq->state != AS_RQ_DISPATCHED);
-	WARN_ON(ON_RB(&arq->rb_node));
-	if (arq->io_context && arq->io_context->aic) {
-		aic = arq->io_context->aic;
-		if (aic) {
-			WARN_ON(!atomic_read(&aic->nr_dispatched));
-			atomic_dec(&aic->nr_dispatched);
-		}
-	}
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-
-	if (unlikely(arq->state == AS_RQ_NEW))
-		goto out;
-
-	if (ON_RB(&arq->rb_node)) {
-		if (arq->state != AS_RQ_QUEUED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		/*
-		 * We'll lose the aliased request(s) here. I don't think this
-		 * will ever happen, but if it does, hopefully someone will
-		 * report it.
-		 */
-		WARN_ON(!list_empty(&rq->queuelist));
-		as_remove_queued_request(q, rq);
-	} else {
-		if (arq->state != AS_RQ_DISPATCHED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		as_remove_dispatched_request(q, rq);
-	}
-out:
-	arq->state = AS_RQ_REMOVED;
-}
-
-/*
 * as_fifo_expired returns 0 if there are no expired reads on the fifo,
 * 1 otherwise. It is ratelimited so that we only perform the check once per
 * `fifo_expire' interval. Otherwise a large number of expired requests
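Reviewer note: with the private dispatch list gone, the request life cycle is tracked entirely through arq->state, and the WARN_ONs above police its transitions. A compact summary of the states as this patch leaves them (the names are from the driver; the lifecycle comments are my reading of the code, not text from the patch, and the real enum order may differ):

/* Life cycle of an anticipatory-scheduler request after this patch. */
enum arq_state_sketch {
	AS_RQ_PRESCHED,		/* freshly allocated in as_set_request() */
	AS_RQ_NEW,		/* entered as_add_request(), not yet queued */
	AS_RQ_QUEUED,		/* on the scheduler's rbtree/FIFO */
	AS_RQ_DISPATCHED,	/* handed to the queue via elv_dispatch_*() */
	AS_RQ_REMOVED,		/* taken by the driver (as_activate_request);
				 * as_deactivate_request() moves it back */
	AS_RQ_MERGED,		/* absorbed into another request */
	AS_RQ_POSTSCHED,	/* completed, awaiting as_put_request() */
};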
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	struct list_head *insert;
 	const int data_dir = arq->is_sync;

 	BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	/*
 	 * take it off the sort and fifo list, add to dispatch queue
 	 */
-	insert = ad->dispatch->prev;
-
 	while (!list_empty(&rq->queuelist)) {
 		struct request *__rq = list_entry_rq(rq->queuelist.next);
 		struct as_rq *__arq = RQ_DATA(__rq);

-		list_move_tail(&__rq->queuelist, ad->dispatch);
+		list_del(&__rq->queuelist);
+
+		elv_dispatch_add_tail(ad->q, __rq);

 		if (__arq->io_context && __arq->io_context->aic)
 			atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	as_remove_queued_request(ad->q, rq);
 	WARN_ON(arq->state != AS_RQ_QUEUED);

-	list_add(&rq->queuelist, insert);
+	elv_dispatch_sort(ad->q, rq);
+
 	arq->state = AS_RQ_DISPATCHED;
 	if (arq->io_context && arq->io_context->aic)
 		atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 * read/write expire, batch expire, etc, and moves it to the dispatch
 * queue. Returns 1 if a request was found, 0 otherwise.
 */
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
 {
+	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
 	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);

+	if (unlikely(force)) {
+		/*
+		 * Forced dispatch, accounting is useless. Reset
+		 * accounting states and dump fifo_lists. Note that
+		 * batch_data_dir is reset to REQ_SYNC to avoid
+		 * screwing write batch accounting as write batch
+		 * accounting occurs on W->R transition.
+		 */
+		int dispatched = 0;
+
+		ad->batch_data_dir = REQ_SYNC;
+		ad->changed_batch = 0;
+		ad->new_batch = 0;
+
+		while (ad->next_arq[REQ_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+		while (ad->next_arq[REQ_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+		return dispatched;
+	}
+
 	/* Signal that the write batch was uncontended, so we can't time it */
 	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
 		if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
 	return 1;
 }

-static struct request *as_next_request(request_queue_t *q)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct request *rq = NULL;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first
-	 */
-	if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
-		rq = list_entry_rq(ad->dispatch->next);
-
-	return rq;
-}
-
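Reviewer note: the two hunks above are the heart of the new elevator interface. Schedulers no longer keep a private dispatch list or implement a next_req hook; they hand requests to the block layer with elv_dispatch_sort()/elv_dispatch_add_tail() from their elevator_dispatch_fn, which returns the number of requests moved. A minimal sketch of a trivial scheduler's dispatch hook under that contract (hypothetical example_data/example_dispatch names; only calls visible in this patch are used):

struct example_data {
	struct list_head queue;		/* hypothetical internal FIFO */
};

static int example_dispatch(request_queue_t *q, int force)
{
	struct example_data *ed = q->elevator->elevator_data;
	int dispatched = 0;

	while (!list_empty(&ed->queue)) {
		struct request *rq = list_entry_rq(ed->queue.next);

		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);	/* sector-sorted insert into q */
		dispatched++;

		if (!force)
			break;	/* normal path: hand over one batch per call */
	}

	return dispatched;	/* contract: requests moved to the device queue */
}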
 /*
 * Add arq to a list behind alias
 */
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
 	/*
	 * Don't want to have to handle merges.
	 */
-	as_remove_merge_hints(ad->q, arq);
+	as_del_arq_hash(arq);
 }

 /*
 * add arq to rbtree and fifo
 */
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
 {
+	struct as_data *ad = q->elevator->elevator_data;
+	struct as_rq *arq = RQ_DATA(rq);
 	struct as_rq *alias;
 	int data_dir;

+	if (arq->state != AS_RQ_PRESCHED) {
+		printk("arq->state: %d\n", arq->state);
+		WARN_ON(1);
+	}
+	arq->state = AS_RQ_NEW;
+
 	if (rq_data_dir(arq->request) == READ
 			|| current->flags&PF_SYNCWRITE)
 		arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 		arq->expires = jiffies + ad->fifo_expire[data_dir];
 		list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);

-		if (rq_mergeable(arq->request)) {
+		if (rq_mergeable(arq->request))
 			as_add_arq_hash(ad, arq);
-
-			if (!ad->q->last_merge)
-				ad->q->last_merge = arq->request;
-		}

 		as_update_arq(ad, arq); /* keep state machine up to date */
 	} else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->state = AS_RQ_QUEUED;
 }

-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);

-	if (arq) {
-		if (arq->state == AS_RQ_REMOVED) {
-			arq->state = AS_RQ_DISPATCHED;
-			if (arq->io_context && arq->io_context->aic)
-				atomic_inc(&arq->io_context->aic->nr_dispatched);
-		}
-	} else
-		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
-	/* Stop anticipating - let this request get through */
-	as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
-	as_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
-	if (blk_fs_request(rq)) {
-		struct as_rq *arq = RQ_DATA(rq);
-		arq->state = AS_RQ_DISPATCHED;
-		ad->nr_dispatched++;
-	}
+	WARN_ON(arq->state != AS_RQ_DISPATCHED);
+	arq->state = AS_RQ_REMOVED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_dec(&arq->io_context->aic->nr_dispatched);
 }

-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);

-	if (arq) {
-		if (arq->state != AS_RQ_PRESCHED) {
-			printk("arq->state: %d\n", arq->state);
-			WARN_ON(1);
-		}
-		arq->state = AS_RQ_NEW;
-	}
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT)) {
-		WARN_ON(1);
-		where = ELEVATOR_INSERT_BACK;
-	}
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (ad->next_arq[REQ_SYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
-			while (ad->next_arq[REQ_ASYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
-			list_add_tail(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			as_add_request(ad, arq);
-			break;
-		default:
-			BUG();
-			return;
-	}
+	WARN_ON(arq->state != AS_RQ_REMOVED);
+	arq->state = AS_RQ_DISPATCHED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_inc(&arq->io_context->aic->nr_dispatched);
 }

 /*
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;

-	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
-		|| !list_empty(&ad->fifo_list[REQ_SYNC])
-		|| !list_empty(ad->dispatch))
-			return 0;
-
-	return 1;
+	return list_empty(&ad->fifo_list[REQ_ASYNC])
+		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }

 static struct request *
@@ -1608,15 +1475,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	int ret;

 	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
-	/*
	 * see if the merge hash can satisfy a back merge
	 */
 	__rq = as_find_arq_hash(ad, bio->bi_sector);
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)

 	return ELEVATOR_NO_MERGE;
 out:
-	if (rq_mergeable(__rq))
-		q->last_merge = __rq;
-out_insert:
 	if (ret) {
 		if (rq_mergeable(__rq))
 			as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
		 * behind the disk head. We currently don't bother adjusting.
		 */
	}
-
-	if (arq->on_hash)
-		q->last_merge = req;
 }

 static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
	 * kill knowledge of next, this one is a goner
	 */
 	as_remove_queued_request(q, next);
+	as_put_io_context(anext);

 	anext->state = AS_RQ_MERGED;
 }
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;

 	spin_lock_irqsave(q->queue_lock, flags);
-	if (as_next_request(q))
+	if (!as_queue_empty(q))
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 		return;
 	}

-	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+		     arq->state != AS_RQ_PRESCHED &&
+		     arq->state != AS_RQ_MERGED)) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 	}
@@ -1807,7 +1662,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 }

 static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, int gfp_mask)
+			  struct bio *bio, gfp_t gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
 	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->dispatch = &q->queue_head;
 	ad->fifo_expire[REQ_SYNC] = default_read_expire;
 	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
		.elevator_merge_fn = 		as_merge,
		.elevator_merged_fn =		as_merged_request,
		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn = 	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
		.elevator_deactivate_req_fn = 	as_deactivate_request,
		.elevator_queue_empty_fn =	as_queue_empty,
		.elevator_completed_req_fn =	as_completed_request,
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index db05a5a..22bda05 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -371,16 +371,10 @@ static int floppy_release( struct inode * inode, struct file * filp );

 /************************* End of Prototypes **************************/

-static struct timer_list motor_off_timer =
-	TIMER_INITIALIZER(fd_motor_off_timer, 0, 0);
-static struct timer_list readtrack_timer =
-	TIMER_INITIALIZER(fd_readtrack_check, 0, 0);
-
-static struct timer_list timeout_timer =
-	TIMER_INITIALIZER(fd_times_out, 0, 0);
-
-static struct timer_list fd_timer =
-	TIMER_INITIALIZER(check_change, 0, 0);
+static DEFINE_TIMER(motor_off_timer, fd_motor_off_timer, 0, 0);
+static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
+static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
+static DEFINE_TIMER(fd_timer, check_change, 0, 0);

 static inline void start_motor_off_timer(void)
 {
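Reviewer note: same mechanical timer conversion here as in acsi.c and acsi_slm.c above. For anyone backporting, the two forms are equivalent; a sketch of the pattern with illustrative names (my_timeout_fn is hypothetical, and the macro arguments follow the (name, function, expires, data) layout used in these hunks):

static void my_timeout_fn(unsigned long data);

/* before: explicit struct plus initializer macro */
static struct timer_list my_timer = TIMER_INITIALIZER(my_timeout_fn, 0, 0);

/* after: one macro that both declares and statically initializes the timer */
static DEFINE_TIMER(my_timer, my_timeout_fn, 0, 0);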
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 418b146..486b6e1 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -47,14 +47,14 @@
 #include <linux/completion.h>

 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 2.6.6)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,6)
+#define DRIVER_NAME "HP CISS Driver (v 2.6.8)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,8)

 /* Embedded module documentation macros - see modules.h */
 MODULE_AUTHOR("Hewlett-Packard Company");
-MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.6");
+MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.8");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 E400 E300");
+			" SA6i P600 P800 P400 P400i E200 E200i");
 MODULE_LICENSE("GPL");

 #include "cciss_cmd.h"
@@ -83,12 +83,22 @@ static const struct pci_device_id cciss_pci_device_id[] = {
		0x0E11, 0x4091, 0, 0, 0},
	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSA,
		0x103C, 0x3225, 0, 0, 0},
-	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSB,
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
		0x103c, 0x3223, 0, 0, 0},
	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
-		0x103c, 0x3231, 0, 0, 0},
+		0x103c, 0x3234, 0, 0, 0},
	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSC,
-		0x103c, 0x3233, 0, 0, 0},
+		0x103c, 0x3235, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+		0x103c, 0x3211, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+		0x103c, 0x3212, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+		0x103c, 0x3213, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+		0x103c, 0x3214, 0, 0, 0},
+	{ PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSD,
+		0x103c, 0x3215, 0, 0, 0},
	{0,}
 };
 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
@@ -111,8 +121,13 @@ static struct board_type products[] = {
	{ 0x40910E11, "Smart Array 6i", &SA5_access},
	{ 0x3225103C, "Smart Array P600", &SA5_access},
	{ 0x3223103C, "Smart Array P800", &SA5_access},
-	{ 0x3231103C, "Smart Array E400", &SA5_access},
-	{ 0x3233103C, "Smart Array E300", &SA5_access},
+	{ 0x3234103C, "Smart Array P400", &SA5_access},
+	{ 0x3235103C, "Smart Array P400i", &SA5_access},
+	{ 0x3211103C, "Smart Array E200i", &SA5_access},
+	{ 0x3212103C, "Smart Array E200", &SA5_access},
+	{ 0x3213103C, "Smart Array E200i", &SA5_access},
+	{ 0x3214103C, "Smart Array E200i", &SA5_access},
+	{ 0x3215103C, "Smart Array E200i", &SA5_access},
 };

 /* How long to wait (in milliseconds) for board to go into simple mode */
@@ -140,15 +155,26 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,

 static int revalidate_allvol(ctlr_info_t *host);
 static int cciss_revalidate(struct gendisk *disk);
-static int deregister_disk(struct gendisk *disk);
-static int register_new_disk(ctlr_info_t *h);
+static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk);
+static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, int clear_all);
+
+static void cciss_read_capacity(int ctlr, int logvol, ReadCapdata_struct *buf,
+	int withirq, unsigned int *total_size, unsigned int *block_size);
+static void cciss_geometry_inquiry(int ctlr, int logvol,
+			int withirq, unsigned int total_size,
+			unsigned int block_size, InquiryData_struct *inq_buff,
+			drive_info_struct *drv);
 static void cciss_getgeometry(int cntl_num);

 static void start_io( ctlr_info_t *h);
 static int sendcmd( __u8 cmd, int ctlr, void *buff, size_t size,
	unsigned int use_unit_num, unsigned int log_unit, __u8 page_code,
	unsigned char *scsi3addr, int cmd_type);
+static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+	unsigned int use_unit_num, unsigned int log_unit, __u8 page_code,
+	int cmd_type);
+
+static void fail_all_cmds(unsigned long ctlr);

 #ifdef CONFIG_PROC_FS
 static int cciss_proc_get_info(char *buffer, char **start, off_t offset,
@@ -265,7 +291,7 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset,

	for(i=0; i<=h->highest_lun; i++) {

		drv = &h->drv[i];
-		if (drv->block_size == 0)
+		if (drv->heads == 0)
			continue;

		vol_sz = drv->nr_blocks;
@@ -363,6 +389,8 @@ static CommandList_struct * cmd_alloc(ctlr_info_t *h, int get_from_pool)
			return NULL;
		memset(c, 0, sizeof(CommandList_struct));

+		c->cmdindex = -1;
+
		c->err_info = (ErrorInfo_struct *)pci_alloc_consistent(
					h->pdev, sizeof(ErrorInfo_struct),
					&err_dma_handle);
@@ -393,6 +421,8 @@ static CommandList_struct * cmd_alloc(ctlr_info_t *h, int get_from_pool)
		err_dma_handle = h->errinfo_pool_dhandle
					+ i*sizeof(ErrorInfo_struct);
		h->nr_allocs++;
+
+		c->cmdindex = i;
	}

	c->busaddr = (__u32) cmd_dma_handle;
@@ -453,6 +483,8 @@ static int cciss_open(struct inode *inode, struct file *filep)
	printk(KERN_DEBUG "cciss_open %s\n", inode->i_bdev->bd_disk->disk_name);
 #endif /* CCISS_DEBUG */

+	if (host->busy_initializing || drv->busy_configuring)
+		return -EBUSY;
	/*
	 * Root is allowed to open raw volume zero even if it's not configured
	 * so array config can still work. Root is also allowed to open any
@@ -796,10 +828,10 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
		return(0);
	}
	case CCISS_DEREGDISK:
-		return deregister_disk(disk);
+		return rebuild_lun_table(host, disk);

	case CCISS_REGNEWD:
-		return register_new_disk(host);
+		return rebuild_lun_table(host, NULL);

	case CCISS_PASSTHRU:
	{
@@ -1143,48 +1175,323 @@ static int revalidate_allvol(ctlr_info_t *host)
	return 0;
 }

-static int deregister_disk(struct gendisk *disk)
+/* This function will check the usage_count of the drive to be updated/added.
+ * If the usage_count is zero then the drive information will be updated and
+ * the disk will be re-registered with the kernel. If not then it will be
+ * left alone for the next reboot. The exception to this is disk 0 which
+ * will always be left registered with the kernel since it is also the
+ * controller node. Any changes to disk 0 will show up on the next
+ * reboot.
+*/
+static void cciss_update_drive_info(int ctlr, int drv_index)
+{
+	ctlr_info_t *h = hba[ctlr];
+	struct gendisk *disk;
+	ReadCapdata_struct *size_buff = NULL;
+	InquiryData_struct *inq_buff = NULL;
+	unsigned int block_size;
+	unsigned int total_size;
+	unsigned long flags = 0;
+	int ret = 0;
+
+	/* if the disk already exists then deregister it before proceeding*/
+	if (h->drv[drv_index].raid_level != -1){
+		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+		h->drv[drv_index].busy_configuring = 1;
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		ret = deregister_disk(h->gendisk[drv_index],
+			&h->drv[drv_index], 0);
+		h->drv[drv_index].busy_configuring = 0;
+	}
+
+	/* If the disk is in use return */
+	if (ret)
+		return;
+
+
+	/* Get information about the disk and modify the driver structure */
+	size_buff = kmalloc(sizeof( ReadCapdata_struct), GFP_KERNEL);
+	if (size_buff == NULL)
+		goto mem_msg;
+	inq_buff = kmalloc(sizeof( InquiryData_struct), GFP_KERNEL);
+	if (inq_buff == NULL)
+		goto mem_msg;
+
+	cciss_read_capacity(ctlr, drv_index, size_buff, 1,
+		&total_size, &block_size);
+	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
+		inq_buff, &h->drv[drv_index]);
+
+	++h->num_luns;
+	disk = h->gendisk[drv_index];
+	set_capacity(disk, h->drv[drv_index].nr_blocks);
+
+
+	/* if it's the controller it's already added */
+	if (drv_index){
+		disk->queue = blk_init_queue(do_cciss_request, &h->lock);
+
+		/* Set up queue information */
+		disk->queue->backing_dev_info.ra_pages = READ_AHEAD;
+		blk_queue_bounce_limit(disk->queue, hba[ctlr]->pdev->dma_mask);
+
+		/* This is a hardware imposed limit. */
+		blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
+
+		/* This is a limit in the driver and could be eliminated. */
+		blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
+
+		blk_queue_max_sectors(disk->queue, 512);
+
+		disk->queue->queuedata = hba[ctlr];
+
+		blk_queue_hardsect_size(disk->queue,
+			hba[ctlr]->drv[drv_index].block_size);
+
+		h->drv[drv_index].queue = disk->queue;
+		add_disk(disk);
+	}
+
+freeret:
+	kfree(size_buff);
+	kfree(inq_buff);
+	return;
+mem_msg:
+	printk(KERN_ERR "cciss: out of memory\n");
+	goto freeret;
+}
+
+/* This function will find the first index of the controller's drive array
+ * that has a -1 for the raid_level and will return that index. This is
+ * where new drives will be added. If the index to be returned is greater
+ * than the highest_lun index for the controller then highest_lun is set
+ * to this new index. If there are no available indexes then -1 is returned.
+*/
+static int cciss_find_free_drive_index(int ctlr)
+{
+	int i;
+
+	for (i=0; i < CISS_MAX_LUN; i++){
+		if (hba[ctlr]->drv[i].raid_level == -1){
+			if (i > hba[ctlr]->highest_lun)
+				hba[ctlr]->highest_lun = i;
+			return i;
+		}
+	}
+	return -1;
+}
+
+/* This function will add and remove logical drives from the Logical
+ * drive array of the controller and maintain persistence of ordering
+ * so that mount points are preserved until the next reboot. This allows
+ * for the removal of logical drives in the middle of the drive array
+ * without a re-ordering of those drives.
+ * INPUT
+ * h		= The controller to perform the operations on
+ * del_disk	= The disk to remove if specified. If the value given
+ *		  is NULL then no disk is removed.
+*/
+static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk)
+{
+	int ctlr = h->ctlr;
+	int num_luns;
+	ReportLunData_struct *ld_buff = NULL;
+	drive_info_struct *drv = NULL;
+	int return_code;
+	int listlength = 0;
+	int i;
+	int drv_found;
+	int drv_index = 0;
+	__u32 lunid = 0;
+	unsigned long flags;
+
+	/* Set busy_configuring flag for this operation */
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->num_luns >= CISS_MAX_LUN){
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -EINVAL;
+	}
+
+	if (h->busy_configuring){
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -EBUSY;
+	}
+	h->busy_configuring = 1;
+
+	/* if del_disk is NULL then we are being called to add a new disk
+	 * and update the logical drive table. If it is not NULL then
+	 * we will check if the disk is in use or not.
+	 */
+	if (del_disk != NULL){
+		drv = get_drv(del_disk);
+		drv->busy_configuring = 1;
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return_code = deregister_disk(del_disk, drv, 1);
+		drv->busy_configuring = 0;
+		h->busy_configuring = 0;
+		return return_code;
+	} else {
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		if (!capable(CAP_SYS_RAWIO))
+			return -EPERM;
+
+		ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
+		if (ld_buff == NULL)
+			goto mem_msg;
+
+		return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
+				sizeof(ReportLunData_struct), 0, 0, 0,
+				TYPE_CMD);
+
+		if (return_code == IO_OK){
+			listlength |= (0xff & (unsigned int)(ld_buff->LUNListLength[0])) << 24;
+			listlength |= (0xff & (unsigned int)(ld_buff->LUNListLength[1])) << 16;
+			listlength |= (0xff & (unsigned int)(ld_buff->LUNListLength[2])) << 8;
+			listlength |= 0xff & (unsigned int)(ld_buff->LUNListLength[3]);
+		} else{ /* reading number of logical volumes failed */
+			printk(KERN_WARNING "cciss: report logical volume"
+				" command failed\n");
+			listlength = 0;
+			goto freeret;
+		}
+
+		num_luns = listlength / 8; /* 8 bytes per entry */
+		if (num_luns > CISS_MAX_LUN){
+			num_luns = CISS_MAX_LUN;
+			printk(KERN_WARNING "cciss: more luns configured"
+				" on controller than can be handled by"
+				" this driver.\n");
+		}
+
+		/* Compare controller drive array to driver's drive array.
+		 * Check for updates in the drive information and any new
+		 * drives on the controller.
+		 */
+		for (i=0; i < num_luns; i++){
+			int j;
+
+			drv_found = 0;
+
+			lunid = (0xff &
+				(unsigned int)(ld_buff->LUN[i][3])) << 24;
+			lunid |= (0xff &
+				(unsigned int)(ld_buff->LUN[i][2])) << 16;
+			lunid |= (0xff &
+				(unsigned int)(ld_buff->LUN[i][1])) << 8;
+			lunid |= 0xff &
+				(unsigned int)(ld_buff->LUN[i][0]);
+
+			/* Find if the LUN is already in the drive array
+			 * of the controller. If so then update its info
+			 * if not in use. If it does not exist then find
+			 * the first free index and add it.
+			 */
+			for (j=0; j <= h->highest_lun; j++){
+				if (h->drv[j].LunID == lunid){
+					drv_index = j;
+					drv_found = 1;
+				}
+			}
+
+			/* check if the drive was found already in the array */
+			if (!drv_found){
+				drv_index = cciss_find_free_drive_index(ctlr);
+				if (drv_index == -1)
+					goto freeret;
+
+			}
+			h->drv[drv_index].LunID = lunid;
+			cciss_update_drive_info(ctlr, drv_index);
+		} /* end for */
+	} /* end else */
+
+freeret:
+	kfree(ld_buff);
+	h->busy_configuring = 0;
+	/* We return -1 here to tell the ACU that we have registered/updated
+	 * all of the drives that we can and to keep it from calling us
+	 * additional times.
+	 */
+	return -1;
+mem_msg:
+	printk(KERN_ERR "cciss: out of memory\n");
+	goto freeret;
+}
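Reviewer note: rebuild_lun_table() assembles the 4-byte LUN ID by hand, and cciss_getgeometry() repeats the same shuffle later in this patch. A standalone restatement of that byte assembly, for reference (hypothetical helper, not part of the patch):

static unsigned int cciss_lunid(const unsigned char *lun)
{
	/* Bytes arrive as ld_buff->LUN[i][0..3]; byte 3 lands in the most
	 * significant position, matching the shifts in rebuild_lun_table(). */
	return ((unsigned int)(lun[3] & 0xff) << 24) |
	       ((unsigned int)(lun[2] & 0xff) << 16) |
	       ((unsigned int)(lun[1] & 0xff) << 8) |
	        (unsigned int)(lun[0] & 0xff);
}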
+
+/* This function will deregister the disk and its queue from the
+ * kernel. It must be called with the controller lock held and the
+ * drv structure's busy_configuring flag set. Its parameters are:
+ *
+ * disk = This is the disk to be deregistered
+ * drv  = This is the drive_info_struct associated with the disk to be
+ *        deregistered. It contains information about the disk used
+ *        by the driver.
+ * clear_all = This flag determines whether or not the disk information
+ *        is going to be completely cleared out and the highest_lun
+ *        reset. Sometimes we want to clear out information about
+ *        the disk in preparation for re-adding it. In this case
+ *        the highest_lun should be left unchanged and the LunID
+ *        should not be cleared.
+*/
+static int deregister_disk(struct gendisk *disk, drive_info_struct *drv,
+			   int clear_all)
+{
	ctlr_info_t *h = get_host(disk);
-	drive_info_struct *drv = get_drv(disk);
-	int ctlr = h->ctlr;

	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
	/* make sure logical volume is NOT in use */
-	if( drv->usage_count > 1) {
-		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	if(clear_all || (h->gendisk[0] == disk)) {
+		if (drv->usage_count > 1)
		return -EBUSY;
	}
-	drv->usage_count++;
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	else
+		if( drv->usage_count > 0 )
+			return -EBUSY;

-	/* invalidate the devices and deregister the disk */
-	if (disk->flags & GENHD_FL_UP)
+	/* invalidate the devices and deregister the disk. If it is disk
+	 * zero do not deregister it but just zero out its values. This
+	 * allows us to delete disk zero but keep the controller registered.
+	 */
+	if (h->gendisk[0] != disk){
+		if (disk->flags & GENHD_FL_UP){
+			blk_cleanup_queue(disk->queue);
		del_gendisk(disk);
+			drv->queue = NULL;
+		}
+	}
+
+	--h->num_luns;
+	/* zero out the disk size info */
+	drv->nr_blocks = 0;
+	drv->block_size = 0;
+	drv->heads = 0;
+	drv->sectors = 0;
+	drv->cylinders = 0;
+	drv->raid_level = -1;	/* This can be used as a flag variable to
				 * indicate that this element of the drive
				 * array is free.
+				 */
+
+	if (clear_all){
	/* check to see if it was the last disk */
	if (drv == h->drv + h->highest_lun) {
		/* if so, find the new highest lun */
		int i, newhighest =-1;
		for(i=0; i<h->highest_lun; i++) {
			/* if the disk has size > 0, it is available */
-			if (h->drv[i].nr_blocks)
+			if (h->drv[i].heads)
				newhighest = i;
		}
		h->highest_lun = newhighest;
-	}
-
-	--h->num_luns;
-	/* zero out the disk size info */
-	drv->nr_blocks = 0;
-	drv->block_size = 0;
-	drv->cylinders = 0;
+		drv->LunID = 0;
+	}
	return(0);
 }
+
 static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
	size_t size,
	unsigned int use_unit_num, /* 0: address the controller,
@@ -1420,8 +1727,10 @@ case CMD_HARDWARE_ERR:
		}
	}
	/* unlock the buffers from DMA */
+	buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
+	buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
	pci_unmap_single( h->pdev, (dma_addr_t) buff_dma_handle.val,
-			size, PCI_DMA_BIDIRECTIONAL);
+			c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
	cmd_free(h, c, 0);
	return(return_status);
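Reviewer note: this hunk (and the matching one in the resend path below) fixes an unmap mismatch: the buffer is now unmapped with the address and length actually programmed into the scatter entry rather than the caller's size argument, which may no longer describe the mapping. A minimal restatement of the pairing (illustrative helper; field names as used in this patch):

static void example_unmap_cmd(struct pci_dev *pdev, CommandList_struct *c)
{
	__u64 addr = ((__u64) c->SG[0].Addr.upper << 32) |
		      c->SG[0].Addr.lower;

	/* Always unmap with what the hardware was given: the address and
	 * length recorded in SG[0], not a separate 'size' variable. */
	pci_unmap_single(pdev, (dma_addr_t) addr, c->SG[0].Len,
			 PCI_DMA_BIDIRECTIONAL);
}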
%x\n", j,h->drv[j].LunID, - lunid); -#endif /* CCISS_DEBUG */ - if (h->drv[j].LunID == lunid) - { - lunID_found = 1; - break; - } - - } - if( lunID_found == 1) - continue; - else - { /* It is the new lun we have been looking for */ -#ifdef CCISS_DEBUG - printk("new lun found at %d\n", i); -#endif /* CCISS_DEBUG */ - new_lun_index = i; - new_lun_found = 1; - break; - } - } - if (!new_lun_found) - { - printk(KERN_WARNING "cciss: New Logical Volume not found\n"); - goto free_err; - } - /* Now find the free index */ - for(i=0; i <CISS_MAX_LUN; i++) - { -#ifdef CCISS_DEBUG - printk("Checking Index %d\n", i); -#endif /* CCISS_DEBUG */ - if(h->drv[i].LunID == 0) - { -#ifdef CCISS_DEBUG - printk("free index found at %d\n", i); -#endif /* CCISS_DEBUG */ - free_index_found = 1; - free_index = i; - break; - } - } - if (!free_index_found) - { - printk(KERN_WARNING "cciss: unable to find free slot for disk\n"); - goto free_err; - } - - logvol = free_index; - h->drv[logvol].LunID = lunid; - /* there could be gaps in lun numbers, track hightest */ - if(h->highest_lun < lunid) - h->highest_lun = logvol; - cciss_read_capacity(ctlr, logvol, size_buff, 1, - &total_size, &block_size); - cciss_geometry_inquiry(ctlr, logvol, 1, total_size, block_size, - inq_buff, &h->drv[logvol]); - h->drv[logvol].usage_count = 0; - ++h->num_luns; - /* setup partitions per disk */ - disk = h->gendisk[logvol]; - set_capacity(disk, h->drv[logvol].nr_blocks); - /* if it's the controller it's already added */ - if(logvol) - add_disk(disk); -freeret: - kfree(ld_buff); - kfree(size_buff); - kfree(inq_buff); - return (logvol); -mem_msg: - printk(KERN_ERR "cciss: out of memory\n"); -free_err: - logvol = -1; - goto freeret; -} - static int cciss_revalidate(struct gendisk *disk) { ctlr_info_t *h = get_host(disk); @@ -1713,10 +1864,9 @@ static unsigned long pollcomplete(int ctlr) for (i = 20 * HZ; i > 0; i--) { done = hba[ctlr]->access.command_completed(hba[ctlr]); - if (done == FIFO_EMPTY) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } else + if (done == FIFO_EMPTY) + schedule_timeout_uninterruptible(1); + else return (done); } /* Invalid address to tell caller we ran out of time */ @@ -1860,8 +2010,10 @@ resend_cmd1: cleanup1: /* unlock the data buffer from DMA */ + buff_dma_handle.val32.lower = c->SG[0].Addr.lower; + buff_dma_handle.val32.upper = c->SG[0].Addr.upper; pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val, - size, PCI_DMA_BIDIRECTIONAL); + c->SG[0].Len, PCI_DMA_BIDIRECTIONAL); cmd_free(info_p, c, 1); return (status); } @@ -2112,7 +2264,11 @@ queue: /* fill in the request */ drv = creq->rq_disk->private_data; c->Header.ReplyQueue = 0; // unused in simple mode - c->Header.Tag.lower = c->busaddr; // use the physical address the cmd block for tag + /* got command from pool, so use the command block index instead */ + /* for direct lookups. */ + /* The first 2 bits are reserved for controller error reporting. */ + c->Header.Tag.lower = (c->cmdindex << 3); + c->Header.Tag.lower |= 0x04; /* flag for direct lookup. 
@@ -2187,7 +2343,7 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs)
	ctlr_info_t *h = dev_id;
	CommandList_struct *c;
	unsigned long flags;
-	__u32 a, a1;
+	__u32 a, a1, a2;
	int j;
	int start_queue = h->next_to_run;
@@ -2205,10 +2361,21 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs)
		while((a = h->access.command_completed(h)) != FIFO_EMPTY) {
			a1 = a;
+			if ((a & 0x04)) {
+				a2 = (a >> 3);
+				if (a2 >= NR_CMDS) {
+					printk(KERN_WARNING "cciss: controller cciss%d failed, stopping.\n", h->ctlr);
+					fail_all_cmds(h->ctlr);
+					return IRQ_HANDLED;
+				}
+
+				c = h->cmd_pool + a2;
+				a = c->busaddr;
+
+			} else {
			a &= ~3;
-			if ((c = h->cmpQ) == NULL)
-			{
-				printk(KERN_WARNING "cciss: Completion of %08lx ignored\n", (unsigned long)a1);
+			if ((c = h->cmpQ) == NULL) {
+				printk(KERN_WARNING "cciss: Completion of %08x ignored\n", a1);
				continue;
			}
			while(c->busaddr != a) {
@@ -2216,6 +2383,7 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs)
				if (c == h->cmpQ)
					break;
			}
+			}
			/*
			 * If we've found the command, take it off the
			 * completion Q and free it
@@ -2635,12 +2803,16 @@ static void cciss_getgeometry(int cntl_num)
 #endif /* CCISS_DEBUG */

	hba[cntl_num]->highest_lun = hba[cntl_num]->num_luns-1;
-	for(i=0; i< hba[cntl_num]->num_luns; i++)
+//	for(i=0; i< hba[cntl_num]->num_luns; i++)
+	for(i=0; i < CISS_MAX_LUN; i++)
	{
-
-		lunid = (0xff & (unsigned int)(ld_buff->LUN[i][3])) << 24;
-		lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][2])) << 16;
-		lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][1])) << 8;
+		if (i < hba[cntl_num]->num_luns){
+			lunid = (0xff & (unsigned int)(ld_buff->LUN[i][3]))
+				<< 24;
+			lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][2]))
+				<< 16;
+			lunid |= (0xff & (unsigned int)(ld_buff->LUN[i][1]))
+				<< 8;
		lunid |= 0xff & (unsigned int)(ld_buff->LUN[i][0]);

		hba[cntl_num]->drv[i].LunID = lunid;
@@ -2648,13 +2820,18 @@ static void cciss_getgeometry(int cntl_num)

 #ifdef CCISS_DEBUG
	printk(KERN_DEBUG "LUN[%d]: %x %x %x %x = %x\n", i,
-		ld_buff->LUN[i][0], ld_buff->LUN[i][1],ld_buff->LUN[i][2],
-		ld_buff->LUN[i][3], hba[cntl_num]->drv[i].LunID);
+			ld_buff->LUN[i][0], ld_buff->LUN[i][1],
+			ld_buff->LUN[i][2], ld_buff->LUN[i][3],
+			hba[cntl_num]->drv[i].LunID);
 #endif /* CCISS_DEBUG */
-	cciss_read_capacity(cntl_num, i, size_buff, 0,
+			cciss_read_capacity(cntl_num, i, size_buff, 0,
				&total_size, &block_size);
-		cciss_geometry_inquiry(cntl_num, i, 0, total_size, block_size,
-			inq_buff, &hba[cntl_num]->drv[i]);
+			cciss_geometry_inquiry(cntl_num, i, 0, total_size,
+				block_size, inq_buff, &hba[cntl_num]->drv[i]);
+		} else {
+			/* initialize raid_level to indicate a free space */
+			hba[cntl_num]->drv[i].raid_level = -1;
+		}
	}
	kfree(ld_buff);
	kfree(size_buff);
@@ -2728,6 +2905,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
	i = alloc_cciss_hba();
	if(i < 0)
		return (-1);
+
+	hba[i]->busy_initializing = 1;
+
	if (cciss_pci_init(hba[i], pdev) != 0)
		goto clean1;
@@ -2808,6 +2988,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);

	cciss_procinit(i);
+	hba[i]->busy_initializing = 0;

	for(j=0; j < NWD; j++) { /* mfm */
		drive_info_struct *drv = &(hba[i]->drv[j]);
@@ -2870,6 +3051,7 @@ clean2:
 clean1:
	release_io_mem(hba[i]);
	free_hba(i);
+	hba[i]->busy_initializing = 0;
	return(-1);
 }

@@ -2914,9 +3096,10 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev)
	/* remove it from the disk list */
	for (j = 0; j < NWD; j++) {
		struct gendisk *disk = hba[i]->gendisk[j];
-		if (disk->flags & GENHD_FL_UP)
-			blk_cleanup_queue(disk->queue);
+		if (disk->flags & GENHD_FL_UP) {
			del_gendisk(disk);
+			blk_cleanup_queue(disk->queue);
+		}
	}

	pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct),
@@ -2965,5 +3148,43 @@ static void __exit cciss_cleanup(void)
	remove_proc_entry("cciss", proc_root_driver);
 }

+static void fail_all_cmds(unsigned long ctlr)
+{
+	/* If we get here, the board is apparently dead. */
+	ctlr_info_t *h = hba[ctlr];
+	CommandList_struct *c;
+	unsigned long flags;
+
+	printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
+	h->alive = 0;	/* the controller apparently died... */
+
+	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+
+	pci_disable_device(h->pdev); /* Make sure it is really dead. */
+
+	/* move everything off the request queue onto the completed queue */
+	while( (c = h->reqQ) != NULL ) {
+		removeQ(&(h->reqQ), c);
+		h->Qdepth--;
+		addQ (&(h->cmpQ), c);
+	}
+
+	/* Now, fail everything on the completed queue with a HW error */
+	while( (c = h->cmpQ) != NULL ) {
+		removeQ(&h->cmpQ, c);
+		c->err_info->CommandStatus = CMD_HARDWARE_ERR;
+		if (c->cmd_type == CMD_RWREQ) {
+			complete_command(h, c, 0);
+		} else if (c->cmd_type == CMD_IOCTL_PEND)
+			complete(c->waiting);
+#ifdef CONFIG_CISS_SCSI_TAPE
+		else if (c->cmd_type == CMD_SCSI)
+			complete_scsi_command(c, 0, 0);
+#endif
+	}
+	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	return;
+}
+
 module_init(cciss_init);
 module_exit(cciss_cleanup);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 566587d..ef277ba 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -35,7 +35,13 @@ typedef struct _drive_info_struct
	int	heads;
	int	sectors;
	int	cylinders;
-	int	raid_level;
+	int	raid_level; /* set to -1 to indicate that
+			     * the drive is not in use/configured
+			     */
+	int	busy_configuring; /* This is set when the drive is being removed
+				   * to prevent it from being opened or its queue
+				   * from being started.
+				   */
 } drive_info_struct;

 struct ctlr_info
@@ -83,6 +89,7 @@ struct ctlr_info
	int			nr_allocs;
	int			nr_frees;
	int			busy_configuring;
+	int			busy_initializing;

	/* This element holds the zero based queue number of the last
	 * queue to be started. It is used for fairness.
@@ -94,6 +101,7 @@ struct ctlr_info
 #ifdef CONFIG_CISS_SCSI_TAPE
	void *scsi_ctlr; /* ptr to structure containing scsi related stuff */
 #endif
+	unsigned char alive;
 };

 /* Defining the different access_methods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index a88a888..53fea54 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -226,6 +226,10 @@ typedef struct _ErrorInfo_struct {
 #define CMD_MSG_DONE		0x04
 #define CMD_MSG_TIMEOUT		0x05

+/* This structure needs to be divisible by 8 for new
+ * indexing method.
+ */
+#define PADSIZE (sizeof(long) - 4)
 typedef struct _CommandList_struct {
	CommandListHeader_struct Header;
	RequestBlock_struct	Request;
@@ -236,14 +240,14 @@ typedef struct _CommandList_struct {
	ErrorInfo_struct *	err_info; /* pointer to the allocated mem */
	int			ctlr;
	int			cmd_type;
+	long			cmdindex;
	struct _CommandList_struct *prev;
	struct _CommandList_struct *next;
	struct request *	rq;
	struct completion *	waiting;
	int			retry_count;
-#ifdef CONFIG_CISS_SCSI_TAPE
	void *			scsi_cmd;
-#endif
+	char			pad[PADSIZE];
 } CommandList_struct;

 //Configuration Table Structure
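Reviewer note: the pad[] member exists only to keep sizeof(CommandList_struct) a multiple of 8, as the comment says the new indexing method requires. That invariant could be made compile-time checked; a sketch using the classic negative-array-size trick, placed after the struct definition (illustrative only; the kernel of this era would spell such an assertion differently):

/* Fails to compile if the command-block size breaks the indexing scheme. */
typedef char cciss_cmdlist_size_check
	[(sizeof(CommandList_struct) % 8) == 0 ? 1 : -1];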
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index f16e3ca..e183a3e 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -93,6 +93,7 @@ struct cciss_scsi_cmd_stack_elem_t {
	CommandList_struct cmd;
	ErrorInfo_struct Err;
	__u32 busaddr;
+	__u32 pad;
 };

 #pragma pack()
@@ -877,7 +878,7 @@ cciss_scsi_interpret_error(CommandList_struct *cp)

 static int
 cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
-	InquiryData_struct *buf)
+	unsigned char *buf, unsigned char bufsize)
 {
	int rc;
	CommandList_struct *cp;
@@ -900,11 +901,10 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
	cdb[1] = 0;
	cdb[2] = 0;
	cdb[3] = 0;
-	cdb[4] = sizeof(*buf) & 0xff;
+	cdb[4] = bufsize;
	cdb[5] = 0;
	rc = cciss_scsi_do_simple_cmd(c, cp, scsi3addr, cdb,
-				6, (unsigned char *) buf,
-				sizeof(*buf), XFER_READ);
+				6, buf, bufsize, XFER_READ);
	if (rc != 0) return rc; /* something went wrong */
@@ -1000,9 +1000,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
	   that though.
	*/
-
+#define OBDR_TAPE_INQ_SIZE 49
+#define OBDR_TAPE_SIG "$DR-10"
	ReportLunData_struct *ld_buff;
-	InquiryData_struct *inq_buff;
+	unsigned char *inq_buff;
	unsigned char scsi3addr[8];
	ctlr_info_t *c;
	__u32 num_luns=0;
@@ -1020,7 +1021,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
		return;
	}
	memset(ld_buff, 0, reportlunsize);
-	inq_buff = kmalloc(sizeof( InquiryData_struct), GFP_KERNEL);
+	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
	if (inq_buff == NULL) {
		printk(KERN_ERR "cciss: out of memory\n");
		kfree(ld_buff);
@@ -1051,19 +1052,36 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
		/* for each physical lun, do an inquiry */
		if (ld_buff->LUN[i][3] & 0xC0) continue;
-		memset(inq_buff, 0, sizeof(InquiryData_struct));
+		memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
		memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);

-		if (cciss_scsi_do_inquiry(hba[cntl_num],
-			scsi3addr, inq_buff) != 0)
-		{
+		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff,
+			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0) {
			/* Inquiry failed (msg printed already) */
			devtype = 0; /* so we will skip this device. */
		} else /* what kind of device is this? */
-			devtype = (inq_buff->data_byte[0] & 0x1f);
+			devtype = (inq_buff[0] & 0x1f);

		switch (devtype)
		{
+		  case 0x05: /* CD-ROM */ {
+
+			/* We don't *really* support actual CD-ROM devices,
+			 * just this "One Button Disaster Recovery" tape drive
+			 * which temporarily pretends to be a CD-ROM drive.
+			 * So we check that the device is really an OBDR tape
+			 * device by checking for "$DR-10" in bytes 43-48 of
+			 * the inquiry data.
+			 */
+			char obdr_sig[7];
+
+			strncpy(obdr_sig, &inq_buff[43], 6);
+			obdr_sig[6] = '\0';
+			if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
+				/* Not OBDR device, ignore it. */
+				break;
+		  }
+		  /* fall through . . . */
		  case 0x01: /* sequential access, (tape) */
		  case 0x08: /* medium changer */
			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
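Reviewer note: the OBDR detection hinges on a fixed vendor signature at a fixed offset in the standard INQUIRY response, which is why the inquiry buffer grows to 49 bytes. The whole test restated as one predicate (hypothetical helper; offsets and signature as in the patch):

#include <string.h>

#define OBDR_TAPE_INQ_SIZE 49		/* must cover inquiry bytes 0..48 */
#define OBDR_TAPE_SIG "$DR-10"		/* vendor signature, bytes 43-48 */

static int is_obdr_tape(const unsigned char *inq)
{
	/* Low 5 bits of byte 0 give the peripheral device type; an OBDR
	 * tape poses as a CD-ROM (0x05) until the signature gives it away. */
	return (inq[0] & 0x1f) == 0x05 &&
	       memcmp(&inq[43], OBDR_TAPE_SIG, 6) == 0;
}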
@@ -1126,6 +1144,7 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,

	int buflen, datalen;
	ctlr_info_t *ci;
+	int i;
	int cntl_num;

@@ -1136,8 +1155,28 @@ cciss_scsi_proc_info(struct Scsi_Host *sh,
	cntl_num = ci->ctlr;	/* Get our index into the hba[] array */

	if (func == 0) {	/* User is reading from /proc/scsi/ciss*?/?* */
-		buflen = sprintf(buffer, "hostnum=%d\n", sh->host_no);
-
+		buflen = sprintf(buffer, "cciss%d: SCSI host: %d\n",
+				cntl_num, sh->host_no);
+
+		/* this information is needed by apps to know which cciss
+		   device corresponds to which scsi host number without
+		   having to open a scsi target device node.  The device
+		   information is not a duplicate of /proc/scsi/scsi because
+		   the two may be out of sync due to scsi hotplug, rather
+		   this info is for an app to be able to use to know how to
+		   get them back in sync. */
+
+		for (i=0;i<ccissscsi[cntl_num].ndevices;i++) {
+			struct cciss_scsi_dev_t *sd = &ccissscsi[cntl_num].dev[i];
+			buflen += sprintf(&buffer[buflen], "c%db%dt%dl%d %02d "
+				"0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+				sh->host_no, sd->bus, sd->target, sd->lun,
+				sd->devtype,
+				sd->scsi3addr[0], sd->scsi3addr[1],
+				sd->scsi3addr[2], sd->scsi3addr[3],
+				sd->scsi3addr[4], sd->scsi3addr[5],
+				sd->scsi3addr[6], sd->scsi3addr[7]);
+		}
		datalen = buflen - offset;
		if (datalen < 0) { /* they're reading past EOF. */
			datalen = 0;
@@ -1399,7 +1438,7 @@ cciss_proc_tape_report(int ctlr, unsigned char *buffer, off_t *pos, off_t *len)

	CPQ_TAPE_LOCK(ctlr, flags);
	size = sprintf(buffer + *len,
-		"	Sequential access devices: %d\n\n",
+		"Sequential access devices: %d\n\n",
			ccissscsi[ctlr].ndevices);
	CPQ_TAPE_UNLOCK(ctlr, flags);
	*pos += size; *len += size;
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 30c0903..94690e4 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
	(node)->rb_left = NULL;	\
 } while (0)
 #define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
-#define ON_RB(node)	((node)->rb_color != RB_NONE)
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector

@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
 #undef CFQ_CFQQ_FNS

 enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_in_flight = 0,
-	CFQ_CRQ_FLAG_in_driver,
-	CFQ_CRQ_FLAG_is_sync,
-	CFQ_CRQ_FLAG_requeued,
+	CFQ_CRQ_FLAG_is_sync = 0,
 };

 #define CFQ_CRQ_FNS(name)	\
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)	\
	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
 }

-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
 CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
 #undef CFQ_CRQ_FNS

 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);

 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
		hlist_del_init(&crq->hash);
 }

-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
-	cfq_del_crq_hash(crq);
-
-	if (q->last_merge == crq->request)
-		q->last_merge = NULL;
-}
-
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
	return NULL;
 }

-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
-	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
 /*
 * scheduler run of queue, if there are requests pending and no one in the
 * driver that will restart queueing
 */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+	if (!cfqd->rq_in_driver && cfqd->busy_queues)
		kblockd_schedule_work(&cfqd->unplug_work);
 }

@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
 {
	struct cfq_data *cfqd = q->elevator->elevator_data;

-	return !cfq_pending_requests(cfqd);
+	return !cfqd->busy_queues;
 }

 /*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
	if (crq2 == NULL)
		return crq1;

-	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
-		return crq1;
-	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
-		return crq2;
-
	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
		return crq1;
	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
	struct rb_node *rbnext, *rbprev;

-	rbnext = NULL;
-	if (ON_RB(&last->rb_node))
-		rbnext = rb_next(&last->rb_node);
-	if (!rbnext) {
+	if (!(rbnext = rb_next(&last->rb_node))) {
		rbnext = rb_first(&cfqq->sort_list);
		if (rbnext == &last->rb_node)
			rbnext = NULL;
	}
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
 * the pending list according to last request service
 */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
	BUG_ON(cfq_cfqq_on_rr(cfqq));
	cfq_mark_cfqq_on_rr(cfqq);
	cfqd->busy_queues++;

-	cfq_resort_rr_list(cfqq, requeue);
+	cfq_resort_rr_list(cfqq, 0);
 }

 static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 {
	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);

-	if (ON_RB(&crq->rb_node)) {
-		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = cfq_crq_is_sync(crq);
+	BUG_ON(!cfqq->queued[sync]);
+	cfqq->queued[sync]--;

-		BUG_ON(!cfqq->queued[sync]);
-		cfqq->queued[sync]--;
+	cfq_update_next_crq(crq);

-		cfq_update_next_crq(crq);
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	RB_CLEAR_COLOR(&crq->rb_node);

-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		RB_CLEAR_COLOR(&crq->rb_node);
-
-		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
-			cfq_del_cfqq_rr(cfqd, cfqq);
-	}
+	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+		cfq_del_cfqq_rr(cfqd, cfqq);
 }

 static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
	 * if that happens, put the alias on the dispatch list
	 */
	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
-		cfq_dispatch_sort(cfqd->queue, __alias);
+		cfq_dispatch_insert(cfqd->queue, __alias);

	rb_insert_color(&crq->rb_node, &cfqq->sort_list);

	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+		cfq_add_cfqq_rr(cfqd, cfqq);

	/*
	 * check if this request is a better next-serve candidate
	 */
@@ -643,10 +612,8 @@ static inline void
 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	if (ON_RB(&crq->rb_node)) {
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[cfq_crq_is_sync(crq)]--;
-	}
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	cfqq->queued[cfq_crq_is_sync(crq)]--;

	cfq_add_crq_rb(crq);
 }
@@ -676,49 +643,28 @@ out:
	return NULL;
 }

-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
 {
	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
-
-		if (cfq_crq_in_driver(crq)) {
-			cfq_clear_crq_in_driver(crq);
-			WARN_ON(!cfqd->rq_in_driver);
-			cfqd->rq_in_driver--;
-		}
-		if (cfq_crq_in_flight(crq)) {
-			const int sync = cfq_crq_is_sync(crq);
-
-			cfq_clear_crq_in_flight(crq);
-			WARN_ON(!cfqq->on_dispatch[sync]);
-			cfqq->on_dispatch[sync]--;
-		}
-		cfq_mark_crq_requeued(crq);
-	}
+	cfqd->rq_in_driver++;
 }

-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	cfq_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 }

-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
 {
	struct cfq_rq *crq = RQ_DATA(rq);

-	if (crq) {
-		list_del_init(&rq->queuelist);
-		cfq_del_crq_rb(crq);
-		cfq_remove_merge_hints(q, crq);
-
-	}
+	list_del_init(&rq->queuelist);
+	cfq_del_crq_rb(crq);
+	cfq_del_crq_hash(crq);
 }

 static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
	struct request *__rq;
	int ret;

-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
	if (__rq && elv_rq_merge_ok(__rq, bio)) {
		ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)

	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
	*req = __rq;
	return ret;
 }
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
		cfq_del_crq_hash(crq);
		cfq_add_crq_hash(cfqd, crq);

-	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+	if (rq_rb_key(req) != crq->rb_key) {
		struct cfq_queue *cfqq = crq->cfq_queue;

		cfq_update_next_crq(crq);
		cfq_reposition_crq_rb(cfqq, crq);
	}
-
-	q->last_merge = req;
 }

 static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
	    time_before(next->start_time, rq->start_time))
		list_move(&rq->queuelist, &next->queuelist);

-	cfq_remove_request(q, next);
+	cfq_remove_request(next);
 }

 static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
	return 1;
 }

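Reviewer note: as in as-iosched above, requeue and next_req hooks are gone; draining a scheduler (e.g. when switching elevators) is now expressed through the force flag of elevator_dispatch_fn, which both as_dispatch_request() and cfq_dispatch_requests() honor. A sketch of how a caller might drain under that contract (hypothetical loop; the actual block-layer call site is not part of this diff):

static void example_drain(request_queue_t *q)
{
	elevator_t *e = q->elevator;

	/* force=1 tells the scheduler to hand over everything it holds;
	 * dispatch_fn returns the number of requests moved, so the loop
	 * terminates once the scheduler is empty. */
	while (e->ops->elevator_dispatch_fn(q, 1))
		;
}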
- */ -static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) +static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq = crq->cfq_queue; - struct list_head *head = &q->queue_head, *entry = head; - struct request *__rq; - sector_t last; - - list_del(&crq->request->queuelist); - - last = cfqd->last_sector; - list_for_each_entry_reverse(__rq, head, queuelist) { - struct cfq_rq *__crq = RQ_DATA(__rq); - - if (blk_barrier_rq(__rq)) - break; - if (!blk_fs_request(__rq)) - break; - if (cfq_crq_requeued(__crq)) - break; - - if (__rq->sector <= crq->request->sector) - break; - if (__rq->sector > last && crq->request->sector < last) { - last = crq->request->sector + crq->request->nr_sectors; - break; - } - entry = &__rq->queuelist; - } - - cfqd->last_sector = last; cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); - - cfq_del_crq_rb(crq); - cfq_remove_merge_hints(q, crq); - - cfq_mark_crq_in_flight(crq); - cfq_clear_crq_requeued(crq); - + cfq_remove_request(crq->request); cfqq->on_dispatch[cfq_crq_is_sync(crq)]++; - list_add_tail(&crq->request->queuelist, entry); + elv_dispatch_sort(q, crq->request); } /* @@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * finally, insert request into driver dispatch list */ - cfq_dispatch_sort(cfqd->queue, crq); + cfq_dispatch_insert(cfqd->queue, crq); cfqd->dispatch_slice++; dispatched++; @@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, } static int -cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) +cfq_dispatch_requests(request_queue_t *q, int force) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; @@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) cfqq = cfq_select_queue(cfqd, force); if (cfqq) { + int max_dispatch; + + /* + * if idle window is disabled, allow queue buildup + */ + if (!cfq_cfqq_idle_window(cfqq) && + cfqd->rq_in_driver >= cfqd->cfq_max_depth) + return 0; + cfq_clear_cfqq_must_dispatch(cfqq); cfq_clear_cfqq_wait_request(cfqq); del_timer(&cfqd->idle_slice_timer); - if (cfq_class_idle(cfqq)) - max_dispatch = 1; + if (!force) { + max_dispatch = cfqd->cfq_quantum; + if (cfq_class_idle(cfqq)) + max_dispatch = 1; + } else + max_dispatch = INT_MAX; return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); } @@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) return 0; } -static inline void cfq_account_dispatch(struct cfq_rq *crq) -{ - struct cfq_queue *cfqq = crq->cfq_queue; - struct cfq_data *cfqd = cfqq->cfqd; - - if (unlikely(!blk_fs_request(crq->request))) - return; - - /* - * accounted bit is necessary since some drivers will call - * elv_next_request() many times for the same request (eg ide) - */ - if (cfq_crq_in_driver(crq)) - return; - - cfq_mark_crq_in_driver(crq); - cfqd->rq_in_driver++; -} - -static inline void -cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) -{ - struct cfq_data *cfqd = cfqq->cfqd; - unsigned long now; - - if (!cfq_crq_in_driver(crq)) - return; - - now = jiffies; - - WARN_ON(!cfqd->rq_in_driver); - cfqd->rq_in_driver--; - - if (!cfq_class_idle(cfqq)) - cfqd->last_end_request = now; - - if (!cfq_cfqq_dispatched(cfqq)) { - if (cfq_cfqq_on_rr(cfqq)) { - cfqq->service_last = now; - cfq_resort_rr_list(cfqq, 0); - } - if (cfq_cfqq_expired(cfqq)) { - __cfq_slice_expired(cfqd, cfqq, 
0); - cfq_schedule_dispatch(cfqd); - } - } - - if (cfq_crq_is_sync(crq)) - crq->io_context->last_end_request = now; -} - -static struct request *cfq_next_request(request_queue_t *q) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - struct request *rq; - - if (!list_empty(&q->queue_head)) { - struct cfq_rq *crq; -dispatch: - rq = list_entry_rq(q->queue_head.next); - - crq = RQ_DATA(rq); - if (crq) { - struct cfq_queue *cfqq = crq->cfq_queue; - - /* - * if idle window is disabled, allow queue buildup - */ - if (!cfq_crq_in_driver(crq) && - !cfq_cfqq_idle_window(cfqq) && - !blk_barrier_rq(rq) && - cfqd->rq_in_driver >= cfqd->cfq_max_depth) - return NULL; - - cfq_remove_merge_hints(q, crq); - cfq_account_dispatch(crq); - } - - return rq; - } - - if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0)) - goto dispatch; - - return NULL; -} - /* * task holds one reference to the queue, dropped when task exits. each crq * in-flight on this queue also holds a reference, dropped when crq is freed. @@ -1422,7 +1246,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic) } static struct cfq_io_context * -cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask) +cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask); @@ -1517,7 +1341,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) static struct cfq_queue * cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio, - int gfp_mask) + gfp_t gfp_mask) { const int hashval = hash_long(key, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; @@ -1578,7 +1402,7 @@ out: * cfqq, so we don't need to worry about it disappearing */ static struct cfq_io_context * -cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask) +cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask) { struct io_context *ioc = NULL; struct cfq_io_context *cic; @@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq) +static void cfq_insert_request(request_queue_t *q, struct request *rq) { + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); struct cfq_queue *cfqq = crq->cfq_queue; @@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq) list_add_tail(&rq->queuelist, &cfqq->fifo); - if (rq_mergeable(rq)) { + if (rq_mergeable(rq)) cfq_add_crq_hash(cfqd, crq); - if (!cfqd->queue->last_merge) - cfqd->queue->last_merge = rq; - } - cfq_crq_enqueued(cfqd, cfqq, crq); } -static void -cfq_insert_request(request_queue_t *q, struct request *rq, int where) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - - switch (where) { - case ELEVATOR_INSERT_BACK: - while (cfq_dispatch_requests(q, INT_MAX, 1)) - ; - list_add_tail(&rq->queuelist, &q->queue_head); - /* - * If we were idling with pending requests on - * inactive cfqqs, force dispatching will - * remove the idle timer and the queue won't - * be kicked by __make_request() afterward. - * Kick it here. 
- */ - cfq_schedule_dispatch(cfqd); - break; - case ELEVATOR_INSERT_FRONT: - list_add(&rq->queuelist, &q->queue_head); - break; - case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); - cfq_enqueue(cfqd, rq); - break; - default: - printk("%s: bad insert point %d\n", __FUNCTION__,where); - return; - } -} - static void cfq_completed_request(request_queue_t *q, struct request *rq) { struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_queue *cfqq; + struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_data *cfqd = cfqq->cfqd; + const int sync = cfq_crq_is_sync(crq); + unsigned long now; - if (unlikely(!blk_fs_request(rq))) - return; + now = jiffies; - cfqq = crq->cfq_queue; + WARN_ON(!cfqd->rq_in_driver); + WARN_ON(!cfqq->on_dispatch[sync]); + cfqd->rq_in_driver--; + cfqq->on_dispatch[sync]--; - if (cfq_crq_in_flight(crq)) { - const int sync = cfq_crq_is_sync(crq); + if (!cfq_class_idle(cfqq)) + cfqd->last_end_request = now; - WARN_ON(!cfqq->on_dispatch[sync]); - cfqq->on_dispatch[sync]--; + if (!cfq_cfqq_dispatched(cfqq)) { + if (cfq_cfqq_on_rr(cfqq)) { + cfqq->service_last = now; + cfq_resort_rr_list(cfqq, 0); + } + if (cfq_cfqq_expired(cfqq)) { + __cfq_slice_expired(cfqd, cfqq, 0); + cfq_schedule_dispatch(cfqd); + } } - cfq_account_completion(cfqq, crq); + if (cfq_crq_is_sync(crq)) + crq->io_context->last_end_request = now; } static struct request * @@ -2075,7 +1877,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq) */ static int cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio, INIT_HLIST_NODE(&crq->hash); crq->cfq_queue = cfqq; crq->io_context = cic; - cfq_clear_crq_in_flight(crq); - cfq_clear_crq_in_driver(crq); - cfq_clear_crq_requeued(crq); if (rw == READ || process_sync(tsk)) cfq_mark_crq_is_sync(crq); @@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data) * only expire and reinvoke request handler, if there are * other queues with pending requests */ - if (!cfq_pending_requests(cfqd)) { + if (!cfqd->busy_queues) { cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end); add_timer(&cfqd->idle_slice_timer); goto out_cont; @@ -2260,6 +2059,8 @@ static void cfq_put_cfqd(struct cfq_data *cfqd) if (!atomic_dec_and_test(&cfqd->ref)) return; + blk_put_queue(q); + cfq_shutdown_timer_wq(cfqd); q->elevator->elevator_data = NULL; @@ -2316,6 +2117,7 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) e->elevator_data = cfqd; cfqd->queue = q; + atomic_inc(&q->refcnt); cfqd->max_queued = q->nr_requests / 4; q->nr_batching = cfq_queued; @@ -2573,10 +2375,9 @@ static struct elevator_type iosched_cfq = { .elevator_merge_fn = cfq_merge, .elevator_merged_fn = cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, - .elevator_next_req_fn = cfq_next_request, + .elevator_dispatch_fn = cfq_dispatch_requests, .elevator_add_req_fn = cfq_insert_request, - .elevator_remove_req_fn = cfq_remove_request, - .elevator_requeue_req_fn = cfq_requeue_request, + .elevator_activate_req_fn = cfq_activate_request, .elevator_deactivate_req_fn = cfq_deactivate_request, .elevator_queue_empty_fn = cfq_queue_empty, .elevator_completed_req_fn = cfq_completed_request, diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index 24594c5..7929471 100644 --- 
a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -50,7 +50,6 @@ struct deadline_data { * next in sort order. read, write or both are NULL */ struct deadline_rq *next_drq[2]; - struct list_head *dispatch; /* driver dispatch queue */ struct list_head *hash; /* request hash */ unsigned int batching; /* number of sequential requests made */ sector_t last_sector; /* head position */ @@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq) __deadline_del_drq_hash(drq); } -static void -deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq) -{ - deadline_del_drq_hash(drq); - - if (q->last_merge == drq->request) - q->last_merge = NULL; -} - static inline void deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq) { @@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq) dd->next_drq[data_dir] = rb_entry_drq(rbnext); } - if (ON_RB(&drq->rb_node)) { - rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); - RB_CLEAR(&drq->rb_node); - } + BUG_ON(!ON_RB(&drq->rb_node)); + rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq)); + RB_CLEAR(&drq->rb_node); } static struct request * @@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir) /* * add drq to rbtree and fifo */ -static inline void +static void deadline_add_request(struct request_queue *q, struct request *rq) { struct deadline_data *dd = q->elevator->elevator_data; @@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq) drq->expires = jiffies + dd->fifo_expire[data_dir]; list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]); - if (rq_mergeable(rq)) { + if (rq_mergeable(rq)) deadline_add_drq_hash(dd, drq); - - if (!q->last_merge) - q->last_merge = rq; - } } /* @@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq) static void deadline_remove_request(request_queue_t *q, struct request *rq) { struct deadline_rq *drq = RQ_DATA(rq); + struct deadline_data *dd = q->elevator->elevator_data; - if (drq) { - struct deadline_data *dd = q->elevator->elevator_data; - - list_del_init(&drq->fifo); - deadline_remove_merge_hints(q, drq); - deadline_del_drq_rb(dd, drq); - } + list_del_init(&drq->fifo); + deadline_del_drq_rb(dd, drq); + deadline_del_drq_hash(drq); } static int @@ -333,15 +315,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) int ret; /* - * try last_merge to avoid going to hash - */ - ret = elv_try_last_merge(q, bio); - if (ret != ELEVATOR_NO_MERGE) { - __rq = q->last_merge; - goto out_insert; - } - - /* * see if the merge hash can satisfy a back merge */ __rq = deadline_find_drq_hash(dd, bio->bi_sector); @@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - q->last_merge = __rq; -out_insert: if (ret) deadline_hot_drq_hash(dd, RQ_DATA(__rq)); *req = __rq; @@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req) deadline_del_drq_rb(dd, drq); deadline_add_drq_rb(dd, drq); } - - q->last_merge = req; } static void @@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq) request_queue_t *q = drq->request->q; deadline_remove_request(q, drq->request); - list_add_tail(&drq->request->queuelist, dd->dispatch); + elv_dispatch_add_tail(q, drq->request); } /* @@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) * deadline_dispatch_requests 
selects the best request according to * read/write expire, fifo_batch, etc */ -static int deadline_dispatch_requests(struct deadline_data *dd) +static int deadline_dispatch_requests(request_queue_t *q, int force) { + struct deadline_data *dd = q->elevator->elevator_data; const int reads = !list_empty(&dd->fifo_list[READ]); const int writes = !list_empty(&dd->fifo_list[WRITE]); struct deadline_rq *drq; @@ -512,7 +482,10 @@ static int deadline_dispatch_requests(struct deadline_data *dd) /* * batches are currently reads XOR writes */ - drq = dd->next_drq[WRITE] ? : dd->next_drq[READ]; + if (dd->next_drq[WRITE]) + drq = dd->next_drq[WRITE]; + else + drq = dd->next_drq[READ]; if (drq) { /* we have a "next request" */ @@ -594,65 +567,12 @@ dispatch_request: return 1; } -static struct request *deadline_next_request(request_queue_t *q) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct request *rq; - - /* - * if there are still requests on the dispatch queue, grab the first one - */ - if (!list_empty(dd->dispatch)) { -dispatch: - rq = list_entry_rq(dd->dispatch->next); - return rq; - } - - if (deadline_dispatch_requests(dd)) - goto dispatch; - - return NULL; -} - -static void -deadline_insert_request(request_queue_t *q, struct request *rq, int where) -{ - struct deadline_data *dd = q->elevator->elevator_data; - - /* barriers must flush the reorder queue */ - if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) - && where == ELEVATOR_INSERT_SORT)) - where = ELEVATOR_INSERT_BACK; - - switch (where) { - case ELEVATOR_INSERT_BACK: - while (deadline_dispatch_requests(dd)) - ; - list_add_tail(&rq->queuelist, dd->dispatch); - break; - case ELEVATOR_INSERT_FRONT: - list_add(&rq->queuelist, dd->dispatch); - break; - case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); - deadline_add_request(q, rq); - break; - default: - printk("%s: bad insert point %d\n", __FUNCTION__,where); - return; - } -} - static int deadline_queue_empty(request_queue_t *q) { struct deadline_data *dd = q->elevator->elevator_data; - if (!list_empty(&dd->fifo_list[WRITE]) - || !list_empty(&dd->fifo_list[READ]) - || !list_empty(dd->dispatch)) - return 0; - - return 1; + return list_empty(&dd->fifo_list[WRITE]) + && list_empty(&dd->fifo_list[READ]); } static struct request * @@ -730,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&dd->fifo_list[WRITE]); dd->sort_list[READ] = RB_ROOT; dd->sort_list[WRITE] = RB_ROOT; - dd->dispatch = &q->queue_head; dd->fifo_expire[READ] = read_expire; dd->fifo_expire[WRITE] = write_expire; dd->writes_starved = writes_starved; @@ -745,15 +664,13 @@ static void deadline_put_request(request_queue_t *q, struct request *rq) struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq = RQ_DATA(rq); - if (drq) { - mempool_free(drq, dd->drq_pool); - rq->elevator_private = NULL; - } + mempool_free(drq, dd->drq_pool); + rq->elevator_private = NULL; } static int deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct deadline_data *dd = q->elevator->elevator_data; struct deadline_rq *drq; @@ -914,9 +831,8 @@ static struct elevator_type iosched_deadline = { .elevator_merge_fn = deadline_merge, .elevator_merged_fn = deadline_merged_request, .elevator_merge_req_fn = deadline_merged_requests, - .elevator_next_req_fn = deadline_next_request, - .elevator_add_req_fn = deadline_insert_request, - .elevator_remove_req_fn = deadline_remove_request, + 
.elevator_dispatch_fn = deadline_dispatch_requests, + .elevator_add_req_fn = deadline_add_request, .elevator_queue_empty_fn = deadline_queue_empty, .elevator_former_req_fn = deadline_former_request, .elevator_latter_req_fn = deadline_latter_request, diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 98f0126..55621d5 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> +#include <linux/delay.h> #include <asm/uaccess.h> @@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio) } EXPORT_SYMBOL(elv_try_merge); -inline int elv_try_last_merge(request_queue_t *q, struct bio *bio) -{ - if (q->last_merge) - return elv_try_merge(q->last_merge, bio); - - return ELEVATOR_NO_MERGE; -} -EXPORT_SYMBOL(elv_try_last_merge); - static struct elevator_type *elevator_find(const char *name) { struct elevator_type *e = NULL; struct list_head *entry; - spin_lock_irq(&elv_list_lock); list_for_each(entry, &elv_list) { struct elevator_type *__e; @@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name) break; } } - spin_unlock_irq(&elv_list_lock); return e; } @@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e) static struct elevator_type *elevator_get(const char *name) { - struct elevator_type *e = elevator_find(name); + struct elevator_type *e; - if (!e) - return NULL; - if (!try_module_get(e->elevator_owner)) - return NULL; + spin_lock_irq(&elv_list_lock); + + e = elevator_find(name); + if (e && !try_module_get(e->elevator_owner)) + e = NULL; + + spin_unlock_irq(&elv_list_lock); return e; } @@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e, eq->ops = &e->ops; eq->elevator_type = e; - INIT_LIST_HEAD(&q->queue_head); - q->last_merge = NULL; q->elevator = eq; if (eq->ops->elevator_init_fn) @@ -153,11 +144,15 @@ static char chosen_elevator[16]; static void elevator_setup_default(void) { + struct elevator_type *e; + /* * check if default is set and exists */ - if (chosen_elevator[0] && elevator_find(chosen_elevator)) + if (chosen_elevator[0] && (e = elevator_get(chosen_elevator))) { + elevator_put(e); return; + } #if defined(CONFIG_IOSCHED_AS) strcpy(chosen_elevator, "anticipatory"); @@ -186,6 +181,11 @@ int elevator_init(request_queue_t *q, char *name) struct elevator_queue *eq; int ret = 0; + INIT_LIST_HEAD(&q->queue_head); + q->last_merge = NULL; + q->end_sector = 0; + q->boundary_rq = NULL; + elevator_setup_default(); if (!name) @@ -220,9 +220,52 @@ void elevator_exit(elevator_t *e) kfree(e); } +/* + * Insert rq into dispatch queue of q. Queue lock must be held on + * entry. rq is sort-inserted by sector; use elv_dispatch_add_tail() + * to append instead. To be used by specific elevators.
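+ *
+ * (Sketch, not part of this patch: a minimal elevator_dispatch_fn
+ * built on this helper. foo_dispatch() and foo_next_rq() are
+ * made-up names; foo_next_rq() stands in for the scheduler's own
+ * request selection logic.)
+ *
+ *	static int foo_dispatch(request_queue_t *q, int force)
+ *	{
+ *		struct request *rq = foo_next_rq(q);
+ *
+ *		if (!rq)
+ *			return 0;
+ *
+ *		elv_dispatch_sort(q, rq);
+ *		return 1;
+ *	}
+ *
+ * A non-zero return tells __elv_next_request() that at least one
+ * request was moved to q->queue_head. A real scheduler may also use
+ * the force argument to drain everything, as cfq does above.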
+ */ +void elv_dispatch_sort(request_queue_t *q, struct request *rq) +{ + sector_t boundary; + struct list_head *entry; + + if (q->last_merge == rq) + q->last_merge = NULL; + + boundary = q->end_sector; + + list_for_each_prev(entry, &q->queue_head) { + struct request *pos = list_entry_rq(entry); + + if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED)) + break; + if (rq->sector >= boundary) { + if (pos->sector < boundary) + continue; + } else { + if (pos->sector >= boundary) + break; + } + if (rq->sector >= pos->sector) + break; + } + + list_add(&rq->queuelist, entry); +} + int elv_merge(request_queue_t *q, struct request **req, struct bio *bio) { elevator_t *e = q->elevator; + int ret; + + if (q->last_merge) { + ret = elv_try_merge(q->last_merge, bio); + if (ret != ELEVATOR_NO_MERGE) { + *req = q->last_merge; + return ret; + } + } if (e->ops->elevator_merge_fn) return e->ops->elevator_merge_fn(q, req, bio); @@ -236,6 +279,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq) if (e->ops->elevator_merged_fn) e->ops->elevator_merged_fn(q, rq); + + q->last_merge = rq; } void elv_merge_requests(request_queue_t *q, struct request *rq, @@ -243,20 +288,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq, { elevator_t *e = q->elevator; - if (q->last_merge == next) - q->last_merge = NULL; - if (e->ops->elevator_merge_req_fn) e->ops->elevator_merge_req_fn(q, rq, next); + + q->last_merge = rq; } -/* - * For careful internal use by the block layer. Essentially the same as - * a requeue in that it tells the io scheduler that this request is not - * active in the driver or hardware anymore, but we don't want the request - * added back to the scheduler. Function is not exported. - */ -void elv_deactivate_request(request_queue_t *q, struct request *rq) +void elv_requeue_request(request_queue_t *q, struct request *rq) { elevator_t *e = q->elevator; @@ -264,19 +302,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq) * it already went through dequeue, we need to decrement the * in_flight count again */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight--; + if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn) + e->ops->elevator_deactivate_req_fn(q, rq); + } rq->flags &= ~REQ_STARTED; - if (e->ops->elevator_deactivate_req_fn) - e->ops->elevator_deactivate_req_fn(q, rq); -} - -void elv_requeue_request(request_queue_t *q, struct request *rq) -{ - elv_deactivate_request(q, rq); - /* * if this is the flush, requeue the original instead and drop the flush */ @@ -285,31 +318,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) rq = rq->end_io_data; } - /* - * the request is prepped and may have some resources allocated. - * allowing unprepped requests to pass this one may cause resource - * deadlock. turn on softbarrier. - */ - rq->flags |= REQ_SOFTBARRIER; - - /* - * if iosched has an explicit requeue hook, then use that. 
otherwise - * just put the request at the front of the queue - */ - if (q->elevator->ops->elevator_requeue_req_fn) - q->elevator->ops->elevator_requeue_req_fn(q, rq); - else - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { - /* - * barriers implicitly indicate back insertion - */ - if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) && - where == ELEVATOR_INSERT_SORT) + if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { + /* + * barriers implicitly indicate back insertion + */ + if (where == ELEVATOR_INSERT_SORT) + where = ELEVATOR_INSERT_BACK; + + /* + * this request is a scheduling boundary, update end_sector + */ + if (blk_fs_request(rq)) { + q->end_sector = rq_end_sector(rq); + q->boundary_rq = rq; + } + } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; if (plug) @@ -317,23 +346,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, rq->q = q; - if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { - q->elevator->ops->elevator_add_req_fn(q, rq, where); + switch (where) { + case ELEVATOR_INSERT_FRONT: + rq->flags |= REQ_SOFTBARRIER; - if (blk_queue_plugged(q)) { - int nrq = q->rq.count[READ] + q->rq.count[WRITE] - - q->in_flight; + list_add(&rq->queuelist, &q->queue_head); + break; - if (nrq >= q->unplug_thresh) - __generic_unplug_device(q); - } - } else + case ELEVATOR_INSERT_BACK: + rq->flags |= REQ_SOFTBARRIER; + + while (q->elevator->ops->elevator_dispatch_fn(q, 1)) + ; + list_add_tail(&rq->queuelist, &q->queue_head); /* - * if drain is set, store the request "locally". when the drain - * is finished, the requests will be handed ordered to the io - * scheduler + * We kick the queue here for the following reasons. + * - The elevator might have returned NULL previously + * to delay requests and returned them now. As the + * queue wasn't empty before this request, ll_rw_blk + * won't run the queue on return, resulting in a hang. + * - Usually, back-inserted requests won't be merged + * with anything. There's no point in delaying queue + * processing.
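+ *
+ * (Cross-reference, not in the original comment: elevator_switch()
+ * in this same file relies on the identical loop above,
+ * elevator_dispatch_fn(q, 1), to force every sorted request out to
+ * q->queue_head before swapping schedulers.)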
*/ - list_add_tail(&rq->queuelist, &q->drain_list); + blk_remove_plug(q); + q->request_fn(q); + break; + + case ELEVATOR_INSERT_SORT: + BUG_ON(!blk_fs_request(rq)); + rq->flags |= REQ_SORTED; + q->elevator->ops->elevator_add_req_fn(q, rq); + if (q->last_merge == NULL && rq_mergeable(rq)) + q->last_merge = rq; + break; + + default: + printk(KERN_ERR "%s: bad insertion point %d\n", + __FUNCTION__, where); + BUG(); + } + + if (blk_queue_plugged(q)) { + int nrq = q->rq.count[READ] + q->rq.count[WRITE] + - q->in_flight; + + if (nrq >= q->unplug_thresh) + __generic_unplug_device(q); + } } void elv_add_request(request_queue_t *q, struct request *rq, int where, @@ -348,13 +408,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where, static inline struct request *__elv_next_request(request_queue_t *q) { - struct request *rq = q->elevator->ops->elevator_next_req_fn(q); + struct request *rq; + + if (unlikely(list_empty(&q->queue_head) && + !q->elevator->ops->elevator_dispatch_fn(q, 0))) + return NULL; + + rq = list_entry_rq(q->queue_head.next); /* * if this is a barrier write and the device has to issue a * flush sequence to support it, check how far we are */ - if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) { + if (blk_fs_request(rq) && blk_barrier_rq(rq)) { BUG_ON(q->ordered == QUEUE_ORDERED_NONE); if (q->ordered == QUEUE_ORDERED_FLUSH && @@ -371,15 +437,30 @@ struct request *elv_next_request(request_queue_t *q) int ret; while ((rq = __elv_next_request(q)) != NULL) { - /* - * just mark as started even if we don't start it, a request - * that has been delayed should not be passed by new incoming - * requests - */ - rq->flags |= REQ_STARTED; + if (!(rq->flags & REQ_STARTED)) { + elevator_t *e = q->elevator; - if (rq == q->last_merge) - q->last_merge = NULL; + /* + * This is the first time the device driver + * sees this request (possibly after + * requeueing). Notify IO scheduler. + */ + if (blk_sorted_rq(rq) && + e->ops->elevator_activate_req_fn) + e->ops->elevator_activate_req_fn(q, rq); + + /* + * just mark as started even if we don't start + * it, a request that has been delayed should + * not be passed by new incoming requests + */ + rq->flags |= REQ_STARTED; + } + + if (!q->boundary_rq || q->boundary_rq == rq) { + q->end_sector = rq_end_sector(rq); + q->boundary_rq = NULL; + } if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) break; @@ -391,9 +472,9 @@ struct request *elv_next_request(request_queue_t *q) /* * the request may have been (partially) prepped. * we need to keep this request in the front to - * avoid resource deadlock. turn on softbarrier. + * avoid resource deadlock. REQ_STARTED will + * prevent other fs requests from passing this one. */ - rq->flags |= REQ_SOFTBARRIER; rq = NULL; break; } else if (ret == BLKPREP_KILL) { @@ -416,42 +497,32 @@ struct request *elv_next_request(request_queue_t *q) return rq; } -void elv_remove_request(request_queue_t *q, struct request *rq) +void elv_dequeue_request(request_queue_t *q, struct request *rq) { - elevator_t *e = q->elevator; + BUG_ON(list_empty(&rq->queuelist)); + + list_del_init(&rq->queuelist); /* * the time frame between a request being removed from the lists * and to it is freed is accounted as io that is in progress at - * the driver side. note that we only account requests that the - * driver has seen (REQ_STARTED set), to avoid false accounting - * for request-request merges + * the driver side. 
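+ *
+ * (Sketch, not part of this patch: the accounting pairs up across a
+ * driver's request lifecycle roughly as follows; error paths and
+ * locking omitted:)
+ *
+ *	rq = elv_next_request(q);	(rq gets REQ_STARTED)
+ *	blkdev_dequeue_request(rq);	(in_flight++, via this function)
+ *	... hardware completes rq ...
+ *	(the completion path calls elv_completed_request(), which
+ *	 does in_flight-- as seen below)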
*/ if (blk_account_rq(rq)) q->in_flight++; - - /* - * the main clearing point for q->last_merge is on retrieval of - * request by driver (it calls elv_next_request()), but it _can_ - * also happen here if a request is added to the queue but later - * deleted without ever being given to driver (merged with another - * request). - */ - if (rq == q->last_merge) - q->last_merge = NULL; - - if (e->ops->elevator_remove_req_fn) - e->ops->elevator_remove_req_fn(q, rq); } int elv_queue_empty(request_queue_t *q) { elevator_t *e = q->elevator; + if (!list_empty(&q->queue_head)) + return 0; + if (e->ops->elevator_queue_empty_fn) return e->ops->elevator_queue_empty_fn(q); - return list_empty(&q->queue_head); + return 1; } struct request *elv_latter_request(request_queue_t *q, struct request *rq) @@ -487,7 +558,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq) } int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { elevator_t *e = q->elevator; @@ -523,11 +594,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq) /* * request is released from the driver, io must be done */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight--; - - if (e->ops->elevator_completed_req_fn) - e->ops->elevator_completed_req_fn(q, rq); + if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) + e->ops->elevator_completed_req_fn(q, rq); + } } int elv_register_queue(struct request_queue *q) @@ -555,10 +626,9 @@ void elv_unregister_queue(struct request_queue *q) int elv_register(struct elevator_type *e) { + spin_lock_irq(&elv_list_lock); if (elevator_find(e->elevator_name)) BUG(); - - spin_lock_irq(&elv_list_lock); list_add_tail(&e->list, &elv_list); spin_unlock_irq(&elv_list_lock); @@ -582,25 +652,36 @@ EXPORT_SYMBOL_GPL(elv_unregister); * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we * need for the new one. this way we have a chance of going back to the old - * one, if the new one fails init for some reason. we also do an intermediate - * switch to noop to ensure safety with stack-allocated requests, since they - * don't originate from the block layer allocator. noop is safe here, because - * it never needs to touch the elevator itself for completion events. DRAIN - * flags will make sure we don't touch it for additions either. + * one, if the new one fails init for some reason. 
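+ *
+ * (Usage note, an assumption not stated in this patch: this path is
+ * normally reached through the queue's sysfs "scheduler" attribute,
+ * e.g. "echo deadline > /sys/block/<dev>/queue/scheduler",
+ * presumably via elv_iosched_store(), the counterpart of the
+ * elv_iosched_show() that appears later in this file.)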
*/ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) { - elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL); - struct elevator_type *noop_elevator = NULL; - elevator_t *old_elevator; + elevator_t *old_elevator, *e; + /* + * Allocate new elevator + */ + e = kmalloc(sizeof(elevator_t), GFP_KERNEL); if (!e) goto error; /* - * first step, drain requests from the block freelist + * Turn on BYPASS and drain all requests w/ elevator private data */ - blk_wait_queue_drained(q, 0); + spin_lock_irq(q->queue_lock); + + set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + + while (q->elevator->ops->elevator_dispatch_fn(q, 1)) + ; + + while (q->rq.elvpriv) { + spin_unlock_irq(q->queue_lock); + msleep(10); + spin_lock_irq(q->queue_lock); + } + + spin_unlock_irq(q->queue_lock); /* * unregister old elevator data @@ -609,18 +690,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) old_elevator = q->elevator; /* - * next step, switch to noop since it uses no private rq structures - * and doesn't allocate any memory for anything. then wait for any - * non-fs requests in-flight - */ - noop_elevator = elevator_get("noop"); - spin_lock_irq(q->queue_lock); - elevator_attach(q, noop_elevator, e); - spin_unlock_irq(q->queue_lock); - - blk_wait_queue_drained(q, 1); - - /* * attach and start new elevator */ if (elevator_attach(q, new_e, e)) @@ -630,11 +699,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e) goto fail_register; /* - * finally exit old elevator and start queue again + * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - blk_finish_queue_drain(q); - elevator_put(noop_elevator); + clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); return; fail_register: @@ -643,13 +711,13 @@ fail_register: * one again (along with re-adding the sysfs dir) */ elevator_exit(e); + e = NULL; fail: q->elevator = old_elevator; elv_register_queue(q); - blk_finish_queue_drain(q); + clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + kfree(e); error: - if (noop_elevator) - elevator_put(noop_elevator); elevator_put(new_e); printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name); } @@ -701,11 +769,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) return len; } +EXPORT_SYMBOL(elv_dispatch_sort); EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(elv_requeue_request); EXPORT_SYMBOL(elv_next_request); -EXPORT_SYMBOL(elv_remove_request); +EXPORT_SYMBOL(elv_dequeue_request); EXPORT_SYMBOL(elv_queue_empty); EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 888dad5..0089547 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -628,7 +628,7 @@ static inline void debugt(const char *message) { } #endif /* DEBUGT */ typedef void (*timeout_fn) (unsigned long); -static struct timer_list fd_timeout = TIMER_INITIALIZER(floppy_shutdown, 0, 0); +static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0); static const char *timeout_message; @@ -1012,7 +1012,7 @@ static void schedule_bh(void (*handler) (void)) schedule_work(&floppy_work); } -static struct timer_list fd_timer = TIMER_INITIALIZER(NULL, 0, 0); +static DEFINE_TIMER(fd_timer, NULL, 0, 0); static void cancel_activity(void) { diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 483d71b..0af7351 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -263,8 +263,6 @@ void 
blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); blk_queue_activity_fn(q, NULL, NULL); - - INIT_LIST_HEAD(&q->drain_list); } EXPORT_SYMBOL(blk_queue_make_request); @@ -353,6 +351,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq) struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; + elv_completed_request(q, flush_rq); + rq->flags |= REQ_BAR_PREFLUSH; if (!flush_rq->errors) @@ -369,6 +369,8 @@ static void blk_post_flush_end_io(struct request *flush_rq) struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; + elv_completed_request(q, flush_rq); + rq->flags |= REQ_BAR_POSTFLUSH; q->end_flush_fn(q, flush_rq); @@ -408,8 +410,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) if (!list_empty(&rq->queuelist)) blkdev_dequeue_request(rq); - elv_deactivate_request(q, rq); - flush_rq->end_io_data = rq; flush_rq->end_io = blk_pre_flush_end_io; @@ -1040,6 +1040,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags); static char *rq_flags[] = { "REQ_RW", "REQ_FAILFAST", + "REQ_SORTED", "REQ_SOFTBARRIER", "REQ_HARDBARRIER", "REQ_CMD", @@ -1047,6 +1048,7 @@ static char *rq_flags[] = { "REQ_STARTED", "REQ_DONTPREP", "REQ_QUEUED", + "REQ_ELVPRIV", "REQ_PC", "REQ_BLOCK_PC", "REQ_SENSE", @@ -1637,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q) rl->count[READ] = rl->count[WRITE] = 0; rl->starved[READ] = rl->starved[WRITE] = 0; + rl->elvpriv = 0; init_waitqueue_head(&rl->wait[READ]); init_waitqueue_head(&rl->wait[WRITE]); - init_waitqueue_head(&rl->drain); rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep, q->node); @@ -1652,13 +1654,13 @@ static int blk_init_free_list(request_queue_t *q) static int __make_request(request_queue_t *, struct bio *); -request_queue_t *blk_alloc_queue(int gfp_mask) +request_queue_t *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, -1); } EXPORT_SYMBOL(blk_alloc_queue); -request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id) +request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) { request_queue_t *q; @@ -1782,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(request_queue_t *q, struct request *rq) { - elv_put_request(q, rq); + if (rq->flags & REQ_ELVPRIV) + elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); } static inline struct request * -blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) +blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, + int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -1800,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask) */ rq->flags = rw; - if (!elv_set_request(q, rq, bio, gfp_mask)) - return rq; + if (priv) { + if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; + } + rq->flags |= REQ_ELVPRIV; + } - mempool_free(rq, q->rq.rq_pool); - return NULL; + return rq; } /* @@ -1860,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. 
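+ *
+ * (Illustration, not part of this patch.) rw and priv must mirror
+ * what get_request() accounted when the request was allocated;
+ * __blk_put_request() below therefore derives both from the request
+ * itself:
+ *
+ *	int rw = rq_data_dir(req);
+ *	int priv = req->flags & REQ_ELVPRIV;
+ *
+ *	freed_request(q, rw, priv);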
*/ -static void freed_request(request_queue_t *q, int rw) +static void freed_request(request_queue_t *q, int rw, int priv) { struct request_list *rl = &q->rq; rl->count[rw]--; + if (priv) + rl->elvpriv--; __freed_request(q, rw); if (unlikely(rl->starved[rw ^ 1])) __freed_request(q, rw ^ 1); - - if (!rl->count[READ] && !rl->count[WRITE]) { - smp_mb(); - if (unlikely(waitqueue_active(&rl->drain))) - wake_up(&rl->drain); - } } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) @@ -1885,14 +1889,12 @@ static void freed_request(request_queue_t *q, int rw) * Returns !NULL on success, with queue_lock *not held*. */ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, - int gfp_mask) + gfp_t gfp_mask) { struct request *rq = NULL; struct request_list *rl = &q->rq; struct io_context *ioc = current_io_context(GFP_ATOMIC); - - if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) - goto out; + int priv; if (rl->count[rw]+1 >= q->nr_requests) { /* @@ -1937,9 +1939,14 @@ get_rq: rl->starved[rw] = 0; if (rl->count[rw] >= queue_congestion_on_threshold(q)) set_queue_congested(q, rw); + + priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + if (priv) + rl->elvpriv++; + spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw, bio, gfp_mask); + rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); if (!rq) { /* * Allocation failed presumably due to memory. Undo anything @@ -1949,7 +1956,7 @@ get_rq: * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, rw); + freed_request(q, rw, priv); /* * in the very unlikely event that allocation failed and no @@ -2019,7 +2026,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw, return rq; } -struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask) +struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask) { struct request *rq; @@ -2251,7 +2258,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @gfp_mask: memory allocation flags */ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, - unsigned int len, unsigned int gfp_mask) + unsigned int len, gfp_t gfp_mask) { struct bio *bio; @@ -2373,44 +2380,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) EXPORT_SYMBOL(blkdev_issue_flush); -/** - * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices - * @q: device queue - * @disk: gendisk - * @error_sector: error offset - * - * Description: - * Devices understanding the SCSI command set, can use this function as - * a helper for issuing a cache flush. Note: driver is required to store - * the error offset (in case of error flushing) in ->sector of struct - * request. 
- */ -int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, - sector_t *error_sector) -{ - struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT); - int ret; - - rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER; - rq->sector = 0; - memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->cmd[0] = 0x35; - rq->cmd_len = 12; - rq->data = NULL; - rq->data_len = 0; - rq->timeout = 60 * HZ; - - ret = blk_execute_rq(q, disk, rq, 0); - - if (ret && error_sector) - *error_sector = rq->sector; - - blk_put_request(rq); - return ret; -} - -EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn); - static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) { int rw = rq_data_dir(rq); @@ -2471,13 +2440,15 @@ void disk_round_stats(struct gendisk *disk) { unsigned long now = jiffies; - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - disk->stamp = now; + if (now == disk->stamp) + return; - if (disk->in_flight) - __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle)); - disk->stamp_idle = now; + if (disk->in_flight) { + __disk_stat_add(disk, time_in_queue, + disk->in_flight * (now - disk->stamp)); + __disk_stat_add(disk, io_ticks, (now - disk->stamp)); + } + disk->stamp = now; } /* @@ -2492,6 +2463,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req) if (unlikely(--req->ref_count)) return; + elv_completed_request(q, req); + req->rq_status = RQ_INACTIVE; req->rl = NULL; @@ -2501,26 +2474,25 @@ static void __blk_put_request(request_queue_t *q, struct request *req) */ if (rl) { int rw = rq_data_dir(req); - - elv_completed_request(q, req); + int priv = req->flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); blk_free_request(q, req); - freed_request(q, rw); + freed_request(q, rw, priv); } } void blk_put_request(struct request *req) { + unsigned long flags; + request_queue_t *q = req->q; + /* - * if req->rl isn't set, this request didnt originate from the - * block layer, so it's safe to just disregard it + * Gee, IDE calls in w/ NULL q. Fix IDE and remove the + * following if (q) test. */ - if (req->rl) { - unsigned long flags; - request_queue_t *q = req->q; - + if (q) { spin_lock_irqsave(q->queue_lock, flags); __blk_put_request(q, req); spin_unlock_irqrestore(q->queue_lock, flags); @@ -2835,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio) } } -void blk_finish_queue_drain(request_queue_t *q) -{ - struct request_list *rl = &q->rq; - struct request *rq; - int requeued = 0; - - spin_lock_irq(q->queue_lock); - clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); - - while (!list_empty(&q->drain_list)) { - rq = list_entry_rq(q->drain_list.next); - - list_del_init(&rq->queuelist); - elv_requeue_request(q, rq); - requeued++; - } - - if (requeued) - q->request_fn(q); - - spin_unlock_irq(q->queue_lock); - - wake_up(&rl->wait[0]); - wake_up(&rl->wait[1]); - wake_up(&rl->drain); -} - -static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch) -{ - int wait = rl->count[READ] + rl->count[WRITE]; - - if (dispatch) - wait += !list_empty(&q->queue_head); - - return wait; -} - -/* - * We rely on the fact that only requests allocated through blk_alloc_request() - * have io scheduler private data structures associated with them. Any other - * type of request (allocated on stack or through kmalloc()) should not go - * to the io scheduler core, but be attached to the queue head instead. 
- */ -void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch) -{ - struct request_list *rl = &q->rq; - DEFINE_WAIT(wait); - - spin_lock_irq(q->queue_lock); - set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags); - - while (wait_drain(q, rl, wait_dispatch)) { - prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE); - - if (wait_drain(q, rl, wait_dispatch)) { - __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); - io_schedule(); - spin_lock_irq(q->queue_lock); - } - - finish_wait(&rl->drain, &wait); - } - - spin_unlock_irq(q->queue_lock); -} - -/* - * block waiting for the io scheduler being started again. - */ -static inline void block_wait_queue_running(request_queue_t *q) -{ - DEFINE_WAIT(wait); - - while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { - struct request_list *rl = &q->rq; - - prepare_to_wait_exclusive(&rl->drain, &wait, - TASK_UNINTERRUPTIBLE); - - /* - * re-check the condition. avoids using prepare_to_wait() - * in the fast path (queue is running) - */ - if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) - io_schedule(); - - finish_wait(&rl->drain, &wait); - } -} - static void handle_bad_sector(struct bio *bio) { char b[BDEVNAME_SIZE]; @@ -3021,8 +2902,6 @@ end_io: if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; - block_wait_queue_running(q); - /* * If this device has partitions, remap block n * of partition p to block n+start(p) of the disk. @@ -3431,7 +3310,7 @@ void exit_io_context(void) * but since the current task itself holds a reference, the context can be * used in general code, so long as it stays within `current` context. */ -struct io_context *current_io_context(int gfp_flags) +struct io_context *current_io_context(gfp_t gfp_flags) { struct task_struct *tsk = current; struct io_context *ret; @@ -3462,7 +3341,7 @@ EXPORT_SYMBOL(current_io_context); * * This is always called in the context of the task which submitted the I/O. */ -struct io_context *get_io_context(int gfp_flags) +struct io_context *get_io_context(gfp_t gfp_flags) { struct io_context *ret; ret = current_io_context(gfp_flags); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b35e088..96c664a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) { struct file *filp = lo->lo_backing_file; - int gfp = lo->old_gfp_mask; + gfp_t gfp = lo->old_gfp_mask; if (lo->lo_state != Lo_bound) return -ENXIO; diff --git a/drivers/block/noop-iosched.c b/drivers/block/noop-iosched.c index b1730b6..f56b8ed 100644 --- a/drivers/block/noop-iosched.c +++ b/drivers/block/noop-iosched.c @@ -7,57 +7,19 @@ #include <linux/module.h> #include <linux/init.h> -/* - * See if we can find a request that this buffer can be coalesced with. 
- */ -static int elevator_noop_merge(request_queue_t *q, struct request **req, - struct bio *bio) -{ - int ret; - - ret = elv_try_last_merge(q, bio); - if (ret != ELEVATOR_NO_MERGE) - *req = q->last_merge; - - return ret; -} - -static void elevator_noop_merge_requests(request_queue_t *q, struct request *req, - struct request *next) -{ - list_del_init(&next->queuelist); -} - -static void elevator_noop_add_request(request_queue_t *q, struct request *rq, - int where) +static void elevator_noop_add_request(request_queue_t *q, struct request *rq) { - if (where == ELEVATOR_INSERT_FRONT) - list_add(&rq->queuelist, &q->queue_head); - else - list_add_tail(&rq->queuelist, &q->queue_head); - - /* - * new merges must not precede this barrier - */ - if (rq->flags & REQ_HARDBARRIER) - q->last_merge = NULL; - else if (!q->last_merge) - q->last_merge = rq; + elv_dispatch_add_tail(q, rq); } -static struct request *elevator_noop_next_request(request_queue_t *q) +static int elevator_noop_dispatch(request_queue_t *q, int force) { - if (!list_empty(&q->queue_head)) - return list_entry_rq(q->queue_head.next); - - return NULL; + return 0; } static struct elevator_type elevator_noop = { .ops = { - .elevator_merge_fn = elevator_noop_merge, - .elevator_merge_req_fn = elevator_noop_merge_requests, - .elevator_next_req_fn = elevator_noop_next_request, + .elevator_dispatch_fn = elevator_noop_dispatch, .elevator_add_req_fn = elevator_noop_add_request, }, .elevator_name = "noop", diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 7289f67..ac5ba46 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -516,8 +516,7 @@ static int pcd_tray_move(struct cdrom_device_info *cdi, int position) static void pcd_sleep(int cs) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(cs); + schedule_timeout_interruptible(cs); } static int pcd_reset(struct pcd_unit *cd) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 060b1f2..94af920 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -507,8 +507,7 @@ static void pf_eject(struct pf_unit *pf) static void pf_sleep(int cs) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(cs); + schedule_timeout_interruptible(cs); } /* the ATAPI standard actually specifies the contents of all 7 registers @@ -751,6 +750,14 @@ static int pf_ready(void) static struct request_queue *pf_queue; +static void pf_end_request(int uptodate) +{ + if (pf_req) { + end_request(pf_req, uptodate); + pf_req = NULL; + } +} + static void do_pf_request(request_queue_t * q) { if (pf_busy) @@ -766,7 +773,7 @@ repeat: pf_count = pf_req->current_nr_sectors; if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { - end_request(pf_req, 0); + pf_end_request(0); goto repeat; } @@ -781,7 +788,7 @@ repeat: pi_do_claimed(pf_current->pi, do_pf_write); else { pf_busy = 0; - end_request(pf_req, 0); + pf_end_request(0); goto repeat; } } @@ -799,9 +806,11 @@ static int pf_next_buf(void) if (!pf_count) return 1; spin_lock_irqsave(&pf_spin_lock, saved_flags); - end_request(pf_req, 1); - pf_count = pf_req->current_nr_sectors; - pf_buf = pf_req->buffer; + pf_end_request(1); + if (pf_req) { + pf_count = pf_req->current_nr_sectors; + pf_buf = pf_req->buffer; + } spin_unlock_irqrestore(&pf_spin_lock, saved_flags); return 1; } @@ -811,7 +820,7 @@ static inline void next_request(int success) unsigned long saved_flags; spin_lock_irqsave(&pf_spin_lock, saved_flags); - end_request(pf_req, success); + pf_end_request(success); pf_busy = 0; 
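+	/*
+	 * (Gloss, not in the original patch: pf_end_request() completes
+	 * pf_req only while it is still non-NULL and then clears it, so
+	 * a finished request cannot be completed a second time from
+	 * pf_next_buf() or a later call here.)
+	 */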
do_pf_request(pf_queue); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c index 84d8e29..b398239 100644 --- a/drivers/block/paride/pg.c +++ b/drivers/block/paride/pg.c @@ -276,8 +276,7 @@ static inline u8 DRIVE(struct pg *dev) static void pg_sleep(int cs) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(cs); + schedule_timeout_interruptible(cs); } static int pg_wait(struct pg *dev, int go, int stop, unsigned long tmo, char *msg) diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 5fe8ee8..d8d3523 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -383,8 +383,7 @@ static int pt_atapi(struct pt_unit *tape, char *cmd, int dlen, char *buf, char * static void pt_sleep(int cs) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(cs); + schedule_timeout_interruptible(cs); } static int pt_poll_dsc(struct pt_unit *tape, int pause, int tmo, char *msg) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7b83834..a280e67 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -5,29 +5,41 @@ * May be copied or modified under the terms of the GNU General Public * License. See linux/COPYING for more information. * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices (aka an exercise in block layer masturbation) + * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and + * DVD-RAM devices. * + * Theory of operation: * - * TODO: (circa order of when I will fix it) - * - Only able to write on CD-RW media right now. - * - check host application code on media and set it in write page - * - interface for UDF <-> packet to negotiate a new location when a write - * fails. - * - handle OPC, especially for -RW media + * At the lowest level, there is the standard driver for the CD/DVD device, + * typically ide-cd.c or sr.c. This driver can handle read and write requests, + * but it doesn't know anything about the special restrictions that apply to + * packet writing. One restriction is that write requests must be aligned to + * packet boundaries on the physical media, and the size of a write request + * must be equal to the packet size. Another restriction is that a + * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read + * command, if the previous command was a write. * - * Theory of operation: + * The purpose of the packet writing driver is to hide these restrictions from + * higher layers, such as file systems, and present a block device that can be + * randomly read and written using 2kB-sized blocks. + * + * The lowest layer in the packet writing driver is the packet I/O scheduler. + * Its data is defined by the struct packet_iosched and includes two bio + * queues with pending read and write requests. These queues are processed + * by the pkt_iosched_process_queue() function. The write requests in this + * queue are already properly aligned and sized. This layer is responsible for + * issuing the flush cache commands and scheduling the I/O in a good order. * - * We use a custom make_request_fn function that forwards reads directly to - * the underlying CD device. Write requests are either attached directly to - * a live packet_data object, or simply stored sequentially in a list for - * later processing by the kcdrwd kernel thread. This driver doesn't use - * any elevator functionally as defined by the elevator_s struct, but the - * underlying CD device uses a standard elevator. 
+ * The next layer transforms unaligned write requests to aligned writes. This + * transformation requires reading missing pieces of data from the underlying + * block device, assembling the pieces to full packets and queuing them to the + * packet I/O scheduler. * - * This strategy makes it possible to do very late merging of IO requests. - * A new bio sent to pkt_make_request can be merged with a live packet_data - * object even if the object is in the data gathering state. + * At the top layer there is a custom make_request_fn function that forwards + * read requests directly to the iosched queue and puts write requests in the + * unaligned write queue. A kernel thread performs the necessary read + * gathering to convert the unaligned writes to aligned writes and then feeds + * them to the packet I/O scheduler. * *************************************************************************/ @@ -100,10 +112,9 @@ static struct bio *pkt_bio_alloc(int nr_iovecs) goto no_bio; bio_init(bio); - bvl = kmalloc(nr_iovecs * sizeof(struct bio_vec), GFP_KERNEL); + bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL); if (!bvl) goto no_bvl; - memset(bvl, 0, nr_iovecs * sizeof(struct bio_vec)); bio->bi_max_vecs = nr_iovecs; bio->bi_io_vec = bvl; @@ -125,10 +136,9 @@ static struct packet_data *pkt_alloc_packet_data(void) int i; struct packet_data *pkt; - pkt = kmalloc(sizeof(struct packet_data), GFP_KERNEL); + pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL); if (!pkt) goto no_pkt; - memset(pkt, 0, sizeof(struct packet_data)); pkt->w_bio = pkt_bio_alloc(PACKET_MAX_SIZE); if (!pkt->w_bio) @@ -219,7 +229,7 @@ static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) return 1; } -static void *pkt_rb_alloc(unsigned int __nocast gfp_mask, void *data) +static void *pkt_rb_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct pkt_rb_node), gfp_mask); } @@ -659,7 +669,6 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct page **pages, in } offs += CD_FRAMESIZE; if (offs >= PAGE_SIZE) { - BUG_ON(offs > PAGE_SIZE); offs = 0; p++; } @@ -724,12 +733,6 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) atomic_set(&pkt->io_wait, 0); atomic_set(&pkt->io_errors, 0); - if (pkt->cache_valid) { - VPRINTK("pkt_gather_data: zone %llx cached\n", - (unsigned long long)pkt->sector); - goto out_account; - } - /* * Figure out which frames we need to read before we can write. */ @@ -738,6 +741,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) for (bio = pkt->orig_bios; bio; bio = bio->bi_next) { int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9); int num_frames = bio->bi_size / CD_FRAMESIZE; + pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); BUG_ON(first_frame < 0); BUG_ON(first_frame + num_frames > pkt->frames); for (f = first_frame; f < first_frame + num_frames; f++) @@ -745,6 +749,12 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) } spin_unlock(&pkt->lock); + if (pkt->cache_valid) { + VPRINTK("pkt_gather_data: zone %llx cached\n", + (unsigned long long)pkt->sector); + goto out_account; + } + /* * Schedule reads for missing parts of the packet. 
*/ @@ -778,7 +788,6 @@ out_account: frames_read, (unsigned long long)pkt->sector); pd->stats.pkt_started++; pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); - pd->stats.secs_w += pd->settings.size; } /* @@ -794,10 +803,11 @@ static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zo list_del_init(&pkt->list); if (pkt->sector != zone) pkt->cache_valid = 0; - break; + return pkt; } } - return pkt; + BUG(); + return NULL; } static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt) @@ -941,12 +951,10 @@ try_next_bio: } pkt = pkt_get_packet_data(pd, zone); - BUG_ON(!pkt); pd->current_sector = zone + pd->settings.size; pkt->sector = zone; pkt->frames = pd->settings.size >> 2; - BUG_ON(pkt->frames > PACKET_MAX_SIZE); pkt->write_size = 0; /* @@ -1636,6 +1644,10 @@ static int pkt_probe_settings(struct pktcdvd_device *pd) printk("pktcdvd: detected zero packet size!\n"); pd->settings.size = 128; } + if (pd->settings.size > PACKET_MAX_SECTORS) { + printk("pktcdvd: packet size is too big\n"); + return -ENXIO; + } pd->settings.fp = ti.fp; pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1); @@ -2070,7 +2082,7 @@ static int pkt_close(struct inode *inode, struct file *file) } -static void *psd_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *psd_pool_alloc(gfp_t gfp_mask, void *data) { return kmalloc(sizeof(struct packet_stacked_data), gfp_mask); } @@ -2198,7 +2210,6 @@ static int pkt_make_request(request_queue_t *q, struct bio *bio) * No matching packet found. Store the bio in the work queue. */ node = mempool_alloc(pd->rb_pool, GFP_NOIO); - BUG_ON(!node); node->bio = bio; spin_lock(&pd->lock); BUG_ON(pd->bio_queue_size < 0); @@ -2406,7 +2417,6 @@ static int pkt_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u struct pktcdvd_device *pd = inode->i_bdev->bd_disk->private_data; VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd, imajor(inode), iminor(inode)); - BUG_ON(!pd); switch (cmd) { /* @@ -2477,10 +2487,9 @@ static int pkt_setup_dev(struct pkt_ctrl_command *ctrl_cmd) return -EBUSY; } - pd = kmalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); + pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); if (!pd) return ret; - memset(pd, 0, sizeof(struct pktcdvd_device)); pd->rb_pool = mempool_create(PKT_RB_POOL_SIZE, pkt_rb_alloc, pkt_rb_free, NULL); if (!pd->rb_pool) diff --git a/drivers/block/ps2esdi.c b/drivers/block/ps2esdi.c index 2954878..29d1518 100644 --- a/drivers/block/ps2esdi.c +++ b/drivers/block/ps2esdi.c @@ -99,8 +99,7 @@ static DECLARE_WAIT_QUEUE_HEAD(ps2esdi_int); static int no_int_yet; static int ps2esdi_drives; static u_short io_base; -static struct timer_list esdi_timer = - TIMER_INITIALIZER(ps2esdi_reset_timer, 0, 0); +static DEFINE_TIMER(esdi_timer, ps2esdi_reset_timer, 0, 0); static int reset_status; static int ps2esdi_slot = -1; static int tp720esdi = 0; /* Is it Integrated ESDI of ThinkPad-720? 
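The ps2esdi hunk above is one of several static-timer conversions in this series. A sketch of the two forms, using hypothetical names (my_timer, my_timeout_fn); the four-argument DEFINE_TIMER(name, function, expires, data) order matches the usage in the patch:

static void my_timeout_fn(unsigned long data);	/* hypothetical handler */

/* before: separate struct declaration plus initializer macro */
static struct timer_list my_timer =
	TIMER_INITIALIZER(my_timeout_fn, 0, 0);

/* after: one macro declares and statically initializes the timer */
static DEFINE_TIMER(my_timer, my_timeout_fn, 0, 0);

Arming the timer is unchanged either way, e.g. mod_timer(&my_timer, jiffies + HZ/10).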
*/ diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 145c1fb..68c60a5 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp) struct block_device *bdev = inode->i_bdev; struct address_space *mapping; unsigned bsize; - int gfp_mask; + gfp_t gfp_mask; inode = igrab(bdev->bd_inode); rd_bdev[unit] = bdev; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index abb2df2..382dea7 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -123,6 +123,7 @@ static int verify_command(struct file *file, unsigned char *cmd) safe_for_read(READ_12), safe_for_read(READ_16), safe_for_read(READ_BUFFER), + safe_for_read(READ_DEFECT_DATA), safe_for_read(READ_LONG), safe_for_read(INQUIRY), safe_for_read(MODE_SENSE), @@ -167,6 +168,7 @@ static int verify_command(struct file *file, unsigned char *cmd) safe_for_write(WRITE_VERIFY_12), safe_for_write(WRITE_16), safe_for_write(WRITE_LONG), + safe_for_write(WRITE_LONG_2), safe_for_write(ERASE), safe_for_write(GPCMD_MODE_SELECT_10), safe_for_write(MODE_SELECT), @@ -199,15 +201,15 @@ static int verify_command(struct file *file, unsigned char *cmd) return 0; } + /* And root can do any command.. */ + if (capable(CAP_SYS_RAWIO)) + return 0; + if (!type) { cmd_type[cmd[0]] = CMD_WARNED; printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]); } - /* And root can do any command.. */ - if (capable(CAP_SYS_RAWIO)) - return 0; - /* Otherwise fail it with an "Operation not permitted" */ return -EPERM; } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index e5f7494..e425ad3 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -834,8 +834,7 @@ static int fd_eject(struct floppy_state *fs) break; } swim3_select(fs, RELAX); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + schedule_timeout_interruptible(1); if (swim3_readbit(fs, DISK_IN) == 0) break; } @@ -906,8 +905,7 @@ static int floppy_open(struct inode *inode, struct file *filp) break; } swim3_select(fs, RELAX); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + schedule_timeout_interruptible(1); } if (err == 0 && (swim3_readbit(fs, SEEK_COMPLETE) == 0 || swim3_readbit(fs, DISK_IN) == 0)) @@ -992,8 +990,7 @@ static int floppy_revalidate(struct gendisk *disk) if (signal_pending(current)) break; swim3_select(fs, RELAX); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + schedule_timeout_interruptible(1); } ret = swim3_readbit(fs, SEEK_COMPLETE) == 0 || swim3_readbit(fs, DISK_IN) == 0; diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c index a1283f6..89e3c2f 100644 --- a/drivers/block/swim_iop.c +++ b/drivers/block/swim_iop.c @@ -338,8 +338,7 @@ static int swimiop_eject(struct floppy_state *fs) err = -EINTR; break; } - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); + schedule_timeout_interruptible(1); } release_drive(fs); return cmd->error; diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index d57007b9..1ded3b4 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1,7 +1,7 @@ /* * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware * - * Copyright 2004 Red Hat, Inc. + * Copyright 2004-2005 Red Hat, Inc. 
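In the scsi_ioctl.c hunk above, the CAP_SYS_RAWIO test moves ahead of the unknown-opcode warning, so a privileged caller issuing vendor-specific commands no longer triggers the printk or pollutes cmd_type[] with CMD_WARNED. A condensed, approximate sketch of the resulting tail of verify_command(), reusing that file's CMD_* markers:

	if (type & CMD_READ_SAFE)
		return 0;
	if ((type & CMD_WRITE_SAFE) && (file->f_mode & FMODE_WRITE))
		return 0;

	/* root is now checked before warning about unknown opcodes */
	if (capable(CAP_SYS_RAWIO))
		return 0;

	if (!type) {
		cmd_type[cmd[0]] = CMD_WARNED;
		printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
	}
	return -EPERM;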
* * Author/maintainer: Jeff Garzik <jgarzik@pobox.com> * @@ -31,10 +31,6 @@ #include <asm/semaphore.h> #include <asm/uaccess.h> -MODULE_AUTHOR("Jeff Garzik"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Promise SATA SX8 block driver"); - #if 0 #define CARM_DEBUG #define CARM_VERBOSE_DEBUG @@ -45,9 +41,35 @@ MODULE_DESCRIPTION("Promise SATA SX8 block driver"); #undef CARM_NDEBUG #define DRV_NAME "sx8" -#define DRV_VERSION "0.8" +#define DRV_VERSION "1.0" #define PFX DRV_NAME ": " +MODULE_AUTHOR("Jeff Garzik"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Promise SATA SX8 block driver"); +MODULE_VERSION(DRV_VERSION); + +/* + * SX8 hardware has a single message queue for all ATA ports. + * When this driver was written, the hardware (firmware?) would + * corrupt data eventually, if more than one request was outstanding. + * As one can imagine, having 8 ports bottlenecking on a single + * command hurts performance. + * + * Based on user reports, later versions of the hardware (firmware?) + * seem to be able to survive with more than one command queued. + * + * Therefore, we default to the safe option -- 1 command -- but + * allow the user to increase this. + * + * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ), + * but problems seem to occur when you exceed ~30, even on newer hardware. + */ +static int max_queue = 1; +module_param(max_queue, int, 0444); +MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)"); + + #define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN) /* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */ @@ -90,12 +112,10 @@ enum { /* command message queue limits */ CARM_MAX_REQ = 64, /* max command msgs per host */ - CARM_MAX_Q = 1, /* one command at a time */ CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */ /* S/G limits, host-wide and per-request */ CARM_MAX_REQ_SG = 32, /* max s/g entries per request */ - CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */ CARM_MAX_HOST_SG = 600, /* max s/g entries per host */ CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */ @@ -181,6 +201,10 @@ enum { FL_DYN_MAJOR = (1 << 17), }; +enum { + CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */ +}; + enum scatter_gather_types { SGT_32BIT = 0, SGT_64BIT = 1, @@ -218,7 +242,6 @@ static const char *state_name[] = { struct carm_port { unsigned int port_no; - unsigned int n_queued; struct gendisk *disk; struct carm_host *host; @@ -448,7 +471,7 @@ static inline int carm_lookup_bucket(u32 msg_size) for (i = 0; i < ARRAY_SIZE(msg_sizes); i++) if (msg_size <= msg_sizes[i]) return i; - + return -ENOENT; } @@ -509,7 +532,7 @@ static struct carm_request *carm_get_request(struct carm_host *host) if (host->hw_sg_used >= (CARM_MAX_HOST_SG - CARM_MAX_REQ_SG)) return NULL; - for (i = 0; i < CARM_MAX_Q; i++) + for (i = 0; i < max_queue; i++) if ((host->msg_alloc & (1ULL << i)) == 0) { struct carm_request *crq = &host->req[i]; crq->port = NULL; @@ -521,14 +544,14 @@ static struct carm_request *carm_get_request(struct carm_host *host) assert(host->n_msgs <= CARM_MAX_REQ); return crq; } - + DPRINTK("no request available, returning NULL\n"); return NULL; } static int carm_put_request(struct carm_host *host, struct carm_request *crq) { - assert(crq->tag < CARM_MAX_Q); + assert(crq->tag < max_queue); if (unlikely((host->msg_alloc & (1ULL << crq->tag)) == 0)) return -EINVAL; /* tried to clear a tag that was not active */ @@ -791,7 +814,7 @@ static inline void carm_end_rq(struct carm_host *host, struct 
carm_request *crq, int is_ok) { carm_end_request_queued(host, crq, is_ok); - if (CARM_MAX_Q == 1) + if (max_queue == 1) carm_round_robin(host); else if ((host->n_msgs <= CARM_MSG_LOW_WATER) && (host->hw_sg_used <= CARM_SG_LOW_WATER)) { diff --git a/drivers/block/ub.c b/drivers/block/ub.c index a026567..ed4d500 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -16,9 +16,10 @@ * -- verify the 13 conditions and do bulk resets * -- kill last_pipe and simply do two-state clearing on both pipes * -- verify protocol (bulk) from USB descriptors (maybe...) - * -- highmem and sg + * -- highmem * -- move top_sense and work_bcs into separate allocations (if they survive) * for cache purists and esoteric architectures. + * -- Allocate structure for LUN 0 before the first ub_sync_tur, avoid NULL. ? * -- prune comments, they are too volumnous * -- Exterminate P3 printks * -- Resove XXX's @@ -171,7 +172,7 @@ struct bulk_cs_wrap { */ struct ub_dev; -#define UB_MAX_REQ_SG 1 +#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ #define UB_MAX_SECTORS 64 /* @@ -234,13 +235,10 @@ struct ub_scsi_cmd { int stat_count; /* Retries getting status. */ - /* - * We do not support transfers from highmem pages - * because the underlying USB framework does not do what we need. - */ - char *data; /* Requested buffer */ unsigned int len; /* Requested length */ - // struct scatterlist sgv[UB_MAX_REQ_SG]; + unsigned int current_sg; + unsigned int nsg; /* sgv[nsg] */ + struct scatterlist sgv[UB_MAX_REQ_SG]; struct ub_lun *lun; void (*done)(struct ub_dev *, struct ub_scsi_cmd *); @@ -389,17 +387,18 @@ struct ub_dev { struct bulk_cs_wrap work_bcs; struct usb_ctrlrequest work_cr; + int sg_stat[6]; struct ub_scsi_trace tr; }; /* */ static void ub_cleanup(struct ub_dev *sc); -static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq); +static int ub_request_fn_1(struct ub_lun *lun, struct request *rq); static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, struct ub_scsi_cmd *cmd, struct request *rq); -static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, - struct request *rq); +static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct request *rq); static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd); static void ub_end_rq(struct request *rq, int uptodate); static int ub_submit_scsi(struct ub_dev *sc, struct ub_scsi_cmd *cmd); @@ -407,6 +406,7 @@ static void ub_urb_complete(struct urb *urb, struct pt_regs *pt); static void ub_scsi_action(unsigned long _dev); static void ub_scsi_dispatch(struct ub_dev *sc); static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd); +static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd); static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc); static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd); static void ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd); @@ -500,7 +500,8 @@ static void ub_cmdtr_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd, } } -static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr, char *page) +static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr, + char *page) { struct usb_interface *intf; struct ub_dev *sc; @@ -523,6 +524,14 @@ static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr, c cnt += sprintf(page + cnt, "qlen %d qmax %d\n", sc->cmd_queue.qlen, sc->cmd_queue.qmax); + cnt += 
sprintf(page + cnt, + "sg %d %d %d %d %d .. %d\n", + sc->sg_stat[0], + sc->sg_stat[1], + sc->sg_stat[2], + sc->sg_stat[3], + sc->sg_stat[4], + sc->sg_stat[5]); list_for_each (p, &sc->luns) { lun = list_entry(p, struct ub_lun, link); @@ -744,20 +753,20 @@ static struct ub_scsi_cmd *ub_cmdq_pop(struct ub_dev *sc) * The request function is our main entry point */ -static void ub_bd_rq_fn(request_queue_t *q) +static void ub_request_fn(request_queue_t *q) { struct ub_lun *lun = q->queuedata; struct request *rq; while ((rq = elv_next_request(q)) != NULL) { - if (ub_bd_rq_fn_1(lun, rq) != 0) { + if (ub_request_fn_1(lun, rq) != 0) { blk_stop_queue(q); break; } } } -static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) +static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) { struct ub_dev *sc = lun->udev; struct ub_scsi_cmd *cmd; @@ -774,9 +783,8 @@ static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) memset(cmd, 0, sizeof(struct ub_scsi_cmd)); blkdev_dequeue_request(rq); - if (blk_pc_request(rq)) { - rc = ub_cmd_build_packet(sc, cmd, rq); + rc = ub_cmd_build_packet(sc, lun, cmd, rq); } else { rc = ub_cmd_build_block(sc, lun, cmd, rq); } @@ -791,7 +799,7 @@ static int ub_bd_rq_fn_1(struct ub_lun *lun, struct request *rq) cmd->back = rq; cmd->tag = sc->tagcnt++; - if ((rc = ub_submit_scsi(sc, cmd)) != 0) { + if (ub_submit_scsi(sc, cmd) != 0) { ub_put_cmd(lun, cmd); ub_end_rq(rq, 0); return 0; @@ -804,58 +812,31 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, struct ub_scsi_cmd *cmd, struct request *rq) { int ub_dir; -#if 0 /* We use rq->buffer for now */ - struct scatterlist *sg; int n_elem; -#endif unsigned int block, nblks; if (rq_data_dir(rq) == WRITE) ub_dir = UB_DIR_WRITE; else ub_dir = UB_DIR_READ; + cmd->dir = ub_dir; /* * get scatterlist from block layer */ -#if 0 /* We use rq->buffer for now */ - sg = &cmd->sgv[0]; - n_elem = blk_rq_map_sg(q, rq, sg); + n_elem = blk_rq_map_sg(lun->disk->queue, rq, &cmd->sgv[0]); if (n_elem <= 0) { - ub_put_cmd(lun, cmd); - ub_end_rq(rq, 0); - blk_start_queue(q); - return 0; /* request with no s/g entries? */ + printk(KERN_INFO "%s: failed request map (%d)\n", + sc->name, n_elem); /* P3 */ + return -1; /* request with no s/g entries? */ } - - if (n_elem != 1) { /* Paranoia */ + if (n_elem > UB_MAX_REQ_SG) { /* Paranoia */ printk(KERN_WARNING "%s: request with %d segments\n", sc->name, n_elem); - ub_put_cmd(lun, cmd); - ub_end_rq(rq, 0); - blk_start_queue(q); - return 0; - } -#endif - - /* - * XXX Unfortunately, this check does not work. It is quite possible - * to get bogus non-null rq->buffer if you allow sg by mistake. - */ - if (rq->buffer == NULL) { - /* - * This must not happen if we set the queue right. - * The block level must create bounce buffers for us. - */ - static int do_print = 1; - if (do_print) { - printk(KERN_WARNING "%s: unmapped block request" - " flags 0x%lx sectors %lu\n", - sc->name, rq->flags, rq->nr_sectors); - do_print = 0; - } return -1; } + cmd->nsg = n_elem; + sc->sg_stat[n_elem < 5 ? 
n_elem : 5]++; /* * build the command @@ -876,30 +857,15 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, cmd->cdb[8] = nblks; cmd->cdb_len = 10; - cmd->dir = ub_dir; - cmd->data = rq->buffer; cmd->len = rq->nr_sectors * 512; return 0; } -static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, - struct request *rq) +static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, + struct ub_scsi_cmd *cmd, struct request *rq) { - - if (rq->data_len != 0 && rq->data == NULL) { - static int do_print = 1; - if (do_print) { - printk(KERN_WARNING "%s: unmapped packet request" - " flags 0x%lx length %d\n", - sc->name, rq->flags, rq->data_len); - do_print = 0; - } - return -1; - } - - memcpy(&cmd->cdb, rq->cmd, rq->cmd_len); - cmd->cdb_len = rq->cmd_len; + int n_elem; if (rq->data_len == 0) { cmd->dir = UB_DIR_NONE; @@ -908,8 +874,29 @@ static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_scsi_cmd *cmd, cmd->dir = UB_DIR_WRITE; else cmd->dir = UB_DIR_READ; + + } + + /* + * get scatterlist from block layer + */ + n_elem = blk_rq_map_sg(lun->disk->queue, rq, &cmd->sgv[0]); + if (n_elem < 0) { + printk(KERN_INFO "%s: failed request map (%d)\n", + sc->name, n_elem); /* P3 */ + return -1; + } + if (n_elem > UB_MAX_REQ_SG) { /* Paranoia */ + printk(KERN_WARNING "%s: request with %d segments\n", + sc->name, n_elem); + return -1; } - cmd->data = rq->data; + cmd->nsg = n_elem; + sc->sg_stat[n_elem < 5 ? n_elem : 5]++; + + memcpy(&cmd->cdb, rq->cmd, rq->cmd_len); + cmd->cdb_len = rq->cmd_len; + cmd->len = rq->data_len; return 0; @@ -919,24 +906,34 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) { struct request *rq = cmd->back; struct ub_lun *lun = cmd->lun; - struct gendisk *disk = lun->disk; - request_queue_t *q = disk->queue; int uptodate; - if (blk_pc_request(rq)) { - /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */ - memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE); - rq->sense_len = UB_SENSE_SIZE; - } - - if (cmd->error == 0) + if (cmd->error == 0) { uptodate = 1; - else + + if (blk_pc_request(rq)) { + if (cmd->act_len >= rq->data_len) + rq->data_len = 0; + else + rq->data_len -= cmd->act_len; + } + } else { uptodate = 0; + if (blk_pc_request(rq)) { + /* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */ + memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE); + rq->sense_len = UB_SENSE_SIZE; + if (sc->top_sense[0] != 0) + rq->errors = SAM_STAT_CHECK_CONDITION; + else + rq->errors = DID_ERROR << 16; + } + } + ub_put_cmd(lun, cmd); ub_end_rq(rq, uptodate); - blk_start_queue(q); + blk_start_queue(lun->disk->queue); } static void ub_end_rq(struct request *rq, int uptodate) @@ -1014,7 +1011,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->send_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe, bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; /* Fill what we shouldn't be filling, because usb-storage did so. 
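Both ub command builders above now ask the block layer for a scatterlist via blk_rq_map_sg() instead of trusting rq->buffer, which could be bogus for multi-segment requests. A hedged sketch of the shared mapping step:

	struct scatterlist *sg = &cmd->sgv[0];
	int n_elem;

	/* have the block layer build the s/g list for this request */
	n_elem = blk_rq_map_sg(lun->disk->queue, rq, sg);
	if (n_elem <= 0)
		return -1;		/* request with no s/g entries? */
	if (n_elem > UB_MAX_REQ_SG)
		return -1;		/* would overrun cmd->sgv[] */

	cmd->nsg = n_elem;
	sc->sg_stat[n_elem < 5 ? n_elem : 5]++;	/* histogram for ub_diag_show() */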
*/ sc->work_urb.actual_length = 0; @@ -1023,7 +1019,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { /* XXX Clear stalls */ - printk("ub: cmd #%d start failed (%d)\n", cmd->tag, rc); /* P3 */ ub_complete(&sc->work_done); return rc; } @@ -1103,7 +1098,6 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) { struct urb *urb = &sc->work_urb; struct bulk_cs_wrap *bcs; - int pipe; int rc; if (atomic_read(&sc->poison)) { @@ -1195,47 +1189,20 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) return; } if (urb->status != 0) { - printk("ub: cmd #%d cmd status (%d)\n", cmd->tag, urb->status); /* P3 */ goto Bad_End; } if (urb->actual_length != US_BULK_CB_WRAP_LEN) { - printk("ub: cmd #%d xferred %d\n", cmd->tag, urb->actual_length); /* P3 */ /* XXX Must do reset here to unconfuse the device */ goto Bad_End; } - if (cmd->dir == UB_DIR_NONE) { + if (cmd->dir == UB_DIR_NONE || cmd->nsg < 1) { ub_state_stat(sc, cmd); return; } - UB_INIT_COMPLETION(sc->work_done); - - if (cmd->dir == UB_DIR_READ) - pipe = sc->recv_bulk_pipe; - else - pipe = sc->send_bulk_pipe; - sc->last_pipe = pipe; - usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, - cmd->data, cmd->len, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; - sc->work_urb.actual_length = 0; - sc->work_urb.error_count = 0; - sc->work_urb.status = 0; - - if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { - /* XXX Clear stalls */ - printk("ub: data #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */ - ub_complete(&sc->work_done); - ub_state_done(sc, cmd, rc); - return; - } - - sc->work_timer.expires = jiffies + UB_DATA_TIMEOUT; - add_timer(&sc->work_timer); - - cmd->state = UB_CMDST_DATA; - ub_cmdtr_state(sc, cmd); + // udelay(125); // usb-storage has this + ub_data_start(sc, cmd); } else if (cmd->state == UB_CMDST_DATA) { if (urb->status == -EPIPE) { @@ -1257,16 +1224,22 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) if (urb->status == -EOVERFLOW) { /* * A babble? Failure, but we must transfer CSW now. + * XXX This is going to end in perpetual babble. Reset. */ cmd->error = -EOVERFLOW; /* A cheap trick... */ - } else { - if (urb->status != 0) - goto Bad_End; + ub_state_stat(sc, cmd); + return; } + if (urb->status != 0) + goto Bad_End; - cmd->act_len = urb->actual_length; + cmd->act_len += urb->actual_length; ub_cmdtr_act_len(sc, cmd); + if (++cmd->current_sg < cmd->nsg) { + ub_data_start(sc, cmd); + return; + } ub_state_stat(sc, cmd); } else if (cmd->state == UB_CMDST_STAT) { @@ -1401,6 +1374,44 @@ Bad_End: /* Little Excel is dead */ /* * Factorization helper for the command state machine: + * Initiate a data segment transfer. 
+ */ +static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) +{ + struct scatterlist *sg = &cmd->sgv[cmd->current_sg]; + int pipe; + int rc; + + UB_INIT_COMPLETION(sc->work_done); + + if (cmd->dir == UB_DIR_READ) + pipe = sc->recv_bulk_pipe; + else + pipe = sc->send_bulk_pipe; + sc->last_pipe = pipe; + usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, + page_address(sg->page) + sg->offset, sg->length, + ub_urb_complete, sc); + sc->work_urb.actual_length = 0; + sc->work_urb.error_count = 0; + sc->work_urb.status = 0; + + if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { + /* XXX Clear stalls */ + ub_complete(&sc->work_done); + ub_state_done(sc, cmd, rc); + return; + } + + sc->work_timer.expires = jiffies + UB_DATA_TIMEOUT; + add_timer(&sc->work_timer); + + cmd->state = UB_CMDST_DATA; + ub_cmdtr_state(sc, cmd); +} + +/* + * Factorization helper for the command state machine: * Finish the command. */ static void ub_state_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd, int rc) @@ -1426,7 +1437,6 @@ static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->recv_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe, &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1484,6 +1494,7 @@ static void ub_state_stat_counted(struct ub_dev *sc, struct ub_scsi_cmd *cmd) static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd) { struct ub_scsi_cmd *scmd; + struct scatterlist *sg; int rc; if (cmd->cdb[0] == REQUEST_SENSE) { @@ -1492,12 +1503,17 @@ static void ub_state_sense(struct ub_dev *sc, struct ub_scsi_cmd *cmd) } scmd = &sc->top_rqs_cmd; + memset(scmd, 0, sizeof(struct ub_scsi_cmd)); scmd->cdb[0] = REQUEST_SENSE; scmd->cdb[4] = UB_SENSE_SIZE; scmd->cdb_len = 6; scmd->dir = UB_DIR_READ; scmd->state = UB_CMDST_INIT; - scmd->data = sc->top_sense; + scmd->nsg = 1; + sg = &scmd->sgv[0]; + sg->page = virt_to_page(sc->top_sense); + sg->offset = (unsigned int)sc->top_sense & (PAGE_SIZE-1); + sg->length = UB_SENSE_SIZE; scmd->len = UB_SENSE_SIZE; scmd->lun = cmd->lun; scmd->done = ub_top_sense_done; @@ -1541,7 +1557,6 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, (unsigned char*) cr, NULL, 0, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1560,7 +1575,7 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, */ static void ub_top_sense_done(struct ub_dev *sc, struct ub_scsi_cmd *scmd) { - unsigned char *sense = scmd->data; + unsigned char *sense = sc->top_sense; struct ub_scsi_cmd *cmd; /* @@ -1852,6 +1867,7 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun, struct ub_capacity *ret) { struct ub_scsi_cmd *cmd; + struct scatterlist *sg; char *p; enum { ALLOC_SIZE = sizeof(struct ub_scsi_cmd) + 8 }; unsigned long flags; @@ -1872,7 +1888,11 @@ static int ub_sync_read_cap(struct ub_dev *sc, struct ub_lun *lun, cmd->cdb_len = 10; cmd->dir = UB_DIR_READ; cmd->state = UB_CMDST_INIT; - cmd->data = p; + cmd->nsg = 1; + sg = &cmd->sgv[0]; + sg->page = virt_to_page(p); + sg->offset = (unsigned int)p & (PAGE_SIZE-1); + sg->length = 8; cmd->len = 8; cmd->lun = lun; cmd->done = ub_probe_done; @@ -1973,17 +1993,16 @@ static int 
ub_sync_getmaxlun(struct ub_dev *sc) usb_fill_control_urb(&sc->work_urb, sc->dev, sc->recv_ctrl_pipe, (unsigned char*) cr, p, 1, ub_probe_urb_complete, &compl); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) { if (rc == -EPIPE) { - printk("%s: Stall at GetMaxLUN, using 1 LUN\n", + printk("%s: Stall submitting GetMaxLUN, using 1 LUN\n", sc->name); /* P3 */ } else { - printk(KERN_WARNING + printk(KERN_NOTICE "%s: Unable to submit GetMaxLUN (%d)\n", sc->name, rc); } @@ -2001,6 +2020,18 @@ static int ub_sync_getmaxlun(struct ub_dev *sc) del_timer_sync(&timer); usb_kill_urb(&sc->work_urb); + if ((rc = sc->work_urb.status) < 0) { + if (rc == -EPIPE) { + printk("%s: Stall at GetMaxLUN, using 1 LUN\n", + sc->name); /* P3 */ + } else { + printk(KERN_NOTICE + "%s: Error at GetMaxLUN (%d)\n", + sc->name, rc); + } + goto err_io; + } + if (sc->work_urb.actual_length != 1) { printk("%s: GetMaxLUN returned %d bytes\n", sc->name, sc->work_urb.actual_length); /* P3 */ @@ -2021,6 +2052,7 @@ static int ub_sync_getmaxlun(struct ub_dev *sc) kfree(p); return nluns; +err_io: err_submit: kfree(p); err_alloc: @@ -2053,7 +2085,6 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe) usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -2186,8 +2217,10 @@ static int ub_probe(struct usb_interface *intf, * This is needed to clear toggles. It is a problem only if we do * `rmmod ub && modprobe ub` without disconnects, but we like that. */ +#if 0 /* iPod Mini fails if we do this (big white iPod works) */ ub_probe_clear_stall(sc, sc->recv_bulk_pipe); ub_probe_clear_stall(sc, sc->send_bulk_pipe); +#endif /* * The way this is used by the startup code is a little specific. @@ -2214,10 +2247,10 @@ static int ub_probe(struct usb_interface *intf, for (i = 0; i < 3; i++) { if ((rc = ub_sync_getmaxlun(sc)) < 0) { /* - * Some devices (i.e. Iomega Zip100) need this -- - * apparently the bulk pipes get STALLed when the - * GetMaxLUN request is processed. - * XXX I have a ZIP-100, verify it does this. + * This segment is taken from usb-storage. They say + * that ZIP-100 needs this, but my own ZIP-100 works + * fine without this. + * Still, it does not seem to hurt anything. */ if (rc == -EPIPE) { ub_probe_clear_stall(sc, sc->recv_bulk_pipe); @@ -2286,10 +2319,10 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum) disk->first_minor = lun->id * UB_MINORS_PER_MAJOR; disk->fops = &ub_bd_fops; disk->private_data = lun; - disk->driverfs_dev = &sc->intf->dev; /* XXX Many to one ok? */ + disk->driverfs_dev = &sc->intf->dev; rc = -ENOMEM; - if ((q = blk_init_queue(ub_bd_rq_fn, &sc->lock)) == NULL) + if ((q = blk_init_queue(ub_request_fn, &sc->lock)) == NULL) goto err_blkqinit; disk->queue = q; @@ -2439,9 +2472,6 @@ static int __init ub_init(void) { int rc; - /* P3 */ printk("ub: sizeof ub_scsi_cmd %zu ub_dev %zu ub_lun %zu\n", - sizeof(struct ub_scsi_cmd), sizeof(struct ub_dev), sizeof(struct ub_lun)); - if ((rc = register_blkdev(UB_MAJOR, DRV_NAME)) != 0) goto err_regblkdev; devfs_mk_dir(DEVFS_NAME); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 0c4c121..0f48301 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -34,6 +34,7 @@ * - set initialised bit then. 
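With UB_MAX_REQ_SG raised from 1 to 9, a ub command's data can span several scatterlist segments, and the new ub_data_start() helper above submits one bulk URB per segment. A condensed sketch of how the completion path in ub_scsi_urb_compl() drives that loop:

	/* one data segment finished: account for it, then either start
	 * the next segment or move on to fetching the CSW */
	cmd->act_len += urb->actual_length;
	if (++cmd->current_sg < cmd->nsg) {
		ub_data_start(sc, cmd);		/* submit next segment's URB */
		return;
	}
	ub_state_stat(sc, cmd);			/* all data moved, read status */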
*/ +//#define DEBUG /* uncomment if you want debugging info (pr_debug) */ #include <linux/config.h> #include <linux/sched.h> #include <linux/fs.h> @@ -58,10 +59,6 @@ #include <asm/uaccess.h> #include <asm/io.h> -#define PRINTK(x...) do {} while (0) -#define dprintk(x...) do {} while (0) -/*#define dprintk(x...) printk(x) */ - #define MM_MAXCARDS 4 #define MM_RAHEAD 2 /* two sectors */ #define MM_BLKSIZE 1024 /* 1k blocks */ @@ -299,7 +296,7 @@ static void mm_start_io(struct cardinfo *card) /* make the last descriptor end the chain */ page = &card->mm_pages[card->Active]; - PRINTK("start_io: %d %d->%d\n", card->Active, page->headcnt, page->cnt-1); + pr_debug("start_io: %d %d->%d\n", card->Active, page->headcnt, page->cnt-1); desc = &page->desc[page->cnt-1]; desc->control_bits |= cpu_to_le32(DMASCR_CHAIN_COMP_EN); @@ -532,7 +529,7 @@ static void process_page(unsigned long data) activate(card); } else { /* haven't finished with this one yet */ - PRINTK("do some more\n"); + pr_debug("do some more\n"); mm_start_io(card); } out_unlock: @@ -555,7 +552,7 @@ static void process_page(unsigned long data) static int mm_make_request(request_queue_t *q, struct bio *bio) { struct cardinfo *card = q->queuedata; - PRINTK("mm_make_request %ld %d\n", bh->b_rsector, bh->b_size); + pr_debug("mm_make_request %ld %d\n", bh->b_rsector, bh->b_size); bio->bi_phys_segments = bio->bi_idx; /* count of completed segments*/ spin_lock_irq(&card->lock); diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 1676033..68b6d7b 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -47,6 +47,7 @@ #include <linux/wait.h> #include <linux/blkdev.h> #include <linux/blkpg.h> +#include <linux/delay.h> #include <asm/system.h> #include <asm/io.h> @@ -62,7 +63,7 @@ static int xd[5] = { -1,-1,-1,-1, }; #define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using "nodma" module option */ -#define XD_INIT_DISK_DELAY (30*HZ/1000) /* 30 ms delay during disk initialization */ +#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ /* Above may need to be increased if a problem with the 2nd drive detection (ST11M controller) or resetting a controller (WD) appears */ @@ -529,10 +530,8 @@ static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long t int success; xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(1); - } + while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) + schedule_timeout_uninterruptible(1); xdc_busy = 0; return (success); } @@ -633,14 +632,12 @@ static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) for (i = 0; i < XD_MAXDRIVES; i++) { xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(XD_INIT_DISK_DELAY); + msleep_interruptible(XD_INIT_DISK_DELAY); init_drive(count); count++; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(XD_INIT_DISK_DELAY); + msleep_interruptible(XD_INIT_DISK_DELAY); } } return (count); @@ -761,8 +758,7 @@ static void __init xd_wd_init_controller (unsigned int address) outb(0,XD_RESET); /* reset the controller */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(XD_INIT_DISK_DELAY); + msleep(XD_INIT_DISK_DELAY); } static void __init xd_wd_init_drive (u_char drive) @@ -936,8 +932,7 @@ If you need non-standard settings use the xd=... 
command */ xd_maxsectors = 0x01; outb(0,XD_RESET); /* reset the controller */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(XD_INIT_DISK_DELAY); + msleep(XD_INIT_DISK_DELAY); } static void __init xd_xebec_init_drive (u_char drive) diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 007f6a6..bb5e8d6 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -296,7 +296,7 @@ z2_open( struct inode *inode, struct file *filp ) return 0; err_out_kfree: - kfree( z2ram_map ); + kfree(z2ram_map); err_out: return rc; }
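A closing note on the xd.c conversion above: XD_INIT_DISK_DELAY changed from (30*HZ/1000) to (30) because the unit changed, not the delay. schedule_timeout() counts jiffies, msleep() counts milliseconds, and msleep() sleeps at least the requested time regardless of HZ. A small sketch with hypothetical macro names:

#define DISK_DELAY_JIFFIES	(30 * HZ / 1000)	/* old unit: jiffies */
#define DISK_DELAY_MS		30			/* new unit: milliseconds */

	/* pre-patch: task state set by hand, timeout in jiffies */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_timeout(DISK_DELAY_JIFFIES);

	/* post-patch: one call, timeout in milliseconds */
	msleep(DISK_DELAY_MS);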