
Linux那些事儿之我是Block层 (8): The Life and Times of a SCSI Command (Part 2)

2008-01-31
Once this partnership in crime between the request and the scsi command is established, the request can actually be executed. Let's look at blk_execute_rq(), from block/ll_rw_blk.c:

2605 /**
2606 * blk_execute_rq - insert a request into queue for execution
2607 * @q: queue to insert the request in
2608 * @bd_disk: matching gendisk
2609 * @rq: request to insert
2610 * @at_head: insert request at head or tail of queue
2611 *
2612 * Description:
2613 * Insert a fully prepared request at the back of the io scheduler queue
2614 * for execution and wait for completion.
2615 */
2616 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2617 struct request *rq, int at_head)
2618 {
2619 DECLARE_COMPLETION_ONSTACK(wait);
2620 char sense[SCSI_SENSE_BUFFERSIZE];
2621 int err = 0;
2622
2623 /*
2624 * we need an extra reference to the request, so we can look at
2625 * it after io completion
2626 */
2627 rq->ref_count++;
2628
2629 if (!rq->sense) {
2630 memset(sense, 0, sizeof(sense));
2631 rq->sense = sense;
2632 rq->sense_len = 0;
2633 }
2634
2635 rq->end_io_data = &wait;
2636 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2637 wait_for_completion(&wait);
2638
2639 if (rq->errors)
2640 err = -EIO;
2641
2642 return err;
2643 }

Setting aside the error-handling code, this function really boils down to two calls: blk_execute_rq_nowait() and wait_for_completion(). The request is queued asynchronously, and the caller then sleeps on the on-stack completion until the request's end_io callback fires.
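The wake-up half of that handshake is the done callback, blk_end_sync_rq, passed in at line 2636: when the request completes, it fires the very completion we are sleeping on. A minimal sketch of the pattern, assuming only the standard completion API (my_end_sync_rq is an illustrative name, not the kernel's exact code):

#include <linux/blkdev.h>
#include <linux/completion.h>

/* Sketch: rq->end_io_data was pointed at the caller's on-stack
 * completion before the request was queued. */
static void my_end_sync_rq(struct request *rq, int error)
{
        struct completion *waiting = rq->end_io_data;

        rq->end_io_data = NULL;
        /* complete() must come last: the moment the waiter wakes,
         * the on-stack completion may go out of scope. */
        complete(waiting);
}

Usage mirrors blk_execute_rq() itself: DECLARE_COMPLETION_ONSTACK(wait), stash &wait in rq->end_io_data, queue the request with the callback, then wait_for_completion(&wait). Now for the first of the two calls, blk_execute_rq_nowait(), from block/ll_rw_blk.c: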

2576 /**
2577 * blk_execute_rq_nowait - insert a request into queue for execution
2578 * @q: queue to insert the request in
2579 * @bd_disk: matching gendisk
2580 * @rq: request to insert
2581 * @at_head: insert request at head or tail of queue
2582 * @done: I/O completion handler
2583 *
2584 * Description:
2585 * Insert a fully prepared request at the back of the io scheduler queue
2586 * for execution. Don't wait for completion.
2587 */
2588 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2589 struct request *rq, int at_head,
2590 rq_end_io_fn *done)
2591 {
2592 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2593
2594 rq->rq_disk = bd_disk;
2595 rq->cmd_flags |= REQ_NOMERGE;
2596 rq->end_io = done;
2597 WARN_ON(irqs_disabled());
2598 spin_lock_irq(q->queue_lock);
2599 __elv_add_request(q, rq, where, 1);
2600 __generic_unplug_device(q);
2601 spin_unlock_irq(q->queue_lock);
2602 }

First, at_head says where to insert the request (mercifully, there is no third parameter saying how), and where records that decision. In our context, at_head was passed down when scsi_execute() called blk_execute_rq(), and it was set to 1 there, so where becomes ELEVATOR_INSERT_FRONT. These macros come from include/linux/elevator.h:

155 /*
156 * Insertion selection
157 */
158 #define ELEVATOR_INSERT_FRONT 1
159 #define ELEVATOR_INSERT_BACK 2
160 #define ELEVATOR_INSERT_SORT 3
161 #define ELEVATOR_INSERT_REQUEUE 4

Clearly, this tells us to insert at the front, which is tame enough. Armed with this where, we enter the next function, __elv_add_request, from block/elevator.c:

646 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
647 int plug)
648 {
649 if (q->ordcolor)
650 rq->cmd_flags |= REQ_ORDERED_COLOR;
651
652 if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
653 /*
654 * toggle ordered color
655 */
656 if (blk_barrier_rq(rq))
657 q->ordcolor ^= 1;
658
659 /*
660 * barriers implicitly indicate back insertion
661 */
662 if (where == ELEVATOR_INSERT_SORT)
663 where = ELEVATOR_INSERT_BACK;
664
665 /*
666 * this request is scheduling boundary, update
667 * end_sector
668 */
669 if (blk_fs_request(rq)) {
670 q->end_sector = rq_end_sector(rq);
671 q->boundary_rq = rq;
672 }
673 } else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
674 where = ELEVATOR_INSERT_BACK;
675
676 if (plug)
677 blk_plug_device(q);
678
679 elv_insert(q, rq, where);
680 }

The plug parameter passed in is 1, so blk_plug_device() will be executed; plugging briefly holds the queue back so requests can batch up, and we leave that function alone for now. The barrier-related branches above do not apply to our request, so jump straight to the last line, elv_insert():

548 void elv_insert(request_queue_t *q, struct request *rq, int where)
549 {
550 struct list_head *pos;
551 unsigned ordseq;
552 int unplug_it = 1;
553
554 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
555
556 rq->q = q;
557
558 switch (where) {
559 case ELEVATOR_INSERT_FRONT:
560 rq->cmd_flags |= REQ_SOFTBARRIER;
561
562 list_add(&rq->queuelist, &q->queue_head);
563 break;
564
565 case ELEVATOR_INSERT_BACK:
566 rq->cmd_flags |= REQ_SOFTBARRIER;
567 elv_drain_elevator(q);
568 list_add_tail(&rq->queuelist, &q->queue_head);
569 /*
570 * We kick the queue here for the following reasons.
571 * - The elevator might have returned NULL previously
572 * to delay requests and returned them now. As the
573 * queue wasn't empty before this request, ll_rw_blk
574 * won't run the queue on return, resulting in hang.
575 * - Usually, back inserted requests won't be merged
576 * with anything. There's no point in delaying queue
577 * processing.
578 */
579 blk_remove_plug(q);
580 q->request_fn(q);
581 break;
582
583 case ELEVATOR_INSERT_SORT:
584 BUG_ON(!blk_fs_request(rq));
585 rq->cmd_flags |= REQ_SORTED;
586 q->nr_sorted++;
587 if (rq_mergeable(rq)) {
588 elv_rqhash_add(q, rq);
589 if (!q->last_merge)
590 q->last_merge = rq;
591 }
592
593 /*
594 * Some ioscheds (cfq) run q->request_fn directly, so
595 * rq cannot be accessed after calling
596 * elevator_add_req_fn.
597 */
598 q->elevator->ops->elevator_add_req_fn(q, rq);
599 break;
600
601 case ELEVATOR_INSERT_REQUEUE:
602 /*
603 * If ordered flush isn't in progress, we do front
604 * insertion; otherwise, requests should be requeued
605 * in ordseq order.
606 */
607 rq->cmd_flags |= REQ_SOFTBARRIER;
608
609 /*
610 * Most requeues happen because of a busy condition,
611 * don't force unplug of the queue for that case.
612 */
613 unplug_it = 0;
614
615 if (q->ordseq == 0) {
616 list_add(&rq->queuelist, &q->queue_head);
617 break;
618 }
619
620 ordseq = blk_ordered_req_seq(rq);
621
622 list_for_each(pos, &q->queue_head) {
623 struct request *pos_rq = list_entry_rq(pos);
624 if (ordseq <= blk_ordered_req_seq(pos_rq))
625 break;
626 }
627
628 list_add_tail(&rq->queuelist, pos);
629 break;
630
631 default:
632 printk(KERN_ERR "%s: bad insertion point %d\n",
633 __FUNCTION__, where);
634 BUG();
635 }
636
637 if (unplug_it && blk_queue_plugged(q)) {
638 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
639 - q->in_flight;
640
641 if (nrq >= q->unplug_thresh)
642 __generic_unplug_device(q);
643 }
644 }

Since we insert at the front, the list_add at line 562 is executed. struct request has a member struct list_head queuelist, and struct request_queue has a member struct list_head queue_head, so the former is linked into the list headed by the latter, and then we return.
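If the intrusive-list mechanics feel opaque, here is a toy fragment (illustrative names; nothing from the kernel beyond <linux/list.h>) showing the same queuelist/queue_head relationship, and how the enclosing request is later recovered from a list node:

#include <linux/list.h>

struct toy_request {
        int tag;
        struct list_head queuelist;     /* node embedded in the request */
};

static LIST_HEAD(toy_queue_head);       /* plays the role of q->queue_head */

static void toy_insert_front(struct toy_request *rq)
{
        /* what line 562 does for ELEVATOR_INSERT_FRONT */
        list_add(&rq->queuelist, &toy_queue_head);
}

static struct toy_request *toy_peek(void)
{
        if (list_empty(&toy_queue_head))
                return NULL;
        /* the kernel's list_entry_rq() is just list_entry() like this */
        return list_entry(toy_queue_head.next, struct toy_request, queuelist);
}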
Back in blk_execute_rq_nowait(), the next function called is __generic_unplug_device, again from block/ll_rw_blk.c:

1586 /*
1587 * remove the plug and let it rip..
1588 */
1589 void __generic_unplug_device(request_queue_t *q)
1590 {
1591 if (unlikely(blk_queue_stopped(q)))
1592 return;
1593
1594 if (!blk_remove_plug(q))
1595 return;
1596
1597 q->request_fn(q);
1598 }

The real point of interest is line 1597, the call through request_fn. request_fn is a member of struct request_queue of type request_fn_proc *, and request_fn_proc is just typedef sleight of hand, from include/linux/blkdev.h:

334 typedef void (request_fn_proc) (request_queue_t *q);
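In other words, any function of this shape can serve as a queue's strategy routine. A hypothetical driver (my_request_fn, my_lock and my_make_queue are invented names) would wire one up roughly like this:

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

/* a strategy routine with the request_fn_proc signature */
static void my_request_fn(request_queue_t *q)
{
        /* pull requests off q and push them to the hardware */
}

/* blk_init_queue() is what ultimately stores the pointer
 * in q->request_fn */
static request_queue_t *my_make_queue(void)
{
        return blk_init_queue(my_request_fn, &my_lock);
}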

So what is this request_fn in our case? Remember the function that allocated the queue back in the scsi subsystem? Right, __scsi_alloc_queue(), and its caller scsi_alloc_queue() passed in exactly this parameter:

1590 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1591 {
1592 struct request_queue *q;
1593
1594 q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
1595 if (!q)
1596 return NULL;
1597
1598 blk_queue_prep_rq(q, scsi_prep_fn);
1599 blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1600 blk_queue_softirq_done(q, scsi_softirq_done);
1601 return q;
1602 }

Yes: scsi_request_fn(). This function pointer is handed down through a couple of calls and finally assigned to q->request_fn in blk_init_queue_node(). So what we really need to study is scsi_request_fn.
Before doing so, note that lines 1598 through 1600 above also install three function pointers:

132 /**
133 * blk_queue_prep_rq - set a prepare_request function for queue
134 * @q: queue
135 * @pfn: prepare_request function
136 *
137 * It's possible for a queue to register a prepare_request callback which
138 * is invoked before the request is handed to the request_fn. The goal of
139 * the function is to prepare a request for I/O, it can be used to build a
140 * cdb from the request data for instance.
141 *
142 */
143 void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
144 {
145 q->prep_rq_fn = pfn;
146 }
303 /**
304 * blk_queue_issue_flush_fn - set function for issuing a flush
305 * @q: the request queue
306 * @iff: the function to be called issuing the flush
307 *
308 * Description:
309 * If a driver supports issuing a flush command, the support is notified
310 * to the block layer by defining it through this call.
311 *
312 **/
313 void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
314 {
315 q->issue_flush_fn = iff;
316 }
173 void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
174 {
175 q->softirq_done_fn = fn;
176 }

They assign scsi_prep_fn to q->prep_rq_fn, scsi_issue_flush_fn to q->issue_flush_fn, and scsi_softirq_done to q->softirq_done_fn. scsi_prep_fn in particular will matter very soon.
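Seen from a hypothetical driver's side (all my_* names below are invented for illustration), registering such hooks is nothing more than:

#include <linux/blkdev.h>

/* called by elv_next_request() before the request reaches request_fn */
static int my_prep_fn(request_queue_t *q, struct request *rq)
{
        /* build a command descriptor from rq here */
        return BLKPREP_OK;
}

/* called from the block-layer softirq once the request completes */
static void my_softirq_done(struct request *rq)
{
        /* post-interrupt completion work */
}

static void my_setup_queue(request_queue_t *q)
{
        blk_queue_prep_rq(q, my_prep_fn);
        blk_queue_softirq_done(q, my_softirq_done);
}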
With those noted, back to the main thread: scsi_request_fn().

1411 /*
1412 * Function: scsi_request_fn()
1413 *
1414 * Purpose: Main strategy routine for SCSI.
1415 *
1416 * Arguments: q - Pointer to actual queue.
1417 *
1418 * Returns: Nothing
1419 *
1420 * Lock status: IO request lock assumed to be held when called.
1421 */
1422 static void scsi_request_fn(struct request_queue *q)
1423 {
1424 struct scsi_device *sdev = q->queuedata;
1425 struct Scsi_Host *shost;
1426 struct scsi_cmnd *cmd;
1427 struct request *req;
1428
1429 if (!sdev) {
1430 printk("scsi: killing requests for dead queue/n");
1431 while ((req = elv_next_request(q)) != NULL)
1432 scsi_kill_request(req, q);
1433 return;
1434 }
1435
1436 if(!get_device(&sdev->sdev_gendev))
1437 /* We must be tearing the block queue down already */
1438 return;
1439
1440 /*
1441 * To start with, we keep looping until the queue is empty, or until
1442 * the host is no longer able to accept any more requests.
1443 */
1444 shost = sdev->host;
1445 while (!blk_queue_plugged(q)) {
1446 int rtn;
1447 /*
1448 * get next queueable request. We do this early to make sure
1449 * that the request is fully prepared even if we cannot
1450 * accept it.
1451 */
1452 req = elv_next_request(q);
1453 if (!req || !scsi_dev_queue_ready(q, sdev))
1454 break;
1455
1456 if (unlikely(!scsi_device_online(sdev))) {
1457 sdev_printk(KERN_ERR, sdev,
1458 "rejecting I/O to offline device/n");
1459 scsi_kill_request(req, q);
1460 continue;
1461 }
1462
1463
1464 /*
1465 * Remove the request from the request list.
1466 */
1467 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
1468 blkdev_dequeue_request(req);
1469 sdev->device_busy++;
1470
1471 spin_unlock(q->queue_lock);
1472 cmd = req->special;
1473 if (unlikely(cmd == NULL)) {
1474 printk(KERN_CRIT "impossible request in %s.\n"
1475 "please mail a stack trace to "
1476 "linux-scsi@vger.kernel.org\n",
1477 __FUNCTION__);
1478 blk_dump_rq_flags(req, "foo");
1479 BUG();
1480 }
1481 spin_lock(shost->host_lock);
1482
1483 if (!scsi_host_queue_ready(q, shost, sdev))
1484 goto not_ready;
1485 if (sdev->single_lun) {
1486 if (scsi_target(sdev)->starget_sdev_user &&
1487 scsi_target(sdev)->starget_sdev_user != sdev)
1488 goto not_ready;
1489 scsi_target(sdev)->starget_sdev_user = sdev;
1490 }
1491 shost->host_busy++;
1492
1493 /*
1494 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
1495 * take the lock again.
1496 */
1497 spin_unlock_irq(shost->host_lock);
1498
1499 /*
1500 * Finally, initialize any error handling parameters, and set up
1501 * the timers for timeouts.
1502 */
1503 scsi_init_cmd_errh(cmd);
1504
1505 /*
1506 * Dispatch the command to the low-level driver.
1507 */
1508 rtn = scsi_dispatch_cmd(cmd);
1509 spin_lock_irq(q->queue_lock);
1510 if(rtn) {
1511 /* we're refusing the command; because of
1512 * the way locks get dropped, we need to
1513 * check here if plugging is required */
1514 if(sdev->device_busy == 0)
1515 blk_plug_device(q);
1516
1517 break;
1518 }
1519 }
1520
1521 goto out;
1522
1523 not_ready:
1524 spin_unlock_irq(shost->host_lock);
1525
1526 /*
1527 * lock q, handle tag, requeue req, and decrement device_busy. We
1528 * must return with queue_lock held.
1529 *
1530 * Decrementing device_busy without checking it is OK, as all such
1531 * cases (host limits or settings) should run the queue at some
1532 * later time.
1533 */
1534 spin_lock_irq(q->queue_lock);
1535 blk_requeue_request(q, req);
1536 sdev->device_busy--;
1537 if(sdev->device_busy == 0)
1538 blk_plug_device(q);
1539 out:
1540 /* must be careful here...if we trigger the ->remove() function
1541 * we cannot be holding the q lock */
1542 spin_unlock_irq(q->queue_lock);
1543 put_device(&sdev->sdev_gendev);
1544 spin_lock_irq(q->queue_lock);
1545 }

Let's focus first on elv_next_request(), from block/elevator.c:

712 struct request *elv_next_request(request_queue_t *q)
713 {
714 struct request *rq;
715 int ret;
716
717 while ((rq = __elv_next_request(q)) != NULL) {
718 if (!(rq->cmd_flags & REQ_STARTED)) {
719 /*
720 * This is the first time the device driver
721 * sees this request (possibly after
722 * requeueing). Notify IO scheduler.
723 */
724 if (blk_sorted_rq(rq))
725 elv_activate_rq(q, rq);
726
727 /*
728 * just mark as started even if we don't start
729 * it, a request that has been delayed should
730 * not be passed by new incoming requests
731 */
732 rq->cmd_flags |= REQ_STARTED;
733 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
734 }
735
736 if (!q->boundary_rq || q->boundary_rq == rq) {
737 q->end_sector = rq_end_sector(rq);
738 q->boundary_rq = NULL;
739 }
740
741 if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
742 break;
743
744 ret = q->prep_rq_fn(q, rq);
745 if (ret == BLKPREP_OK) {
746 break;
747 } else if (ret == BLKPREP_DEFER) {
748 /*
749 * the request may have been (partially) prepped.
750 * we need to keep this request in the front to
751 * avoid resource deadlock. REQ_STARTED will
752 * prevent other fs requests from passing this one.
753 */
754 rq = NULL;
755 break;
756 } else if (ret == BLKPREP_KILL) {
757 int nr_bytes = rq->hard_nr_sectors << 9;
758
759 if (!nr_bytes)
760 nr_bytes = rq->data_len;
761
762 blkdev_dequeue_request(rq);
763 rq->cmd_flags |= REQ_QUIET;
764 end_that_request_chunk(rq, 0, nr_bytes);
765 end_that_request_last(rq, 0);
766 } else {
767 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
768 ret);
769 break;
770 }
771 }
772
773 return rq;
774 }

It calls __elv_next_request(), also from block/elevator.c:

696 static inline struct request *__elv_next_request(request_queue_t *q)
697 {
698 struct request *rq;
699
700 while (1) {
701 while (!list_empty(&q->queue_head)) {
702 rq = list_entry_rq(q->queue_head.next);
703 if (blk_do_ordered(q, &rq))
704 return rq;
705 }
706
707 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
708 return NULL;
709 }
710 }

Thanks to our splendid insertion a moment ago, q->queue_head cannot be empty here, so a request is taken off it. First comes blk_do_ordered(), from block/ll_rw_blk.c:

478 int blk_do_ordered(request_queue_t *q, struct request **rqp)
479 {
480 struct request *rq = *rqp;
481 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
482
483 if (!q->ordseq) {
484 if (!is_barrier)
485 return 1;
486
487 if (q->next_ordered != QUEUE_ORDERED_NONE) {
488 *rqp = start_ordered(q, rq);
489 return 1;
490 } else {
491 /*
492 * This can happen when the queue switches to
493 * ORDERED_NONE while this request is on it.
494 */
495 blkdev_dequeue_request(rq);
496 end_that_request_first(rq, -EOPNOTSUPP,
497 rq->hard_nr_sectors);
498 end_that_request_last(rq, -EOPNOTSUPP);
499 *rqp = NULL;
500 return 0;
501 }
502 }
503
504 /*
505 * Ordered sequence in progress
506 */
507
508 /* Special requests are not subject to ordering rules. */
509 if (!blk_fs_request(rq) &&
510 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
511 return 1;
512
513 if (q->ordered & QUEUE_ORDERED_TAG) {
514 /* Ordered by tag. Blocking the next barrier is enough. */
515 if (is_barrier && rq != &q->bar_rq)
516 *rqp = NULL;
517 } else {
518 /* Ordered by draining. Wait for turn. */
519 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
520 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
521 *rqp = NULL;
522 }
523
524 return 1;
525 }

First, a look at blk_fs_request:

528 #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)

Our request is clearly not of this type (its cmd_type is REQ_TYPE_BLOCK_PC), so in our context is_barrier must be 0, and blk_do_ordered() returns 1 without further ado. Back in __elv_next_request, the condition at line 703 therefore holds and rq is returned; the elevator_dispatch_fn below it never runs in our scenario. More generally, when __elv_next_request returns to elv_next_request(), as long as the request queue is non-empty the result is the request at the head of the queue.
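A side note on the branch we do not take: when queue_head is empty, line 707's elevator_dispatch_fn is what refills it from the scheduler's private queue. For noop it looks roughly like this (recalled from the 2.6-era block/noop-iosched.c, so treat the details as approximate):

static int noop_dispatch(request_queue_t *q, int force)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (!list_empty(&nd->queue)) {
                struct request *rq;

                rq = list_entry(nd->queue.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                /* hand the request over to q->queue_head */
                elv_dispatch_sort(q, rq);
                return 1;
        }
        return 0;
}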
Moving on: line 732 is in fact the only place in this whole story where REQ_STARTED is set in cmd_flags, so before line 732 runs the flag is clear, and the if at line 718 is taken.
blk_sorted_rq is another macro, from include/linux/blkdev.h:

543 #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)

We never set REQ_SORTED either, so that test does not concern us. And for noop, calling the next function would make no difference anyway, because elv_activate_rq(), from block/elevator.c:

272 static void elv_activate_rq(request_queue_t *q, struct request *rq)
273 {
274 elevator_t *e = q->elevator;
275
276 if (e->ops->elevator_activate_req_fn)
277 e->ops->elevator_activate_req_fn(q, rq);
278 }

does nothing unless the scheduler provides an elevator_activate_req_fn, and as we know, noop has no such pointer at all, so nothing happens and there is nothing to mope about.
Then REQ_STARTED gets set at line 732. As for line 736: back in elevator_init() there was this line,

230 q->boundary_rq = NULL;

so boundary_rq is NULL, the condition at line 736 holds, and rq_end_sector is evaluated. It too is just a simple macro:

172 #define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors)

For example, a request starting at sector 1000 with nr_sectors equal to 8 ends at sector 1008. Meanwhile, boundary_rq is set back to NULL.
Next, since prep_rq_fn was assigned scsi_prep_fn, line 744 takes us into scsi_prep_fn(), a function from drivers/scsi/scsi_lib.c:

1176 static int scsi_prep_fn(struct request_queue *q, struct request *req)
1177 {
1178 struct scsi_device *sdev = q->queuedata;
1179 int ret = BLKPREP_OK;
1180
1181 /*
1182 * If the device is not in running state we will reject some
1183 * or all commands.
1184 */
1185 if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
1186 switch (sdev->sdev_state) {
1187 case SDEV_OFFLINE:
1188 /*
1189 * If the device is offline we refuse to process any
1190 * commands. The device must be brought online
1191 * before trying any recovery commands.
1192 */
1193 sdev_printk(KERN_ERR, sdev,
1194 "rejecting I/O to offline device/n");
1195 ret = BLKPREP_KILL;
1196 break;
1197 case SDEV_DEL:
1198 /*
1199 * If the device is fully deleted, we refuse to
1200 * process any commands as well.
1201 */
1202 sdev_printk(KERN_ERR, sdev,
1203 "rejecting I/O to dead device/n");
1204 ret = BLKPREP_KILL;
1205 break;
1206 case SDEV_QUIESCE:
1207 case SDEV_BLOCK:
1208 /*
1209 * If the devices is blocked we defer normal commands.
1210 */
1211 if (!(req->cmd_flags & REQ_PREEMPT))
1212 ret = BLKPREP_DEFER;
1213 break;
1214 default:
1215 /*
1216 * For any other not fully online state we only allow
1217 * special commands. In particular any user initiated
1218 * command is not allowed.
1219 */
1220 if (!(req->cmd_flags & REQ_PREEMPT))
1221 ret = BLKPREP_KILL;
1222 break;
1223 }
1224
1225 if (ret != BLKPREP_OK)
1226 goto out;
1227 }
1228
1229 switch (req->cmd_type) {
1230 case REQ_TYPE_BLOCK_PC:
1231 ret = scsi_setup_blk_pc_cmnd(sdev, req);
1232 break;
1233 case REQ_TYPE_FS:
1234 ret = scsi_setup_fs_cmnd(sdev, req);
1235 break;
1236 default:
1237 /*
1238 * All other command types are not supported.
1239 *
1240 * Note that these days the SCSI subsystem does not use
1241 * REQ_TYPE_SPECIAL requests anymore. These are only used
1242 * (directly or via blk_insert_request) by non-SCSI drivers.
1243 */
1244 blk_dump_rq_flags(req, "SCSI bad req");
1245 ret = BLKPREP_KILL;
1246 break;
1247 }
1248
1249 out:
1250 switch (ret) {
1251 case BLKPREP_KILL:
1252 req->errors = DID_NO_CONNECT << 16;
1253 break;
1254 case BLKPREP_DEFER:
1255 /*
1256 * If we defer, the elv_next_request() returns NULL, but the
1257 * queue must be restarted, so we plug here if no returning
1258 * command will automatically do that.
1259 */
1260 if (sdev->device_busy == 0)
1261 blk_plug_device(q);
1262 break;
1263 default:
1264 req->cmd_flags |= REQ_DONTPREP;
1265 }
1266
1267 return ret;
1268 }

On the normal path we reach the switch at line 1229 and, depending on the type of the request, call either scsi_setup_blk_pc_cmnd or scsi_setup_fs_cmnd. So what is our cmd_type? Looking back through the mists of time, recall this line from scsi_execute():

199 req->cmd_type = REQ_TYPE_BLOCK_PC;

So there is nothing to debate: we execute scsi_setup_blk_pc_cmnd, from drivers/scsi/scsi_lib.c:

1090 static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
1091 {
1092 struct scsi_cmnd *cmd;
1093
1094 cmd = scsi_get_cmd_from_req(sdev, req);
1095 if (unlikely(!cmd))
1096 return BLKPREP_DEFER;
1097
1098 /*
1099 * BLOCK_PC requests may transfer data, in which case they must
1100 * a bio attached to them. Or they might contain a SCSI command
1101 * that does not transfer data, in which case they may optionally
1102 * submit a request without an attached bio.
1103 */
1104 if (req->bio) {
1105 int ret;
1106
1107 BUG_ON(!req->nr_phys_segments);
1108
1109 ret = scsi_init_io(cmd);
1110 if (unlikely(ret))
1111 return ret;
1112 } else {
1113 BUG_ON(req->data_len);
1114 BUG_ON(req->data);
1115
1116 cmd->request_bufflen = 0;
1117 cmd->request_buffer = NULL;
1118 cmd->use_sg = 0;
1119 req->buffer = NULL;
1120 }
1121
1122 BUILD_BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd));
1123 memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
1124 cmd->cmd_len = req->cmd_len;
1125 if (!req->data_len)
1126 cmd->sc_data_direction = DMA_NONE;
1127 else if (rq_data_dir(req) == WRITE)
1128 cmd->sc_data_direction = DMA_TO_DEVICE;
1129 else
1130 cmd->sc_data_direction = DMA_FROM_DEVICE;
1131
1132 cmd->transfersize = req->data_len;
1133 cmd->allowed = req->retries;
1134 cmd->timeout_per_command = req->timeout;
1135 cmd->done = scsi_blk_pc_done;
1136 return BLKPREP_OK;
1137 }

If you ever wondered how a scsi cmd takes shape — say, back in the usb-storage story, when you saw sc_data_direction being tested and could not tell where that value was set — then this code, in the flesh, should dissolve the mystery: the cdb is copied from req->cmd, and the data direction is derived from the request itself.
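Factored out, the direction logic at lines 1125-1130 amounts to the helper below (my_data_direction is an invented name; the mapping itself is exactly what the code above does):

#include <linux/blkdev.h>
#include <linux/dma-mapping.h>

static enum dma_data_direction my_data_direction(struct request *req)
{
        if (!req->data_len)
                return DMA_NONE;        /* command carries no payload */
        if (rq_data_dir(req) == WRITE)
                return DMA_TO_DEVICE;   /* host memory -> device */
        return DMA_FROM_DEVICE;         /* device -> host memory */
}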
In the normal case the function returns BLKPREP_OK. prep is short for prepare, so BLKPREP_OK simply means the request is fully prepared. scsi_prep_fn() then returns the same value, after first setting REQ_DONTPREP in cmd_flags (precisely the flag tested at line 741 of elv_next_request(), which keeps a request from being prepared twice).
Back in elv_next_request(), since the return value is BLKPREP_OK, we break out at line 746. In other words, we have obtained a request and prepared a scsi command for it; the next step is to execute that command, so there is no reason to linger in elv_next_request(), and we finally return to scsi_request_fn(). Tang Wei once confessed that shooting her scenes felt like a trip through hell, and isn't reading this code much the same? She added that past the hell lies heaven, whereas we stay trapped in the code with no end in sight: no sooner is elv_next_request finished than the next thing arrives. Two things, actually, at line 1467: a macro plus a function. The macro is blk_queue_tagged, from include/linux/blkdev.h:

524 #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)

And the function is blk_queue_start_tag, from block/ll_rw_blk.c:

1104 /**
1105 * blk_queue_start_tag - find a free tag and assign it
1106 * @q: the request queue for the device
1107 * @rq: the block request that needs tagging
1108 *
1109 * Description:
1110 * This can either be used as a stand-alone helper, or possibly be
1111 * assigned as the queue &prep_rq_fn (in which case &struct request
1112 * automagically gets a tag assigned). Note that this function
1113 * assumes that any type of request can be queued! if this is not
1114 * true for your device, you must check the request type before
1115 * calling this function. The request will also be removed from
1116 * the request queue, so it's the drivers responsibility to readd
1117 * it if it should need to be restarted for some reason.
1118 *
1119 * Notes:
1120 * queue lock must be held.
1121 **/
1122 int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1123 {
1124 struct blk_queue_tag *bqt = q->queue_tags;
1125 int tag;
1126
1127 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1128 printk(KERN_ERR
1129 "%s: request %p for device [%s] already tagged %d",
1130 __FUNCTION__, rq,
1131 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1132 BUG();
1133 }
1134
1135 /*
1136 * Protect against shared tag maps, as we may not have exclusive
1137 * access to the tag map.
1138 */
1139 do {
1140 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1141 if (tag >= bqt->max_depth)
1142 return 1;
1143
1144 } while (test_and_set_bit(tag, bqt->tag_map));
1145
1146 rq->cmd_flags |= REQ_QUEUED;
1147 rq->tag = tag;
1148 bqt->tag_index[tag] = rq;
1149 blkdev_dequeue_request(rq);
1150 list_add(&rq->queuelist, &bqt->busy_list);
1151 bqt->busy++;
1152 return 0;
1153 }

For most of us the queue is not tagged, so blk_queue_tagged(q) evaluates to 0 and the && at line 1467 short-circuits.
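Before moving on, the loop at lines 1139-1144 is worth pausing on: it is lock-free bitmap allocation. A toy rendition of the same idea (illustrative, not kernel code):

#include <linux/bitops.h>

static int toy_get_tag(unsigned long *map, int depth)
{
        int tag;

        do {
                tag = find_first_zero_bit(map, depth);
                if (tag >= depth)
                        return -1;      /* all tags in use */
                /* another CPU may set the bit between the search and
                 * the claim; test_and_set_bit makes the claim atomic
                 * and the loop retries on collision */
        } while (test_and_set_bit(tag, map));

        return tag;
}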
The condition at line 1467 is therefore true, and the next function, blkdev_dequeue_request(), is executed. (On a tagged queue, blk_queue_start_tag() would already have dequeued the request itself at its line 1149, so the explicit call would be skipped.) It comes from include/linux/blkdev.h:

725 static inline void blkdev_dequeue_request(struct request *req)
726 {
727 elv_dequeue_request(req->q, req);
728 }

And elv_dequeue_request comes from block/elevator.c:

778 void elv_dequeue_request(request_queue_t *q, struct request *rq)
779 {
780 BUG_ON(list_empty(&rq->queuelist));
781 BUG_ON(ELV_ON_HASH(rq));
782
783 list_del_init(&rq->queuelist);
784
785 /*
786 * the time frame between a request being removed from the lists
787 * and to it is freed is accounted as io that is in progress at
788 * the driver side.
789 */
790 if (blk_account_rq(rq))
791 q->in_flight++;
792 }

Society these days runs on using and being used. This request has outlived its usefulness — we have already extracted the scsi command we wanted from it — so we are free to burn the bridge behind us: list_del_init removes the request from the request queue.
The blk_account_rq below is another macro from include/linux/blkdev.h:

536 #define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq))

Clearly, at least the second condition fails for us, since ours is not a filesystem request. That wraps up elv_dequeue_request.
Now it is time to execute the scsi command, which is what scsi_dispatch_cmd() is for.