
Stories of Linux: I Am the Block Layer (9) — The Past and Present of a SCSI Command (Part 3)

2010-08-10 06:55
The next function, and a more important one, is scsi_dispatch_cmd, from drivers/scsi/scsi.c:

459 /*
460  * Function:    scsi_dispatch_command
461  *
462  * Purpose:     Dispatch a command to the low-level driver.
463  *
464  * Arguments:   cmd - command block we are dispatching.
465  *
466  * Notes:
467  */
468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
469 {
470         struct Scsi_Host *host = cmd->device->host;
471         unsigned long flags = 0;
472         unsigned long timeout;
473         int rtn = 0;
474
475         /* check if the device is still usable */
476         if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
477                 /* in SDEV_DEL we error all commands. DID_NO_CONNECT
478                  * returns an immediate error upwards, and signals
479                  * that the device is no longer present */
480                 cmd->result = DID_NO_CONNECT << 16;
481                 atomic_inc(&cmd->device->iorequest_cnt);
482                 __scsi_done(cmd);
483                 /* return 0 (because the command has been processed) */
484                 goto out;
485         }
486
487         /* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
488         if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
489                 /*
490                  * in SDEV_BLOCK, the command is just put back on the device
491                  * queue.  The suspend state has already blocked the queue so
492                  * future requests should not occur until the device
493                  * transitions out of the suspend state.
494                  */
495                 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
496
497                 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
498
499                 /*
500                  * NOTE: rtn is still zero here because we don't need the
501                  * queue to be plugged on return (it's already stopped)
502                  */
503                 goto out;
504         }
505
506         /*
507          * If SCSI-2 or lower, store the LUN value in cmnd.
508          */
509         if (cmd->device->scsi_level <= SCSI_2 &&
510             cmd->device->scsi_level != SCSI_UNKNOWN) {
511                 cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
512                                (cmd->device->lun << 5 & 0xe0);
513         }
514
515         /*
516          * We will wait MIN_RESET_DELAY clock ticks after the last reset so
517          * we can avoid the drive not being ready.
518          */
519         timeout = host->last_reset + MIN_RESET_DELAY;
520
521         if (host->resetting && time_before(jiffies, timeout)) {
522                 int ticks_remaining = timeout - jiffies;
523                 /*
524                  * NOTE: This may be executed from within an interrupt
525                  * handler!  This is bad, but for now, it'll do.  The irq
526                  * level of the interrupt handler has been masked out by the
527                  * platform dependent interrupt handling code already, so the
528                  * sti() here will not cause another call to the SCSI host's
529                  * interrupt handler (assuming there is one irq-level per
530                  * host).
531                  */
532                 while (--ticks_remaining >= 0)
533                         mdelay(1 + 999 / HZ);
534                 host->resetting = 0;
535         }
536
537         /*
538          * AK: unlikely race here: for some reason the timer could
539          * expire before the serial number is set up below.
540          */
541         scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
542
543         scsi_log_send(cmd);
544
545         /*
546          * We will use a queued command if possible, otherwise we will
547          * emulate the queuing and calling of completion function ourselves.
548          */
549         atomic_inc(&cmd->device->iorequest_cnt);
550
551         /*
552          * Before we queue this command, check if the command
553          * length exceeds what the host adapter can handle.
554          */
555         if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {
556                 SCSI_LOG_MLQUEUE(3,
557                                 printk("queuecommand : command too long.\n"));
558                 cmd->result = (DID_ABORT << 16);
559
560                 scsi_done(cmd);
561                 goto out;
562         }
563
564         spin_lock_irqsave(host->host_lock, flags);
565         scsi_cmd_get_serial(host, cmd);
566
567         if (unlikely(host->shost_state == SHOST_DEL)) {
568                 cmd->result = (DID_NO_CONNECT << 16);
569                 scsi_done(cmd);
570         } else {
571                 rtn = host->hostt->queuecommand(cmd, scsi_done);
572         }
573         spin_unlock_irqrestore(host->host_lock, flags);
574         if (rtn) {
575                 if (scsi_delete_timer(cmd)) {
576                         atomic_inc(&cmd->device->iodone_cnt);
577                         scsi_queue_insert(cmd,
578                                           (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
579                                           rtn : SCSI_MLQUEUE_HOST_BUSY);
580                 }
581                 SCSI_LOG_MLQUEUE(3,
582                                  printk("queuecommand : request rejected\n"));
583         }
584
585  out:
586         SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
587         return rtn;
588 }

Anyone who has come along this far will spot at a glance that the line we have been waiting for is line 571, the call to queuecommand(). From that point on we already know what happens: for a USB flash drive, this is where the command is handed over to the USB storage driver and executed; for a real SCSI controller, the queuecommand in its driver gets called in just the same way, and the rest is no longer our concern. Under normal circumstances queuecommand returns 0, and scsi_dispatch_cmd then returns 0 as well. With that, one SCSI command has been dispatched.
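To make the contract behind line 571 a little more concrete, here is a minimal sketch of what a low-level driver's queuecommand could look like in that era. The names my_adapter_queuecommand and my_hw_submit are invented for illustration; only the two-argument prototype, the return codes and the done() callback reflect the real mid-layer interface.

/* Hypothetical LLDD entry point; the names are made up, the calling
 * convention is the one scsi_dispatch_cmd relies on. */
static int my_adapter_queuecommand(struct scsi_cmnd *cmd,
                                   void (*done)(struct scsi_cmnd *))
{
        /* remember the mid-layer completion callback */
        cmd->scsi_done = done;

        /* hand the CDB to the hardware; if the adapter has no free
         * slot, ask the mid-layer to requeue the command later */
        if (!my_hw_submit(cmd))
                return SCSI_MLQUEUE_HOST_BUSY;

        /* accepted: scsi_dispatch_cmd sees rtn == 0; later the
         * interrupt handler completes it via cmd->scsi_done(cmd) */
        return 0;
}

Returning SCSI_MLQUEUE_HOST_BUSY here is exactly the non-zero rtn that the if (rtn) branch at line 574 handles by requeueing the command.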



Whether scsi_request_fn() is done yet depends on whether the condition of its while loop still holds, and that in turn is at the mercy of blk_queue_plugged(). So, taking the words literally, what does "queue plugged" mean? My pirated copy of Kingsoft PowerWord tells me that plugged means stopped up, blocked. What would a blocked queue be? Picture the North Fourth Ring Road at rush hour: row after row of cars lined up, barely moving for ages. That is being plugged, or in everyday language a traffic jam. A flag is used to mark whether the queue is jammed, from include/linux/blkdev.h:

523 #define blk_queue_plugged(q)    test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
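To see how this macro actually gates scsi_request_fn(), here is the skeleton of that loop reduced to the plug check. This is a paraphrased sketch, not the full listing from drivers/scsi/scsi_lib.c:

static void scsi_request_fn(struct request_queue *q)
{
        struct request *req;

        /* keep taking requests off the queue until someone plugs it
         * (or the device/host cannot accept any more work) */
        while (!blk_queue_plugged(q)) {
                req = elv_next_request(q);
                if (!req)
                        break;

                /* ... build a scsi_cmnd for the request and hand it
                 * to scsi_dispatch_cmd(), as we saw earlier ... */
        }
}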

Two functions change this flag: one sets it, the other clears it.

The one responsible for setting it is blk_plug_device:

1542 /*
1543  * "plug" the device if there are no outstanding requests: this will
1544  * force the transfer to start only after we have put all the requests
1545  * on the list.
1546  *
1547  * This is called with interrupts off and no requests on the queue and
1548  * with the queue lock held.
1549  */
1550 void blk_plug_device(request_queue_t *q)
1551 {
1552         WARN_ON(!irqs_disabled());
1553
1554         /*
1555          * don't plug a stopped queue, it must be paired with blk_start_queue()
1556          * which will restart the queueing
1557          */
1558         if (blk_queue_stopped(q))
1559                 return;
1560
1561         if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1562                 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1563                 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1564         }
1565 }

The one responsible for clearing it is blk_remove_plug():

1569 /*
1570  * remove the queue from the plugged list, if present. called with
1571  * queue lock held and interrupts disabled.
1572  */
1573 int blk_remove_plug(request_queue_t *q)
1574 {
1575         WARN_ON(!irqs_disabled());
1576
1577         if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1578                 return 0;
1579
1580         del_timer(&q->unplug_timer);
1581         return 1;
1582 }

The former is called from quite a few places. For example, __elv_add_request, which we have already met, takes a fourth parameter, int plug, which controls whether blk_plug_device() is called; and when blk_execute_rq_nowait() calls __elv_add_request(), the plug it passes is 1.

On the other hand, blk_remove_plug is also called in several places, and __generic_unplug_device() is one of them. So in the context we are in, the flag has not actually been set, and scsi_request_fn() therefore gets to run.
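Reduced to the two lines we care about, that call path looks roughly like this. This is a simplified sketch of __elv_add_request from the block/elevator.c of that kernel generation, not a verbatim quote:

void __elv_add_request(request_queue_t *q, struct request *rq,
                       int where, int plug)
{
        /* ... barrier and ordered-request bookkeeping omitted ... */

        /* the fourth argument decides whether the queue gets plugged;
         * blk_execute_rq_nowait() passes plug == 1 here */
        if (plug)
                blk_plug_device(q);

        elv_insert(q, rq, where);
}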

So what were these two functions written for in the first place? These days, if someone turns thief, I can tell myself it is to rob the rich and feed the poor; if someone kills, I can tell myself it is to see justice done; even if a married woman strays, I can tell myself it is to stimulate the economy. Yet for a long time I could not for the life of me understand why anyone would write these two functions.

Later I found a way to think about it. Suppose you often drive along Chang'an Avenue. You will notice the road is frequently closed off: some leader is heading out, some dignitary is visiting. Think of blk_plug_device() as closing the road and blk_remove_plug() as reopening it. For the traffic to move, the road must not be closed; in other words, there must be no roadblock. The QUEUE_FLAG_PLUGGED flag is that roadblock: while it is set, the queue cannot advance; only when it is clear does the queue have a chance to move. And the roadblock exists because there is a genuine need for it: sometimes you really do not want the queue to move forward.

We also see two other functions being called here, mod_timer and del_timer. What are they for? Remember kblockd? Long ago we created the workqueue kblockd_workqueue, and now it is finally time for it to take the stage. Let us roll the camera back to blk_init_queue_node(). We have been through this function before, so only the lines that matter here are quoted:

1922         q->request_fn           = rfn;
1923         q->prep_rq_fn           = NULL;
1924         q->unplug_fn            = generic_unplug_device;
1925         q->queue_flags          = (1 << QUEUE_FLAG_CLUSTER);
1926         q->queue_lock           = lock;
1927
1928         blk_queue_segment_boundary(q, 0xffffffff);
1929
1930         blk_queue_make_request(q, __make_request);

First, q->unplug_fn is assigned generic_unplug_device. This matters, and we will need it shortly.

Then look at blk_queue_make_request(). We did not cover this function at the time. It comes from block/ll_rw_blk.c:

180 /**
181  * blk_queue_make_request - define an alternate make_request function for a device
182  * @q:  the request queue for the device to be affected
183  * @mfn: the alternate make_request function
184  *
185  * Description:
186  *    The normal way for &struct bios to be passed to a device
187  *    driver is for them to be collected into requests on a request
188  *    queue, and then to allow the device driver to select requests
189  *    off that queue when it is ready.  This works well for many block
190  *    devices. However some block devices (typically virtual devices
191  *    such as md or lvm) do not benefit from the processing on the
192  *    request queue, and are served best by having the requests passed
193  *    directly to them.  This can be achieved by providing a function
194  *    to blk_queue_make_request().
195  *
196  * Caveat:
197  *    The driver that does this *must* be able to deal appropriately
198  *    with buffers in "highmemory". This can be accomplished by either calling
199  *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
200  *    blk_queue_bounce() to create a buffer in normal memory.
201  **/
202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
203 {
204         /*
205          * set defaults
206          */
207         q->nr_requests = BLKDEV_MAX_RQ;
208         blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
209         blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
210         q->make_request_fn = mfn;
211         q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
212         q->backing_dev_info.state = 0;
213         q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
214         blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
215         blk_queue_hardsect_size(q, 512);
216         blk_queue_dma_alignment(q, 511);
217         blk_queue_congestion_threshold(q);
218         q->nr_batching = BLK_BATCH_REQ;
219
220         q->unplug_thresh = 4;           /* hmm */
221         q->unplug_delay = (3 * HZ) / 1000;      /* 3 milliseconds */
222         if (q->unplug_delay == 0)
223                 q->unplug_delay = 1;
224
225         INIT_WORK(&q->unplug_work, blk_unplug_work);
226
227         q->unplug_timer.function = blk_unplug_timeout;
228         q->unplug_timer.data = (unsigned long)q;
229
230         /*
231          * by default assume old behaviour and bounce for any highmem page
232          */
233         blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
234 }

The members to focus on here are the ones with "unplug" in their names. INIT_WORK in particular means that whenever the unplug_work item is run, the function blk_unplug_work() is what gets executed. And with unplug_timer set up this way, we know that once the timer is armed and then expires, blk_unplug_timeout() runs. Because unplug_delay is set to 3 ms here, the timer's timeout is 3 ms: arm the timer, and 3 ms later blk_unplug_timeout() is executed. That function comes from block/ll_rw_blk.c:

1646 static void blk_unplug_timeout(unsigned long data)
1647 {
1648         request_queue_t *q = (request_queue_t *)data;
1649
1650         blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1651                                 q->rq.count[READ] + q->rq.count[WRITE]);
1652
1653         kblockd_schedule_work(&q->unplug_work);
1654 }

As you can see, all it really does is call kblockd_schedule_work; in other words, the function that actually ends up running is blk_unplug_work().
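For completeness, kblockd_schedule_work() itself is tiny; as far as I remember it does nothing more than push the work item onto the kblockd_workqueue we created earlier (a sketch from memory, see block/ll_rw_blk.c for the exact code):

int kblockd_schedule_work(struct work_struct *work)
{
        /* defer the unplug work to kblockd's worker thread, which
         * will then call blk_unplug_work() in process context */
        return queue_work(kblockd_workqueue, work);
}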

1636 static void blk_unplug_work(struct work_struct *work)
1637 {
1638         request_queue_t *q = container_of(work, request_queue_t, unplug_work);
1639
1640         blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1641                                 q->rq.count[READ] + q->rq.count[WRITE]);
1642
1643         q->unplug_fn(q);
1644 }

And as we said a moment ago, unplug_fn was assigned generic_unplug_device, so generic_unplug_device is what really gets executed. What does that function look like?

1601 /**
1602  * generic_unplug_device - fire a request queue
1603  * @q:    The &request_queue_t in question
1604  *
1605  * Description:
1606  *   Linux uses plugging to build bigger requests queues before letting
1607  *   the device have at them. If a queue is plugged, the I/O scheduler
1608  *   is still adding and merging requests on the queue. Once the queue
1609  *   gets unplugged, the request_fn defined for the queue is invoked and
1610  *   transfers started.
1611  **/
1612 void generic_unplug_device(request_queue_t *q)
1613 {
1614         spin_lock_irq(q->queue_lock);
1615         __generic_unplug_device(q);
1616         spin_unlock_irq(q->queue_lock);
1617 }



,
扭扭捏捏大半天

,
其实就是调用

__generic_unplug_device.
而回过头去看这个函数

,
我们知道

,
它也无非就是调用了两个函数

,blk_remove_plug


request_fn.
这下子我们基本上就明白了

.
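If memory serves, __generic_unplug_device() itself is only a few lines; roughly the following (a sketch, check block/ll_rw_blk.c for the exact listing):

void __generic_unplug_device(request_queue_t *q)
{
        /* a stopped queue must be restarted by blk_start_queue() */
        if (unlikely(blk_queue_stopped(q)))
                return;

        /* clear QUEUE_FLAG_PLUGGED and cancel the unplug timer;
         * nothing to do if the queue was not plugged at all */
        if (!blk_remove_plug(q))
                return;

        /* let the strategy routine (scsi_request_fn in our case)
         * start draining the queue again */
        q->request_fn(q);
}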
To summarize:

1. blk_plug_device() closes the road.

2. blk_remove_plug() lifts the closure.

3. But a road closure has a time limit. Even Chang'an Avenue is only closed briefly for major events; most of the year the traffic has to keep flowing. So when the queue is plugged, a timer is armed, unplug_timer (via mod_timer), and when it expires blk_remove_plug is run automatically to lift the closure.

4. And when lifting the closure, remember to switch that timer off (via del_timer).

5. Once the closure is lifted, request_fn() is called to work on the next request in the queue; in other words, the traffic starts moving again.
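Just to restate the summary in code form, here is a toy user-space analogy of the mechanism. Nothing below is kernel API: queue, plug_device, remove_plug and request_fn are invented names that merely mirror the roles of QUEUE_FLAG_PLUGGED, blk_plug_device(), blk_remove_plug() and the queue's request_fn.

#include <stdbool.h>
#include <stdio.h>

struct queue {
        int  pending;           /* number of queued requests           */
        bool plugged;           /* our stand-in for QUEUE_FLAG_PLUGGED */
};

static void plug_device(struct queue *q)          /* "close the road"  */
{
        q->plugged = true;      /* the real thing also arms unplug_timer */
}

static bool remove_plug(struct queue *q)          /* "reopen the road" */
{
        if (!q->plugged)
                return false;
        q->plugged = false;     /* the real thing also del_timer()s    */
        return true;
}

static void request_fn(struct queue *q)           /* drain the queue   */
{
        while (!q->plugged && q->pending > 0) {
                printf("dispatching one request\n");
                q->pending--;
        }
}

int main(void)
{
        struct queue q = { 0, false };

        /* batch three requests behind a plug ... */
        plug_device(&q);
        q.pending = 3;
        request_fn(&q);         /* does nothing: still plugged */

        /* ... then "the timer fires": unplug and let the queue drain */
        if (remove_plug(&q))
                request_fn(&q); /* now dispatches all three */
        return 0;
}

The only thing this toy leaves out is the timer itself: in the kernel, the "then the timer fires" step is what unplug_timer, blk_unplug_timeout and kblockd do for you after the 3 ms delay.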

OK, with that we understand this pair of road-closing and road-opening functions. Finally, as an aside about unplug and plug: I think activate and deactivate would be more fitting words, or activate and freeze, or, more simply, on and off.