Linux那些事儿之我是Block层(9)scsi命令的前世今生(三)
2010-08-10 06:55
351 查看
下一个更为重要的函数是
scsi_dispatch_cmd,
来自
drivers/scsi/scsi.c:
459 /*
460
* Function:
scsi_dispatch_command
461
*
462
* Purpose:
Dispatch a command to the low-level driver.
463
*
464
* Arguments:
cmd - command block we are dispatching.
465
*
466
* Notes:
467
*/
468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
469 {
470
struct Scsi_Host *host = cmd->device->host;
471
unsigned long flags = 0;
472
unsigned long timeout;
473
int rtn = 0;
474
475
/* check if the device is still usable */
476
if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
477
/* in SDEV_DEL we error all commands. DID_NO_CONNECT
478
* returns an immediate error upwards, and signals
479
* that the device is no longer present */
480
cmd->result = DID_NO_CONNECT << 16;
481
atomic_inc(&cmd->device->iorequest_cnt);
482
__scsi_done(cmd);
483
/* return 0 (because the command has been processed) */
484
goto out;
485
}
486
487
/* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
488
if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
489
/*
490
* in SDEV_BLOCK, the command is just put back on the device
491
* queue.
The suspend state has already blocked the queue so
492
* future requests should not occur until the device
493
* transitions out of the suspend state.
494
*/
495
scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
496
497
SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
498
499
/*
500
* NOTE: rtn is still zero here because we don't need the
501
* queue to be plugged on return (it's already stopped)
502
*/
503
goto out;
504
}
505
506
/*
507
* If SCSI-2 or lower, store the LUN value in cmnd.
508
*/
509
if (cmd->device->scsi_level <= SCSI_2 &&
510
cmd->device->scsi_level != SCSI_UNKNOWN) {
511
cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
512
(cmd->device->lun << 5 & 0xe0);
513
}
514
515
/*
516
* We will wait MIN_RESET_DELAY clock ticks after the last reset so
517
* we can avoid the drive not being ready.
518
*/
519
timeout = host->last_reset + MIN_RESET_DELAY;
520
521
if (host->resetting && time_before(jiffies, timeout)) {
522
int ticks_remaining = timeout - jiffies;
523
/*
524
* NOTE: This may be executed from within an interrupt
525
* handler!
This is bad, but for now, it'll do.
The irq
526
* level of the interrupt handler has been masked out by the
527
* platform dependent interrupt handling code already, so the
528
* sti() here will not cause another call to the SCSI host's
529
* interrupt handler (assuming there is one irq-level per
530
* host).
531
*/
532
while (--ticks_remaining >= 0)
533
mdelay(1 + 999 / HZ);
534
host->resetting = 0;
535
}
536
537
/*
538
* AK: unlikely race here: for some reason the timer could
539
* expire before the serial number is set up below.
540
*/
541
scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
542
543
scsi_log_send(cmd);
544
545
/*
546
* We will use a queued command if possible, otherwise we will
547
* emulate the queuing and calling of completion function ourselves.
548
*/
549
atomic_inc(&cmd->device->iorequest_cnt);
550
551
/*
552
* Before we queue this command, check if the command
553
* length exceeds what the host adapter can handle.
554
*/
555
if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {
556
SCSI_LOG_MLQUEUE(3,
557
printk("queuecommand : command too long.\n"));
558
cmd->result = (DID_ABORT << 16);
559
560
scsi_done(cmd);
561
goto out;
562
}
563
564
spin_lock_irqsave(host->host_lock, flags);
565
scsi_cmd_get_serial(host, cmd);
566
567
if (unlikely(host->shost_state == SHOST_DEL)) {
568
cmd->result = (DID_NO_CONNECT << 16);
569
scsi_done(cmd);
570
} else {
571
rtn = host->hostt->queuecommand(cmd, scsi_done);
572
}
573
spin_unlock_irqrestore(host->host_lock, flags);
574
if (rtn) {
575
if (scsi_delete_timer(cmd)) {
576
atomic_inc(&cmd->device->iodone_cnt);
577
scsi_queue_insert(cmd,
578
(rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
579
rtn : SCSI_MLQUEUE_HOST_BUSY);
580
}
581
SCSI_LOG_MLQUEUE(3,
582
printk("queuecommand : request rejected\n"));
583
}
584
585
out:
586
SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
587
return rtn;
588 }
一路走来的兄弟一定会一眼就看出这里我们最期待的一行代码就是
571
那个
queuecommand()
的调用
.
因为这之后我们就知道该发生什么了
.
比如对于
U
盘驱动来说
,
命令就从这里接过去开始执行
.
而对于实际的
scsi
控制器
,
其对应的驱动中的
queuecommand
也会被调用
,
剩下的事情我们就不用操心了
.
正常情况下
queuecommand
返回
0.
于是紧接着
scsi_dispatch_cmd
也返回
0.
这样就算是执行了一条
scsi
命令了
.
而
scsi_request_fn()
是否结束还得看
while
循环的条件是否满足
,
而这就得看
blk_queue_plugged()
的脸色了
.
那么我们从字面上来分析
,
什么叫
queue plugged?
我那盗版金山词霸告诉我
plugged
就是塞紧的意思
,
你说队列塞紧的是什么意思
?
比如说
,
北四环上上下班高峰期
,
许许多多的车辆排成一队又一队
,
但是可能半天都前进不了
,
这就叫塞紧
,
或者说堵车
,
也叫塞车
.
为此咱们使用一个
flag
来标志堵车与否
,
来自
include/linux/blkdev.h:
523 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
改变这个
flag
的函数有两个
,
一个是设置
,
一个是取消
.
负责设置的是
blk_plug_device.
1542 /*
1543
* "plug" the device if there are no outstanding requests: this will
1544
* force the transfer to start only after we have put all the requests
1545
* on the list.
1546
*
1547
* This is called with interrupts off and no requests on the queue and
1548
* with the queue lock held.
1549
*/
1550 void blk_plug_device(request_queue_t *q)
1551 {
1552
WARN_ON(!irqs_disabled());
1553
1554
/*
1555
* don't plug a stopped queue, it must be paired with blk_start_queue()
1556
* which will restart the queueing
1557
*/
1558
if (blk_queue_stopped(q))
1559
return;
1560
1561
if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1562
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1563
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1564
}
1565 }
负责取消的是
blk_remove_plug().
1569 /*
1570
* remove the queue from the plugged list, if present. called with
1571
* queue lock held and interrupts disabled.
1572
*/
1573 int blk_remove_plug(request_queue_t *q)
1574 {
1575
WARN_ON(!irqs_disabled());
1576
1577
if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1578
return 0;
1579
1580
del_timer(&q->unplug_timer);
1581
return 1;
1582 }
而调用前者的地方不少
,
比如我们见到的
__elv_add_request,
其第四个参数
int plug
就可以控制是否调用
blk_plug_device(),
而当我们在
blk_execute_rq_nowait()
中调用
__elv_add_request()
的时候传递的
plug
就是
1.
另一方面
,
调用
blk_remove_plug
的地方也有多处
.
其中
__generic_unplug_device()
就是之一
.
所以在咱们这个上下文里
,
实际上并没有设置这个
flag,
因此
scsi_request_fn()
就会被执行
.
那么编写这两个函数究竟是为了什么呢
?
这年头
,
有人做贼
,
我可以理解是为了劫富济贫
,
有人杀人
,
我可以理解是为了伸张正义
,
甚至有女人红杏出墙
,
我还可以理解是为了繁荣经济
.
然而
,
很长一段时间我都没办法理解有人编写这两个函数是为了什么
?
后来我想
,
不妨这样理解
,
假设你经常开车经过长安街
,
你会发现经常有戒严的现象发生
,
比如某位领导人要出行
,
比如某位领导人要来访
,
而你可以把
blk_plug_device()
想象成戒严
,
把
blk_remove_plug
想象成开放
.
车流要想行进
,
前提条件是没有戒严
,
换言之
,
没有设卡
,
而
QUEUE_FLAG_PLUGGED
这个
flag
就相当于
“
卡
”,
设了它队列就不能前进了
,
没有设才有可能前进
.
之所以需要设卡
,
是因为确实有这个需求
,
有时候确实不想让队列前进
.
那么这里我们还看到两个函数被调用了
,mod_timer
和
del_timer,
这是干嘛使的
?
还记得
kblockd
么
?
最早咱们创建了那个工作队列
kblockd_workqueue,
现在是它该出场的时间了
.
让我们把镜头拉回到函数
blk_init_queue_node().
这个函数我们曾经看过
,
所以这里只贴出其中跟我们这里密切相关的几行
:
1922
q->request_fn
= rfn;
1923
q->prep_rq_fn
= NULL;
1924
q->unplug_fn
= generic_unplug_device;
1925
q->queue_flags
= (1 << QUEUE_FLAG_CLUSTER);
1926
q->queue_lock
= lock;
1927
1928
blk_queue_segment_boundary(q, 0xffffffff);
1929
1930
blk_queue_make_request(q, __make_request);
首先
q->unplug_fn
被赋上了
generic_unplug_device.
这一点很重要
,
稍后会用到
.
然后来看
blk_queue_make_request().
这个函数
当时咱们并没有讲过
.
来自
block/ll_rw_blk.c:
180 /**
181
* blk_queue_make_request - define an alternate make_request function for a device
182
* @q:
the request queue for the device to be affected
183
* @mfn: the alternate make_request function
184
*
185
* Description:
186
*
The normal way for &struct bios to be passed to a device
187
*
driver is for them to be collected into requests on a request
188
*
queue, and then to allow the device driver to select requests
189
*
off that queue when it is ready.
This works well for many block
190
*
devices. However some block devices (typically virtual devices
191
*
such as md or lvm) do not benefit from the processing on the
192
*
request queue, and are served best by having the requests passed
193
*
directly to them.
This can be achieved by providing a function
194
*
to blk_queue_make_request().
195
*
196
* Caveat:
197
*
The driver that does this *must* be able to deal appropriately
198
*
with buffers in "highmemory". This can be accomplished by either calling
199
*
__bio_kmap_atomic() to get a temporary kernel mapping, or by calling
200
*
blk_queue_bounce() to create a buffer in normal memory.
201
**/
202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
203 {
204
/*
205
* set defaults
206
*/
207
q->nr_requests = BLKDEV_MAX_RQ;
208
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
209
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
210
q->make_request_fn = mfn;
211
q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
212
q->backing_dev_info.state = 0;
213
q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
214
blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
215
blk_queue_hardsect_size(q, 512);
216
blk_queue_dma_alignment(q, 511);
217
blk_queue_congestion_threshold(q);
218
q->nr_batching = BLK_BATCH_REQ;
219
220
q->unplug_thresh = 4;
/* hmm */
221
q->unplug_delay = (3 * HZ) / 1000;
/* 3 milliseconds */
222
if (q->unplug_delay == 0)
223
q->unplug_delay = 1;
224
225
INIT_WORK(&q->unplug_work, blk_unplug_work);
226
227
q->unplug_timer.function = blk_unplug_timeout;
228
q->unplug_timer.data = (unsigned long)q;
229
230
/*
231
* by default assume old behaviour and bounce for any highmem page
232
*/
233
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
234 }
这里重点关注几个
“unplug”
为名字的成员
.
尤其是
INIT_WORK,
它使得一旦
unplug_work
这项工作被执行
,blk_unplug_work
这个函数就会被执行
.
而
unplug_timer
这么一赋值
,
我们就知道
,
一旦设了闹钟
,
一旦闹钟时间到了
,blk_unplug_timeout
这个函数就会被执行
.
并且因为这里设置了
unplug_delay
为
3ms,
使得闹钟的
timeout
就是
3ms,
一旦激活闹钟
,3ms
之后
blk_unplug_timeout
就会被执行
.
这个函数来自
block/ll_rw_blk.c:
1646 static void blk_unplug_timeout(unsigned long data)
1647 {
1648
request_queue_t *q = (request_queue_t *)data;
1649
1650
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1651
q->rq.count[READ] + q->rq.count[WRITE]);
1652
1653
kblockd_schedule_work(&q->unplug_work);
1654 }
可以看到
,
其实就是执行
kblockd_schedule_work,
换言之
,
真正被调用的函数就是
blk_unplug_work().
1636 static void blk_unplug_work(struct work_struct *work)
1637 {
1638
request_queue_t *q = container_of(work, request_queue_t, unplug_work);
1639
1640
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1641
q->rq.count[READ] + q->rq.count[WRITE]);
1642
1643
q->unplug_fn(q);
1644 }
而刚才我们说了
,unplug_fn
被赋上了
generic_unplug_device.
所以真正要执行的是
generic_unplug_device.
而这个函数又长成什么样呢
?
1601 /**
1602
* generic_unplug_device - fire a request queue
1603
* @q:
The &request_queue_t in question
1604
*
1605
* Description:
1606
*
Linux uses plugging to build bigger requests queues before letting
1607
*
the device have at them. If a queue is plugged, the I/O scheduler
1608
*
is still adding and merging requests on the queue. Once the queue
1609
*
gets unplugged, the request_fn defined for the queue is invoked and
1610
*
transfers started.
1611
**/
1612 void generic_unplug_device(request_queue_t *q)
1613 {
1614
spin_lock_irq(q->queue_lock);
1615
__generic_unplug_device(q);
1616
spin_unlock_irq(q->queue_lock);
1617 }
哦
,
扭扭捏捏大半天
,
其实就是调用
__generic_unplug_device.
而回过头去看这个函数
,
我们知道
,
它也无非就是调用了两个函数
,blk_remove_plug
和
request_fn.
这下子我们基本上就明白了
.
总结一下就是
:
1.
blk_plug_device()
负责戒严
.
2.
blk_remove_plug()
负责解禁
.
3.
但是戒严这东西吧
,
也是有时间限制的
,
毕竟长安街就算有重大活动也是短时间的
,
一年中毕竟大多数时间还是得保证道路畅通
.
所以在戒严的时候
,
设了一个定时器
,unplug_timer, (
即
mod_timer
),
一旦时间到了就自动执行
blk_remove_plug
去解禁
.
4.
而在解禁的时候就不要忘记把这个定时器给关掉
.(
即
del_timer)
5.
解禁之后调用
request_fn()
开始处理队列中的下一个请求
,
或者说车流开始恢复前行
.
Ok,
这样我们就算是明白这两个戒严与解禁的函数了
.
最后
,
题外话
,
关于
unplug
和
plug,
我觉得更贴切的单词是
activate
和
deactivate,
或者说激活与冻结
,
或者简单的说
,
开与关
.
scsi_dispatch_cmd,
来自
drivers/scsi/scsi.c:
459 /*
460
* Function:
scsi_dispatch_command
461
*
462
* Purpose:
Dispatch a command to the low-level driver.
463
*
464
* Arguments:
cmd - command block we are dispatching.
465
*
466
* Notes:
467
*/
468 int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
469 {
470
struct Scsi_Host *host = cmd->device->host;
471
unsigned long flags = 0;
472
unsigned long timeout;
473
int rtn = 0;
474
475
/* check if the device is still usable */
476
if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
477
/* in SDEV_DEL we error all commands. DID_NO_CONNECT
478
* returns an immediate error upwards, and signals
479
* that the device is no longer present */
480
cmd->result = DID_NO_CONNECT << 16;
481
atomic_inc(&cmd->device->iorequest_cnt);
482
__scsi_done(cmd);
483
/* return 0 (because the command has been processed) */
484
goto out;
485
}
486
487
/* Check to see if the scsi lld put this device into state SDEV_BLOCK. */
488
if (unlikely(cmd->device->sdev_state == SDEV_BLOCK)) {
489
/*
490
* in SDEV_BLOCK, the command is just put back on the device
491
* queue.
The suspend state has already blocked the queue so
492
* future requests should not occur until the device
493
* transitions out of the suspend state.
494
*/
495
scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
496
497
SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
498
499
/*
500
* NOTE: rtn is still zero here because we don't need the
501
* queue to be plugged on return (it's already stopped)
502
*/
503
goto out;
504
}
505
506
/*
507
* If SCSI-2 or lower, store the LUN value in cmnd.
508
*/
509
if (cmd->device->scsi_level <= SCSI_2 &&
510
cmd->device->scsi_level != SCSI_UNKNOWN) {
511
cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
512
(cmd->device->lun << 5 & 0xe0);
513
}
514
515
/*
516
* We will wait MIN_RESET_DELAY clock ticks after the last reset so
517
* we can avoid the drive not being ready.
518
*/
519
timeout = host->last_reset + MIN_RESET_DELAY;
520
521
if (host->resetting && time_before(jiffies, timeout)) {
522
int ticks_remaining = timeout - jiffies;
523
/*
524
* NOTE: This may be executed from within an interrupt
525
* handler!
This is bad, but for now, it'll do.
The irq
526
* level of the interrupt handler has been masked out by the
527
* platform dependent interrupt handling code already, so the
528
* sti() here will not cause another call to the SCSI host's
529
* interrupt handler (assuming there is one irq-level per
530
* host).
531
*/
532
while (--ticks_remaining >= 0)
533
mdelay(1 + 999 / HZ);
534
host->resetting = 0;
535
}
536
537
/*
538
* AK: unlikely race here: for some reason the timer could
539
* expire before the serial number is set up below.
540
*/
541
scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
542
543
scsi_log_send(cmd);
544
545
/*
546
* We will use a queued command if possible, otherwise we will
547
* emulate the queuing and calling of completion function ourselves.
548
*/
549
atomic_inc(&cmd->device->iorequest_cnt);
550
551
/*
552
* Before we queue this command, check if the command
553
* length exceeds what the host adapter can handle.
554
*/
555
if (CDB_SIZE(cmd) > cmd->device->host->max_cmd_len) {
556
SCSI_LOG_MLQUEUE(3,
557
printk("queuecommand : command too long.\n"));
558
cmd->result = (DID_ABORT << 16);
559
560
scsi_done(cmd);
561
goto out;
562
}
563
564
spin_lock_irqsave(host->host_lock, flags);
565
scsi_cmd_get_serial(host, cmd);
566
567
if (unlikely(host->shost_state == SHOST_DEL)) {
568
cmd->result = (DID_NO_CONNECT << 16);
569
scsi_done(cmd);
570
} else {
571
rtn = host->hostt->queuecommand(cmd, scsi_done);
572
}
573
spin_unlock_irqrestore(host->host_lock, flags);
574
if (rtn) {
575
if (scsi_delete_timer(cmd)) {
576
atomic_inc(&cmd->device->iodone_cnt);
577
scsi_queue_insert(cmd,
578
(rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
579
rtn : SCSI_MLQUEUE_HOST_BUSY);
580
}
581
SCSI_LOG_MLQUEUE(3,
582
printk("queuecommand : request rejected\n"));
583
}
584
585
out:
586
SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n"));
587
return rtn;
588 }
一路走来的兄弟一定会一眼就看出这里我们最期待的一行代码就是
571
那个
queuecommand()
的调用
.
因为这之后我们就知道该发生什么了
.
比如对于
U
盘驱动来说
,
命令就从这里接过去开始执行
.
而对于实际的
scsi
控制器
,
其对应的驱动中的
queuecommand
也会被调用
,
剩下的事情我们就不用操心了
.
正常情况下
queuecommand
返回
0.
于是紧接着
scsi_dispatch_cmd
也返回
0.
这样就算是执行了一条
scsi
命令了
.
而
scsi_request_fn()
是否结束还得看
while
循环的条件是否满足
,
而这就得看
blk_queue_plugged()
的脸色了
.
那么我们从字面上来分析
,
什么叫
queue plugged?
我那盗版金山词霸告诉我
plugged
就是塞紧的意思
,
你说队列塞紧的是什么意思
?
比如说
,
北四环上上下班高峰期
,
许许多多的车辆排成一队又一队
,
但是可能半天都前进不了
,
这就叫塞紧
,
或者说堵车
,
也叫塞车
.
为此咱们使用一个
flag
来标志堵车与否
,
来自
include/linux/blkdev.h:
523 #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
改变这个
flag
的函数有两个
,
一个是设置
,
一个是取消
.
负责设置的是
blk_plug_device.
1542 /*
1543
* "plug" the device if there are no outstanding requests: this will
1544
* force the transfer to start only after we have put all the requests
1545
* on the list.
1546
*
1547
* This is called with interrupts off and no requests on the queue and
1548
* with the queue lock held.
1549
*/
1550 void blk_plug_device(request_queue_t *q)
1551 {
1552
WARN_ON(!irqs_disabled());
1553
1554
/*
1555
* don't plug a stopped queue, it must be paired with blk_start_queue()
1556
* which will restart the queueing
1557
*/
1558
if (blk_queue_stopped(q))
1559
return;
1560
1561
if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1562
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1563
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1564
}
1565 }
负责取消的是
blk_remove_plug().
1569 /*
1570
* remove the queue from the plugged list, if present. called with
1571
* queue lock held and interrupts disabled.
1572
*/
1573 int blk_remove_plug(request_queue_t *q)
1574 {
1575
WARN_ON(!irqs_disabled());
1576
1577
if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1578
return 0;
1579
1580
del_timer(&q->unplug_timer);
1581
return 1;
1582 }
而调用前者的地方不少
,
比如我们见到的
__elv_add_request,
其第四个参数
int plug
就可以控制是否调用
blk_plug_device(),
而当我们在
blk_execute_rq_nowait()
中调用
__elv_add_request()
的时候传递的
plug
就是
1.
另一方面
,
调用
blk_remove_plug
的地方也有多处
.
其中
__generic_unplug_device()
就是之一
.
所以在咱们这个上下文里
,
实际上并没有设置这个
flag,
因此
scsi_request_fn()
就会被执行
.
那么编写这两个函数究竟是为了什么呢
?
这年头
,
有人做贼
,
我可以理解是为了劫富济贫
,
有人杀人
,
我可以理解是为了伸张正义
,
甚至有女人红杏出墙
,
我还可以理解是为了繁荣经济
.
然而
,
很长一段时间我都没办法理解有人编写这两个函数是为了什么
?
后来我想
,
不妨这样理解
,
假设你经常开车经过长安街
,
你会发现经常有戒严的现象发生
,
比如某位领导人要出行
,
比如某位领导人要来访
,
而你可以把
blk_plug_device()
想象成戒严
,
把
blk_remove_plug
想象成开放
.
车流要想行进
,
前提条件是没有戒严
,
换言之
,
没有设卡
,
而
QUEUE_FLAG_PLUGGED
这个
flag
就相当于
“
卡
”,
设了它队列就不能前进了
,
没有设才有可能前进
.
之所以需要设卡
,
是因为确实有这个需求
,
有时候确实不想让队列前进
.
那么这里我们还看到两个函数被调用了
,mod_timer
和
del_timer,
这是干嘛使的
?
还记得
kblockd
么
?
最早咱们创建了那个工作队列
kblockd_workqueue,
现在是它该出场的时间了
.
让我们把镜头拉回到函数
blk_init_queue_node().
这个函数我们曾经看过
,
所以这里只贴出其中跟我们这里密切相关的几行
:
1922
q->request_fn
= rfn;
1923
q->prep_rq_fn
= NULL;
1924
q->unplug_fn
= generic_unplug_device;
1925
q->queue_flags
= (1 << QUEUE_FLAG_CLUSTER);
1926
q->queue_lock
= lock;
1927
1928
blk_queue_segment_boundary(q, 0xffffffff);
1929
1930
blk_queue_make_request(q, __make_request);
首先
q->unplug_fn
被赋上了
generic_unplug_device.
这一点很重要
,
稍后会用到
.
然后来看
blk_queue_make_request().
这个函数
当时咱们并没有讲过
.
来自
block/ll_rw_blk.c:
180 /**
181
* blk_queue_make_request - define an alternate make_request function for a device
182
* @q:
the request queue for the device to be affected
183
* @mfn: the alternate make_request function
184
*
185
* Description:
186
*
The normal way for &struct bios to be passed to a device
187
*
driver is for them to be collected into requests on a request
188
*
queue, and then to allow the device driver to select requests
189
*
off that queue when it is ready.
This works well for many block
190
*
devices. However some block devices (typically virtual devices
191
*
such as md or lvm) do not benefit from the processing on the
192
*
request queue, and are served best by having the requests passed
193
*
directly to them.
This can be achieved by providing a function
194
*
to blk_queue_make_request().
195
*
196
* Caveat:
197
*
The driver that does this *must* be able to deal appropriately
198
*
with buffers in "highmemory". This can be accomplished by either calling
199
*
__bio_kmap_atomic() to get a temporary kernel mapping, or by calling
200
*
blk_queue_bounce() to create a buffer in normal memory.
201
**/
202 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
203 {
204
/*
205
* set defaults
206
*/
207
q->nr_requests = BLKDEV_MAX_RQ;
208
blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
209
blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
210
q->make_request_fn = mfn;
211
q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
212
q->backing_dev_info.state = 0;
213
q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
214
blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
215
blk_queue_hardsect_size(q, 512);
216
blk_queue_dma_alignment(q, 511);
217
blk_queue_congestion_threshold(q);
218
q->nr_batching = BLK_BATCH_REQ;
219
220
q->unplug_thresh = 4;
/* hmm */
221
q->unplug_delay = (3 * HZ) / 1000;
/* 3 milliseconds */
222
if (q->unplug_delay == 0)
223
q->unplug_delay = 1;
224
225
INIT_WORK(&q->unplug_work, blk_unplug_work);
226
227
q->unplug_timer.function = blk_unplug_timeout;
228
q->unplug_timer.data = (unsigned long)q;
229
230
/*
231
* by default assume old behaviour and bounce for any highmem page
232
*/
233
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
234 }
这里重点关注几个
“unplug”
为名字的成员
.
尤其是
INIT_WORK,
它使得一旦
unplug_work
这项工作被执行
,blk_unplug_work
这个函数就会被执行
.
而
unplug_timer
这么一赋值
,
我们就知道
,
一旦设了闹钟
,
一旦闹钟时间到了
,blk_unplug_timeout
这个函数就会被执行
.
并且因为这里设置了
unplug_delay
为
3ms,
使得闹钟的
timeout
就是
3ms,
一旦激活闹钟
,3ms
之后
blk_unplug_timeout
就会被执行
.
这个函数来自
block/ll_rw_blk.c:
1646 static void blk_unplug_timeout(unsigned long data)
1647 {
1648
request_queue_t *q = (request_queue_t *)data;
1649
1650
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1651
q->rq.count[READ] + q->rq.count[WRITE]);
1652
1653
kblockd_schedule_work(&q->unplug_work);
1654 }
可以看到
,
其实就是执行
kblockd_schedule_work,
换言之
,
真正被调用的函数就是
blk_unplug_work().
1636 static void blk_unplug_work(struct work_struct *work)
1637 {
1638
request_queue_t *q = container_of(work, request_queue_t, unplug_work);
1639
1640
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1641
q->rq.count[READ] + q->rq.count[WRITE]);
1642
1643
q->unplug_fn(q);
1644 }
而刚才我们说了
,unplug_fn
被赋上了
generic_unplug_device.
所以真正要执行的是
generic_unplug_device.
而这个函数又长成什么样呢
?
1601 /**
1602
* generic_unplug_device - fire a request queue
1603
* @q:
The &request_queue_t in question
1604
*
1605
* Description:
1606
*
Linux uses plugging to build bigger requests queues before letting
1607
*
the device have at them. If a queue is plugged, the I/O scheduler
1608
*
is still adding and merging requests on the queue. Once the queue
1609
*
gets unplugged, the request_fn defined for the queue is invoked and
1610
*
transfers started.
1611
**/
1612 void generic_unplug_device(request_queue_t *q)
1613 {
1614
spin_lock_irq(q->queue_lock);
1615
__generic_unplug_device(q);
1616
spin_unlock_irq(q->queue_lock);
1617 }
哦
,
扭扭捏捏大半天
,
其实就是调用
__generic_unplug_device.
而回过头去看这个函数
,
我们知道
,
它也无非就是调用了两个函数
,blk_remove_plug
和
request_fn.
这下子我们基本上就明白了
.
总结一下就是
:
1.
blk_plug_device()
负责戒严
.
2.
blk_remove_plug()
负责解禁
.
3.
但是戒严这东西吧
,
也是有时间限制的
,
毕竟长安街就算有重大活动也是短时间的
,
一年中毕竟大多数时间还是得保证道路畅通
.
所以在戒严的时候
,
设了一个定时器
,unplug_timer, (
即
mod_timer
),
一旦时间到了就自动执行
blk_remove_plug
去解禁
.
4.
而在解禁的时候就不要忘记把这个定时器给关掉
.(
即
del_timer)
5.
解禁之后调用
request_fn()
开始处理队列中的下一个请求
,
或者说车流开始恢复前行
.
Ok,
这样我们就算是明白这两个戒严与解禁的函数了
.
最后
,
题外话
,
关于
unplug
和
plug,
我觉得更贴切的单词是
activate
和
deactivate,
或者说激活与冻结
,
或者简单的说
,
开与关
.
相关文章推荐
- Linux那些事儿之我是Block层(9)scsi命令的前世今生(三)
- Linux那些事儿之我是Block层(10)scsi命令的前世今生(四)
- Linux那些事儿之我是Block层(10)scsi命令的前世今生(四)
- Linux那些事儿之我是Block层(10)scsi命令的前世今生(四)
- Linux那些事儿之我是Block层(9)scsi命令的前世今生(三)
- Linux那些事儿之我是Block层(8)scsi命令的前世今生(二)
- Linux那些事儿之我是Block层(7)scsi命令的前世今生(一)
- Linux那些事儿之我是Block层(7)scsi命令的前世今生(一)
- Linux那些事儿之我是Block层(7)scsi命令的前世今生(一)
- Linux那些事儿之我是Block层(8)scsi命令的前世今生(二)
- Linux那些事儿之我是Block层(8)scsi命令的前世今生(二)
- Linux那些事儿之我是Block层(8)scsi命令的前世今生(二)
- 向usb cdrom发送SCSI 命令
- 对TRIM SCSI命令的一些分析
- SCSI 命令构造应用:USB指纹模块
- Linux系统SCSI磁盘扫描机制解析及命令实例(转)
- Scsi命令队列转换为ata命令过程
- scsi磁盘驱动中将request请求转化为scsi命令的过程(sd_prep_fn)
- Linux系统SCSI磁盘扫描机制解析及命令详细介绍