
Linux Kernel Block Devices, Part 2: Actually Dispatching Requests

When is a queued request actually dispatched (the queue unplugged)?

When allocating a new request fails and the caller must wait for one to free up
When a bio marked for immediate unplug is added to the request queue
When adding a request to the I/O scheduler pushes the queue past the unplug threshold
When the unplug timer expires (the periodic case)

The first three paths call __generic_unplug_device directly; the last one goes through generic_unplug_device. A rough sketch of the timer path is shown below.
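For the periodic case, queue initialization arms an unplug timer and a work item; when the timer fires it schedules the work on kblockd, and the work handler calls q->unplug_fn, which defaults to generic_unplug_device. A minimal sketch of that path for kernels of this era (~2.6.32); exact bodies (tracing calls etc.) vary by version:

/*
 * Rough shape of the timer-driven unplug path; not copied verbatim
 * from any one kernel release.
 */
static void blk_unplug_work(struct work_struct *work)
{
        struct request_queue *q =
                container_of(work, struct request_queue, unplug_work);

        q->unplug_fn(q);                /* defaults to generic_unplug_device */
}

static void blk_unplug_timeout(unsigned long data)
{
        struct request_queue *q = (struct request_queue *)data;

        /* defer to process context; kblockd runs blk_unplug_work */
        kblockd_schedule_work(q, &q->unplug_work);
}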

Mutual exclusion: dispatch runs under the request queue's spinlock, q->queue_lock.

For a SCSI block device, the queue's request dispatch callback (request_fn) is set to scsi_request_fn when the request queue is initialized.
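For context, a block driver installs its strategy routine with blk_init_queue() when it sets up the queue; the SCSI midlayer does the equivalent to install scsi_request_fn. A minimal sketch, assuming hypothetical mydrv_* names (not a real driver):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

struct mydrv_dev {
        spinlock_t              lock;
        struct request_queue    *queue;
};

/* Toy request_fn: called with q->queue_lock held, like scsi_request_fn
 * below; a real driver would hand the request to hardware instead of
 * completing it immediately. */
static void mydrv_request_fn(struct request_queue *q)
{
        struct request *req;

        while ((req = blk_peek_request(q)) != NULL) {
                blk_start_request(req);
                /* ... program the hardware here ... */
                __blk_end_request_all(req, 0);  /* toy example: complete now */
        }
}

static int mydrv_init_queue(struct mydrv_dev *dev)
{
        dev->queue = blk_init_queue(mydrv_request_fn, &dev->lock);
        if (!dev->queue)
                return -ENOMEM;
        dev->queue->queuedata = dev;
        return 0;
}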

/* Actually dispatch the queued requests */
void generic_unplug_device(struct request_queue *q)
{
        if (blk_queue_plugged(q)) {
                spin_lock_irq(q->queue_lock);
                __generic_unplug_device(q);
                spin_unlock_irq(q->queue_lock);
        }
}

/*
 * remove the plug and let it rip..
 */
void __generic_unplug_device(struct request_queue *q)
{
        if (unlikely(blk_queue_stopped(q)))
                return;
        if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
                return;

        q->request_fn(q);
}

/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
int blk_remove_plug(struct request_queue *q)
{
        WARN_ON(!irqs_disabled());

        if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
                return 0;

        del_timer(&q->unplug_timer);
        return 1;
}
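For reference, the counterpart blk_plug_device (called from scsi_request_fn below) marks the queue plugged and arms the unplug timer. Roughly, for this kernel era, and with the same caveat that details vary by version:

/*
 * Counterpart of blk_remove_plug: set QUEUE_FLAG_PLUGGED and arm the
 * unplug timer. Called with the queue lock held and interrupts disabled.
 */
void blk_plug_device(struct request_queue *q)
{
        WARN_ON(!irqs_disabled());

        /* don't plug a stopped queue; blk_start_queue() will restart it */
        if (blk_queue_stopped(q))
                return;

        if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q))
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
}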

/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.
 */
static void scsi_request_fn(struct request_queue *q)
{
        struct scsi_device *sdev = q->queuedata;
        struct Scsi_Host *shost;
        struct scsi_cmnd *cmd;
        struct request *req;

        if (!get_device(&sdev->sdev_gendev))
                /* We must be tearing the block queue down already */
                return;

        /*
         * To start with, we keep looping until the queue is empty, or until
         * the host is no longer able to accept any more requests.
         */
        shost = sdev->host;
        while (!blk_queue_plugged(q)) {
                int rtn;
                /*
                 * get next queueable request.  We do this early to make sure
                 * that the request is fully prepared even if we cannot
                 * accept it.
                 */
                req = blk_peek_request(q);      /* fetch a request from the dispatch queue and prepare a command for it */
                if (!req || !scsi_dev_queue_ready(q, sdev))
                        break;

                if (unlikely(!scsi_device_online(sdev))) {
                        sdev_printk(KERN_ERR, sdev,
                                    "rejecting I/O to offline device\n");
                        scsi_kill_request(req, q);
                        continue;
                }

                /*
                 * Remove the request from the request list.
                 * (Take it off the dispatch queue and start a timeout timer for it.)
                 */
                if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
                        blk_start_request(req);
                sdev->device_busy++;

                spin_unlock(q->queue_lock);
                cmd = req->special;
                if (unlikely(cmd == NULL)) {
                        printk(KERN_CRIT "impossible request in %s.\n"
                                         "please mail a stack trace to "
                                         "linux-scsi@vger.kernel.org\n",
                                         __func__);
                        blk_dump_rq_flags(req, "foo");
                        BUG();
                }
                spin_lock(shost->host_lock);

                /*
                 * We hit this when the driver is using a host wide
                 * tag map. For device level tag maps the queue_depth check
                 * in the device ready fn would prevent us from trying
                 * to allocate a tag. Since the map is a shared host resource
                 * we add the dev to the starved list so it eventually gets
                 * a run when a tag is freed.
                 */
                if (blk_queue_tagged(q) && !blk_rq_tagged(req)) {
                        if (list_empty(&sdev->starved_entry))
                                list_add_tail(&sdev->starved_entry,
                                              &shost->starved_list);
                        goto not_ready;
                }

                if (!scsi_target_queue_ready(shost, sdev))
                        goto not_ready;

                if (!scsi_host_queue_ready(q, shost, sdev))
                        goto not_ready;

                scsi_target(sdev)->target_busy++;
                shost->host_busy++;

                /*
                 * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
                 *           take the lock again.
                 */
                spin_unlock_irq(shost->host_lock);

                /*
                 * Finally, initialize any error handling parameters, and set up
                 * the timers for timeouts.
                 */
                scsi_init_cmd_errh(cmd);        /* initialize the command's error-handling fields */

                /*
                 * Dispatch the command to the low-level driver.
                 */
                rtn = scsi_dispatch_cmd(cmd);   /* hand the command to the low-level driver */
                spin_lock_irq(q->queue_lock);
                if (rtn) {
                        /* we're refusing the command; because of
                         * the way locks get dropped, we need to
                         * check here if plugging is required */
                        if (sdev->device_busy == 0)
                                blk_plug_device(q);

                        break;
                }
        }

        goto out;

 not_ready:
        spin_unlock_irq(shost->host_lock);

        /*
         * lock q, handle tag, requeue req, and decrement device_busy. We
         * must return with queue_lock held.
         *
         * Decrementing device_busy without checking it is OK, as all such
         * cases (host limits or settings) should run the queue at some
         * later time.
         */
        spin_lock_irq(q->queue_lock);
        blk_requeue_request(q, req);
        sdev->device_busy--;
        if (sdev->device_busy == 0)
                blk_plug_device(q);
 out:
        /* must be careful here...if we trigger the ->remove() function
         * we cannot be holding the q lock */
        spin_unlock_irq(q->queue_lock);
        put_device(&sdev->sdev_gendev);
        spin_lock_irq(q->queue_lock);
}

struct request *blk_peek_request(struct request_queue *q)
{
        struct request *rq;
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
                if (!(rq->cmd_flags & REQ_STARTED)) {
                        /*
                         * This is the first time the device driver
                         * sees this request (possibly after
                         * requeueing).  Notify IO scheduler.
                         */
                        if (rq->cmd_flags & REQ_SORTED)
                                elv_activate_rq(q, rq);

                        /*
                         * just mark as started even if we don't start
                         * it, a request that has been delayed should
                         * not be passed by new incoming requests
                         */
                        rq->cmd_flags |= REQ_STARTED;
                        trace_block_rq_issue(q, rq);
                }

                if (!q->boundary_rq || q->boundary_rq == rq) {
                        q->end_sector = rq_end_sector(rq);
                        q->boundary_rq = NULL;
                }

                if (rq->cmd_flags & REQ_DONTPREP)
                        break;

                if (q->dma_drain_size && blk_rq_bytes(rq)) {
                        /*
                         * make sure space for the drain appears we
                         * know we can do this because max_hw_segments
                         * has been adjusted to be one fewer than the
                         * device can handle
                         */
                        rq->nr_phys_segments++;
                }

                if (!q->prep_rq_fn)
                        break;

                ret = q->prep_rq_fn(q, rq);     /* prepare a command for the request */
                if (ret == BLKPREP_OK) {
                        break;
                } else if (ret == BLKPREP_DEFER) {
                        /*
                         * the request may have been (partially) prepped.
                         * we need to keep this request in the front to
                         * avoid resource deadlock.  REQ_STARTED will
                         * prevent other fs requests from passing this one.
                         */
                        if (q->dma_drain_size && blk_rq_bytes(rq) &&
                            !(rq->cmd_flags & REQ_DONTPREP)) {
                                /*
                                 * remove the space for the drain we added
                                 * so that we don't add it again
                                 */
                                --rq->nr_phys_segments;
                        }

                        rq = NULL;
                        break;
                } else if (ret == BLKPREP_KILL) {
                        rq->cmd_flags |= REQ_QUIET;
                        /*
                         * Mark this request as started so we don't trigger
                         * any debug logic in the end I/O path.
                         */
                        blk_start_request(rq);
                        __blk_end_request_all(rq, -EIO);
                } else {
                        printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
                        break;
                }
        }

        return rq;
}
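The prep_rq_fn contract used above: return BLKPREP_OK when the request is ready to dispatch (typically after attaching a driver command to rq->special and setting REQ_DONTPREP so it is not prepared twice), BLKPREP_DEFER when a resource is temporarily unavailable (the request stays at the head of the dispatch queue), or BLKPREP_KILL to fail it with -EIO. For SCSI the real callback is scsi_prep_fn; the sketch below uses hypothetical mydrv_* names purely to show the contract:

#include <linux/blkdev.h>
#include <linux/slab.h>

struct mydrv_cmd {
        /* driver-private per-request state would go here */
        int tag;
};

static struct kmem_cache *mydrv_cmd_cache;      /* created at module init, not shown */

/* Hypothetical prep_rq_fn; installed with blk_queue_prep_rq(q, mydrv_prep_fn). */
static int mydrv_prep_fn(struct request_queue *q, struct request *rq)
{
        struct mydrv_cmd *cmd;

        cmd = kmem_cache_alloc(mydrv_cmd_cache, GFP_ATOMIC);
        if (!cmd)
                return BLKPREP_DEFER;   /* no memory now; request stays queued and is retried */

        rq->special = cmd;              /* this is what req->special holds in scsi_request_fn */
        rq->cmd_flags |= REQ_DONTPREP;  /* skip prep if the request is requeued */
        return BLKPREP_OK;
}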

/* Fetch the next request to handle */
static inline struct request *__elv_next_request(struct request_queue *q)
{
        struct request *rq;

        while (1) {
                if (!list_empty(&q->queue_head)) {
                        /* 1. If the dispatch queue already has requests,
                         *    take the one at its head. */
                        rq = list_entry_rq(q->queue_head.next);
                        return rq;
                }

                /*
                 * Flush request is running and flush request isn't queueable
                 * in the drive, we can hold the queue till flush request is
                 * finished. Even we don't do this, driver can't dispatch next
                 * requests and will requeue them. And this can improve
                 * throughput too. For example, we have request flush1, write1,
                 * flush 2. flush1 is dispatched, then queue is hold, write1
                 * isn't inserted to queue. After flush1 is finished, flush2
                 * will be dispatched. Since disk cache is already clean,
                 * flush2 will be finished very soon, so looks like flush2 is
                 * folded to flush1.
                 * Since the queue is hold, a flag is set to indicate the queue
                 * should be restarted later. Please see flush_end_io() for
                 * details.
                 */
                if (q->flush_pending_idx != q->flush_running_idx &&
                    !queue_flush_queueable(q)) {
                        q->flush_queue_delayed = 1;
                        return NULL;
                }

                /* 2. Otherwise ask the I/O scheduler's dispatch callback to
                 *    move requests onto the dispatch queue, then loop. */
                if (unlikely(blk_queue_dead(q)) ||
                    !q->elevator->ops->elevator_dispatch_fn(q, 0))
                        return NULL;
        }
}
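elevator_dispatch_fn is the hook that moves requests from the scheduler's internal structures onto q->queue_head, returning non-zero if it dispatched anything and 0 if it has nothing left. The noop scheduler's version is the simplest illustration; roughly, for this kernel era:

/* Roughly the noop I/O scheduler's dispatch hook: take its first queued
 * request and push it onto the dispatch queue. */
static int noop_dispatch(struct request_queue *q, int force)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (!list_empty(&nd->queue)) {
                struct request *rq;
                rq = list_entry(nd->queue.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                elv_dispatch_sort(q, rq);       /* links rq into q->queue_head */
                return 1;
        }
        return 0;
}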

void blk_start_request(struct request *req)
{
        blk_dequeue_request(req);

        /*
         * We are now handing the request to the hardware, initialize
         * resid_len to full count and add the timeout handler.
         */
        req->resid_len = blk_rq_bytes(req);
        if (unlikely(blk_bidi_rq(req)))
                req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

        blk_add_timer(req);
}
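blk_dequeue_request, called at the top of blk_start_request, just unlinks the request from the dispatch queue and counts it as in flight. Its rough form in this kernel era (again, details may differ between versions):

/* Unlink the request from q->queue_head and account it as in flight. */
static void blk_dequeue_request(struct request *rq)
{
        struct request_queue *q = rq->q;

        BUG_ON(list_empty(&rq->queuelist));
        BUG_ON(ELV_ON_HASH(rq));

        list_del_init(&rq->queuelist);

        if (blk_account_rq(rq))
                q->in_flight[rq_is_sync(rq)]++;
}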