您的位置:首页 > 运维架构 > Linux

【Linux4.1.12源码分析】virtio_net之中断注册

2016-11-22 22:28 871 查看
virtio_net作为虚拟网卡驱动,最主要的就是实现网路报文收发,对比物理网卡,当收到网络报文时会产生中断,由中断处理函数接收网络报文,这一点可以类比到virtio_net。实际上,virtio_net设备的中断注册是在驱动的probe函数中完成的。

virtnet_probe函数

static int virtnet_probe(struct virtio_device *vdev)
{
int i, err;
struct net_device *dev;
struct virtnet_info *vi;
u16 max_queue_pairs;

if (!vdev->config->get) {
dev_err(&vdev->dev, "%s failure: config access disabled\n",
__func__);
return -EINVAL;
}

if (!virtnet_validate_features(vdev))
return -EINVAL;

/* Find if host supports multiqueue virtio_net device */
err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
struct virtio_net_config,
max_virtqueue_pairs, &max_queue_pairs);

/* We need at least 2 queue's */
if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
max_queue_pairs = 1;

/* Allocate ourselves a network device with room for our info */
dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); //创建net_device设备
if (!dev)
return -ENOMEM;

/* Set up network device as normal. */
dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
dev->netdev_ops = &virtnet_netdev;
dev->features = NETIF_F_HIGHDMA;

dev->ethtool_ops = &virtnet_ethtool_ops;
SET_NETDEV_DEV(dev, &vdev->dev);

/* Do we support "hardware" checksums? */
if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
/* This opens up the world of extra features. */
dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
if (csum)
dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;

if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
| NETIF_F_TSO_ECN | NETIF_F_TSO6;
}
/* Individual feature bits: what can host handle? */
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
dev->hw_features |= NETIF_F_TSO;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
dev->hw_features |= NETIF_F_TSO6;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
dev->hw_features |= NETIF_F_TSO_ECN;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
dev->hw_features |= NETIF_F_UFO;

dev->features |= NETIF_F_GSO_ROBUST;

if (gso)
dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
/* (!csum && gso) case will be fixed by register_netdev() */
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
dev->features |= NETIF_F_RXCSUM;

dev->vlan_features = dev->features;

/* Configuration may specify what MAC to use. Otherwise random. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
virtio_cread_bytes(vdev,
offsetof(struct virtio_net_config, mac), //配置mac地址
dev->dev_addr, dev->addr_len);
else
eth_hw_addr_random(dev); //随机生成mac地址

/* Set up our device-specific information */
vi = netdev_priv(dev);
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
vi->stats = alloc_percpu(struct virtnet_stats);
err = -ENOMEM;
if (vi->stats == NULL)
goto free;

for_each_possible_cpu(i) {
struct virtnet_stats *virtnet_stats;
virtnet_stats = per_cpu_ptr(vi->stats, i);
u64_stats_init(&virtnet_stats->tx_syncp);
u64_stats_init(&virtnet_stats->rx_syncp);
}

INIT_WORK(&vi->config_work, virtnet_config_changed_work);

/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
vi->big_packets = true;

if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;

if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || //vhost默认支持这两个特性
virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
vi->hdr_len = sizeof(struct virtio_net_hdr);

if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || //vhost默认不支持这个特性
virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) //vhost默认支持这个特性
vi->any_header_sg = true;

if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
vi->has_cvq = true;

if (vi->any_header_sg)
dev->needed_headroom = vi->hdr_len;

/* Use single tx/rx queue pair as default */
vi->curr_queue_pairs = 1;
vi->max_queue_pairs = max_queue_pairs;

/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi); //初始化virtqueue
if (err)
goto free_stats;

#ifdef CONFIG_SYSFS
if (vi->mergeable_rx_bufs)
dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

err = register_netdev(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
goto free_vqs;
}

virtio_device_ready(vdev);

/* Last of all, set up some receive buffers. */
for (i = 0; i < vi->curr_queue_pairs; i++) {
try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);

/* If we didn't even get one input buffer, we're useless. */
if (vi->rq[i].vq->num_free ==
virtqueue_get_vring_size(vi->rq[i].vq)) {
free_unused_bufs(vi);
err = -ENOMEM;
goto free_recv_bufs;
}
}

vi->nb.notifier_call = &virtnet_cpu_callback;
err = register_hotcpu_notifier(&vi->nb);
if (err) {
pr_debug("virtio_net: registering cpu notifier failed\n");
goto free_recv_bufs;
}

/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
netif_carrier_off(dev);
schedule_work(&vi->config_work);
} else {
vi->status = VIRTIO_NET_S_LINK_UP;
netif_carrier_on(dev);
}

pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
dev->name, max_queue_pairs);

return 0;

free_recv_bufs:
vi->vdev->config->reset(vdev);

free_receive_bufs(vi);
unregister_netdev(dev);
free_vqs:
cancel_delayed_work_sync(&vi->refill);
free_receive_page_frags(vi);
virtnet_del_vqs(vi);
free_stats:
free_percpu(vi->stats);
free:
free_netdev(dev);
return err;
}

init_vqs函数

static int init_vqs(struct virtnet_info *vi)
{
int ret;

/* Allocate send & receive queues */
ret = virtnet_alloc_queues(vi); //初始化vi的发送和接收队列
if (ret)
goto err;

ret = virtnet_find_vqs(vi); //初始化virtqueue
if (ret)
goto err_free;

get_online_cpus();
virtnet_set_affinity(vi);
put_online_cpus();

return 0;

err_free:
virtnet_free_queues(vi);
err:
return ret;
}virtnet_find_vqs函数
static int virtnet_find_vqs(struct virtnet_info *vi)
{
vq_callback_t **callbacks;
struct virtqueue **vqs;
int ret = -ENOMEM;
int i, total_vqs;
const char **names;

/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
* possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
* possible control vq.
*/
total_vqs = vi->max_queue_pairs * 2 +
virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); //vq数量等于收发队列+控制队列

/* Allocate space for find_vqs parameters */
vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL); //创建vq指针数据
if (!vqs)
goto err_vq;
callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL); //创建callback指针数组
if (!callbacks)
goto err_callback;
names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); //创建指向字符串的指针数组
if (!names)
goto err_names;

/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
callbacks[total_vqs - 1] = NULL; //设置控制vq的callback和name
names[total_vqs - 1] = "control";
}

/* Allocate/initialize parameters for send/receive virtqueues */
for (i = 0; i < vi->max_queue_pairs; i++) {
callbacks[rxq2vq(i)] = skb_recv_done; //收包队列回调函数
callbacks[txq2vq(i)] = skb_xmit_done; //发包队列回调函数
sprintf(vi->rq[i].name, "input.%d", i);
sprintf(vi->sq[i].name, "output.%d", i);
names[rxq2vq(i)] = vi->rq[i].name; //收包队列名
names[txq2vq(i)] = vi->sq[i].name; //发包队列名
}

ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, //实际调用vp_find_vqs函数
names);
if (ret)
goto err_find;

if (vi->has_cvq) {
vi->cvq = vqs[total_vqs - 1]; //设置cvq对象
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}

for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].vq = vqs[rxq2vq(i)]; //设置rq的vq对象
vi->sq[i].vq = vqs[txq2vq(i)]; //设置sq的vq对象
}

kfree(names);
kfree(callbacks);
kfree(vqs);

return 0;

err_find:
kfree(names);
err_names:
kfree(callbacks);
err_callback:
kfree(vq
bad0
s);
err_vq:
return ret;
}
vp_find_vqs函数
/* the config->find_vqs() implementation */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char *names[])
{
int err;

/* Try MSI-X with one vector per queue. */
err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true); //能够成功
if (!err)
return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */
err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
true, false);
if (!err)
return 0;
/* Finally fall back to regular interrupts. */
return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
false, false);
}vp_try_to_find_vqs函数
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char *names[],
bool use_msix,
bool per_vq_vectors)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u16 msix_vec;
int i, err, nvectors, allocated_vectors;

vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL); //创建二位指针数组,数量为nvqs
if (!vp_dev->vqs)
return -ENOMEM;

if (!use_msix) {
/* Old style: one normal interrupt for change and all vqs. */
err = vp_request_intx(vdev); //非msix中断,注册中断
if (err)
goto error_find;
} else {
if (per_vq_vectors) {
/* Best option: one for change interrupt, one per vq. */
nvectors = 1; //要多一个中断给配置使用
for (i = 0; i < nvqs; ++i)
if (callbacks[i])
++nvectors; //nvectors数为nvqs+1或nvqs
} else {
/* Second best: one for change, shared for all vqs. */
nvectors = 2;
}

err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
if (err)
goto error_find;
}

vp_dev->per_vq_vectors = per_vq_vectors;
allocated_vectors = vp_dev->msix_used_vectors;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
continue;
} else if (!callbacks[i] || !vp_dev->msix_enabled)
msix_vec = VIRTIO_MSI_NO_VECTOR;
else if (vp_dev->per_vq_vectors)
msix_vec = allocated_vectors++;
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error_find;
}

if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
continue;

/* allocate per-vq irq if available and necessary */
snprintf(vp_dev->msix_names[msix_vec],
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(vp_dev->msix_entries[msix_vec].vector, //实际进此分支,每个队列一个中断
vring_interrupt, 0,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err) {
vp_del_vq(vqs[i]);
goto error_find;
}
}
return 0;

error_find:
vp_del_vqs(vdev);
return err;
}

实际情况是virtio_net会为每个vq申请一个中断,中断处理函数为vring_interrupt。

vring_interrupt函数

irqreturn_t vring_interrupt(int irq, void *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);

if (!more_used(vq)) {
pr_debug("virtqueue interrupt with no work for %p\n", vq);
return IRQ_NONE;
}

if (unlikely(vq->broken))
return IRQ_HANDLED;

pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
if (vq->vq.callback)
vq->vq.callback(&vq->vq); //实际调用skb_recv_done或skb_xmit_done,收包队列为skb_recv_done

return IRQ_HANDLED;
}

skb_recv_done函数
static void skb_recv_done(struct virtqueue *rvq)
{
struct virtnet_info *vi = rvq->vdev->priv;
struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

/* Schedule NAPI, Suppress further interrupts if successful. */
if (napi_schedule_prep(&rq->napi)) {
virtqueue_disable_cb(rvq);
__napi_schedule(&rq->napi); //添加napi,触发软中断
}
}

至此,我们可以看到设备probe过程中会注册中断,中断的处理函数就是触发软中断,同时会关闭中断,即后端不会再触发中断,由poll读取vring进行收包。这里还有一个疑问留到以后解决,后端vhost_add_used_and_signal_n是怎么转化为中断的。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息