您的位置:首页 > 产品设计 > UI/UE

qemu的virtqueue实现

2016-04-27 16:27 176 查看
和内核一样,qemu也需要支持virtqueue,VirtQueue的定义如下

/* Not referenced in this excerpt; presumably the vring alignment in bytes
 * for virtio-pci (inferred from the name only -- confirm against users). */
#define VIRTIO_PCI_VRING_ALIGN         4096

/*
 * Layout of one descriptor-table entry.
 *
 * The vring_desc_*() accessors do not dereference this struct; they use it
 * only for sizeof()/offsetof() when computing the address of a field.
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* buffer address; virtqueue_pop() stores it in in_addr/out_addr */
    uint32_t len;    /* buffer length; for an INDIRECT entry, size of the indirect table */
    uint16_t flags;  /* tested against VRING_DESC_F_INDIRECT and VRING_DESC_F_WRITE */
    uint16_t next;   /* read by vring_desc_next(); presumably the next descriptor in the chain */
} VRingDesc;

/*
 * Layout of the available ring header followed by its entries.
 *
 * Used only for offsetof() computations by the vring_avail_*() accessors.
 * The trailing array is a C99 flexible array member (instead of the GNU
 * zero-length array extension); sizeof() and all field offsets are unchanged.
 */
typedef struct VRingAvail
{
    uint16_t flags;   /* read by vring_avail_flags() */
    uint16_t idx;     /* read by vring_avail_idx() */
    uint16_t ring[];  /* entries; vring_used_event() reads slot ring[vring.num] */
} VRingAvail;

/* One used-ring entry, written by vring_used_ring_id()/vring_used_ring_len(). */
typedef struct VRingUsedElem
{
    uint32_t id;   /* virtqueue_fill() stores elem->index (the head descriptor index) here */
    uint32_t len;  /* virtqueue_fill() stores its len argument here */
} VRingUsedElem;

/*
 * Layout of the used ring header followed by its entries.
 *
 * Used only for offsetof() computations by the vring_used_*() accessors.
 * The trailing array is a C99 flexible array member (instead of the GNU
 * zero-length array extension); sizeof() and all field offsets are unchanged.
 */
typedef struct VRingUsed
{
    uint16_t flags;        /* modified by vring_used_flags_set_bit()/_unset_bit() */
    uint16_t idx;          /* read/written by vring_used_idx()/vring_used_idx_set() */
    VRingUsedElem ring[];  /* used entries */
} VRingUsed;

/*
 * Per-queue ring metadata: the ring size plus the base addresses of its
 * three parts, which the vring_*() accessors use as the base for every
 * field access.
 */
typedef struct VRing
{
    unsigned int num;    /* ring size; used as the walk limit and the modulus for used-ring slots */
    unsigned int align;  /* not referenced in this excerpt */
    hwaddr desc;         /* base address of the descriptor table */
    hwaddr avail;        /* base address of the available ring */
    hwaddr used;         /* base address of the used ring */
} VRing;

/* Device-side (QEMU) state for a single virtqueue. */
struct VirtQueue
{
    VRing vring;  /* vring metadata */
    hwaddr pa;  /* address of the vring's memory */
    /* Next avail-ring position to consume; post-incremented by virtqueue_pop() */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used may be relied upon; cleared by virtqueue_flush() */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Incremented by virtqueue_pop(), decreased by count in virtqueue_flush() */
    int inuse;

    uint16_t vector;
    /* Callback taking the device and this queue; not invoked in this excerpt */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;  /* device passed to every guest-memory accessor */
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
可以看出VRing结构体的定义,qemu和内核在ABI上是一致的。virtqueue_init用于初始化vring的元数据,同时qemu提供了一系列接口来读写vring的不同成员,e.g.

/*
 * Read the addr field of descriptor i from the descriptor table located
 * at desc_pa, using a 64-bit load through virtio_ldq_phys().
 */
static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    const hwaddr field_pa = desc_pa + sizeof(VRingDesc) * i
                            + offsetof(VRingDesc, addr);

    return virtio_ldq_phys(vdev, field_pa);
}

/*
 * Read the len field of descriptor i from the descriptor table located
 * at desc_pa, using a 32-bit load through virtio_ldl_phys().
 */
static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
{
    const hwaddr field_pa = desc_pa + sizeof(VRingDesc) * i
                            + offsetof(VRingDesc, len);

    return virtio_ldl_phys(vdev, field_pa);
}

/*
 * Read the flags field of descriptor i from the descriptor table located
 * at desc_pa, using a 16-bit load through virtio_lduw_phys().
 */
static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
                                        int i)
{
    const hwaddr field_pa = desc_pa + sizeof(VRingDesc) * i
                            + offsetof(VRingDesc, flags);

    return virtio_lduw_phys(vdev, field_pa);
}

/*
 * Read the next field of descriptor i from the descriptor table located
 * at desc_pa, using a 16-bit load through virtio_lduw_phys().
 */
static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    const hwaddr field_pa = desc_pa + sizeof(VRingDesc) * i
                            + offsetof(VRingDesc, next);

    return virtio_lduw_phys(vdev, field_pa);
}

/* Read the flags field of this queue's available ring. */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    const hwaddr flags_pa = vq->vring.avail + offsetof(VRingAvail, flags);

    return virtio_lduw_phys(vq->vdev, flags_pa);
}

/* Read the idx field of this queue's available ring. */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    const hwaddr idx_pa = vq->vring.avail + offsetof(VRingAvail, idx);

    return virtio_lduw_phys(vq->vdev, idx_pa);
}

/* Read entry i of this queue's available ring (a 16-bit value). */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    const hwaddr entry_pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);

    return virtio_lduw_phys(vq->vdev, entry_pa);
}

/*
 * Read the used_event value, which is kept in the available ring in the
 * slot immediately after the last regular entry, i.e. ring[vring.num].
 */
static inline uint16_t vring_used_event(VirtQueue *vq)
{
    const unsigned int event_slot = vq->vring.num;

    return vring_avail_ring(vq, event_slot);
}

/* Store val into the id field of used-ring entry i (32-bit store). */
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    const hwaddr id_pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);

    virtio_stl_phys(vq->vdev, id_pa, val);
}

/* Store val into the len field of used-ring entry i (32-bit store). */
static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    const hwaddr len_pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);

    virtio_stl_phys(vq->vdev, len_pa, val);
}

/* Read the idx field of this queue's used ring. */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    const hwaddr idx_pa = vq->vring.used + offsetof(VRingUsed, idx);

    return virtio_lduw_phys(vq->vdev, idx_pa);
}

/* Store val into the idx field of this queue's used ring (16-bit store). */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    const hwaddr idx_pa = vq->vring.used + offsetof(VRingUsed, idx);

    virtio_stw_phys(vq->vdev, idx_pa, val);
}

/*
 * Set the bits in mask in the used ring's flags field.
 * Implemented as a load followed by a store of (old | mask).
 */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    const hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) | mask);
}

/*
 * Clear the bits in mask in the used ring's flags field.
 * Implemented as a load followed by a store of (old & ~mask).
 */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    const hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) & ~mask);
}
同时后端也提供了一系列接口来处理avail ring和used ring,e.g.

virtqueue_pop:主要用于根据available ring中新增的项,从descriptor table中找到对应的buffer,即guest新添加并交给后端处理的buffer

/*
 * Take the next available buffer chain off the queue and describe it in
 * *elem.
 *
 * Walks the descriptor chain whose head is found at vq->last_avail_idx
 * (following one level of indirect table if the head has
 * VRING_DESC_F_INDIRECT set), records each descriptor's address and length
 * in elem->in_* (VRING_DESC_F_WRITE set) or elem->out_* (not set), then
 * maps the collected buffers with virtqueue_map_sg().
 *
 * Side effects: advances vq->last_avail_idx, increments vq->inuse, and,
 * when the guest negotiated VIRTIO_RING_F_EVENT_IDX, updates avail_event.
 *
 * Returns 0 if virtqueue_num_heads() reports nothing new; otherwise
 * elem->in_num + elem->out_num. Malformed rings (bad indirect size, too
 * many descriptors, looped chain) are reported and terminate the process
 * via exit(1).
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;

    /* Nothing to do unless the avail ring has advanced past last_avail_idx
     * (per the original note, virtqueue_num_heads() compares the two). */
    if (!virtqueue_num_heads(vq, vq->last_avail_idx))
        return 0;

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Head descriptor index for the avail-ring slot at last_avail_idx;
     * the slot is consumed by the post-increment. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        /* Guest enabled EVENT_IDX: publish the current avail idx as
         * avail_event. */
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
        /* An indirect table must hold a whole number of descriptors. */
        if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
        /* From here on desc_pa is the indirect table and we start at its
         * first entry. */
        desc_pa = vring_desc_addr(vdev, desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is stored here; the address was saved in
         * in_addr/out_addr above and iov_base is left for the mapping
         * step below. */
        sg->iov_len = vring_desc_len(vdev, desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

    /* Now map what we have collected (per the original note, this fills in
     * each sg's iov_base with a host mapping of the saved address). */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    /* Remember the head descriptor index; virtqueue_fill() later writes it
     * into the used ring. */
    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
virtqueue_fill:当virtio后端(qemu/vhost)处理完guest放入avail ring中的buffer之后,用它把buffer解除映射并放入used ring

/*
 * Release the mappings of a processed element and record it in the used
 * ring.
 *
 * len is the byte count reported in the used entry; it is also spread
 * across the in_sg buffers, in order, as the access length handed to
 * cpu_physical_memory_unmap(). idx is an offset added to the used ring's
 * current idx to select the slot to write. The used idx itself is not
 * changed here (see virtqueue_flush()).
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int consumed = 0;
    unsigned int slot;
    int n;

    trace_virtqueue_fill(vq, elem, len, idx);

    /* Unmap the in_sg buffers, attributing at most the remaining part of
     * len to each one. */
    for (n = 0; n < elem->in_num; n++) {
        size_t chunk = MIN(len - consumed, elem->in_sg[n].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[n].iov_base,
                                  elem->in_sg[n].iov_len,
                                  1, chunk);

        consumed += chunk;
    }

    /* Unmap the out_sg buffers, each with its full length. */
    for (n = 0; n < elem->out_num; n++) {
        cpu_physical_memory_unmap(elem->out_sg[n].iov_base,
                                  elem->out_sg[n].iov_len,
                                  0, elem->out_sg[n].iov_len);
    }

    /* Slot = (requested offset + current used idx) modulo the ring size. */
    slot = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Fill in the used entry: head descriptor index and length. */
    vring_used_ring_id(vq, slot, elem->index);
    vring_used_ring_len(vq, slot, len);
}
virtqueue_flush:用于更新used ring的idx

/*
 * Advance the used ring's idx by count and decrease vq->inuse by the same
 * amount.
 *
 * A write barrier is issued first so that used entries written earlier are
 * ordered before the index update.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t prev_idx, next_idx;

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);

    prev_idx = vring_used_idx(vq);
    next_idx = prev_idx + count;
    vring_used_idx_set(vq, next_idx);
    vq->inuse -= count;

    /* Wrap-around comparison in 16-bit arithmetic: when the signed distance
     * from signalled_used to the new idx is smaller than the number of
     * entries just added, signalled_used is marked as no longer valid. */
    if (unlikely((int16_t)(next_idx - vq->signalled_used) <
                 (uint16_t)(next_idx - prev_idx))) {
        vq->signalled_used_valid = false;
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: