您的位置:首页 > 运维架构 > 网站架构

1.1Qemu 用户态架构

2015-07-26 15:44 1701 查看
本节首先分析Qemu的初始化top level流程;从而引出Qemu各大功能模块的描述。最后分析Qemu与内核态KVM的通讯接口。

1.1.1Main的主流程

main– (vl.c function main)

a) module_call_init(MODULE_INIT_QOM);--设备驱动初始化和注册 type_init(x86_cpu_register_types)(target-i386/cpu.c)

b) module_call_init(MODULE_INIT_MACHINE); -- 机器类型注册初始化

machine_init(pc_machine_init)

c) socket_init

d) qemu_init_cpu_loop

e) configure_accelerator--tcg对KVM而言采用kvm type, 并调用kvm_init

accel_list[i].init();accel_list[] = {

{ "tcg", "tcg",tcg_available, tcg_init, &tcg_allowed },

{ "xen", "Xen",xen_available, xen_init, &xen_allowed },

{ "kvm","KVM", kvm_available, kvm_init, &kvm_allowed }, //open /dev/kvm

{ "qtest", "QTest",qtest_available, qtest_init, &qtest_allowed }, }

f) qemu_init_main_loop; –

qemu_mutex_lock

qemu_event_init

qemu_signal_init

g) qemu_init_cpu_loop

h) cpu_exec_init_all

1) memory_map_init建立系统内存的管理信息,第3章分析

memory_region_init

address_space_init

2) call io_mem_init ==> memory_region_init_io

i) bdrv_init_with_whitelist();块设备类型初始化

block层的注册函数:block_init(bdrv_init)

j) blk_mig_init块设备migration功能初始化

k) qdev_machine_initQemu要使用的Machine信息初始化

l) machine->init(&args);调用machine初始化,建立虚拟机的硬件信息

对于PC 而言该函数是pc_init_pci(pc_piix.c);Machine type 在pc_machine_init中注册

m) init_displaystate --qemu 本身的display init

n) vm_start() 启动虚拟机,是vcpu开始执行

o) main_loop()

1.1.2Qemu 设备管理架构

(1) Qemu设备分类

Qemu将设备分为如下几类:

/*
 * Module categories used by QEMU's registration framework.
 * Each category is initialized at a distinct point during startup
 * via module_call_init(category).
 */
typedef enum {
    MODULE_INIT_BLOCK,   /* block/storage drivers, e.g. scsi, qcow2 */
    MODULE_INIT_MACHINE, /* target machine types for the VM */
    MODULE_INIT_QAPI,
    MODULE_INIT_QOM,     /* device classes (QEMU Object Model) */
    MODULE_INIT_MAX
} module_init_type;

不同类别的注册函数如下:

/* Per-category registration helpers: each wraps module_init() with the
 * matching module_init_type so drivers register with one macro call. */
#define block_init(function)   module_init(function, MODULE_INIT_BLOCK)

#define machine_init(function) module_init(function, MODULE_INIT_MACHINE)

#define qapi_init(function)    module_init(function, MODULE_INIT_QAPI)

#define type_init(function)    module_init(function, MODULE_INIT_QOM)

/* Defines a constructor-attribute function that runs before main() and
 * records `function` under `type`; module_call_init(type) later invokes
 * all functions registered for that category.
 * NOTE: the quoted original lost the continuation backslash and the
 * closing brace of the generated function; restored here. */
#define module_init(function, type)                                         \
static void __attribute__((constructor)) do_qemu_init_ ## function(void)    \
{                                                                           \
    register_module_init(function, type);                                   \
}

(2) Cpu类别

(i386-target/cpu.c)

/* QOM type descriptor for the x86 CPU device class.
 * Registered via type_register_static() below; object_new(TYPE_X86_CPU)
 * allocates instance_size bytes and runs instance_init on them. */
static const TypeInfo x86_cpu_type_info = {

.name = TYPE_X86_CPU,

.parent = TYPE_CPU, // inherits from the generic CPU type

.instance_size = sizeof(X86CPU),

.instance_init= x86_cpu_initfn, // per-instance constructor

.abstract = false, // concrete type: may be instantiated directly

.class_size = sizeof(X86CPUClass),

.class_init = x86_cpu_common_class_init, // one-time class constructor

};

/* MODULE_INIT_QOM hook: registers the x86 CPU type with the QOM core. */
static void x86_cpu_register_types(void)

{

type_register_static(&x86_cpu_type_info); // register the type into the global type hash table

}

type_init(x86_cpu_register_types)

(target-i386/help.c)

/* Creates and initializes an x86 CPU instance for the given model string.
 * (Excerpt — elided parts marked with dots in the original article.) */
X86CPU *cpu_x86_init(const char *cpu_model)

{

X86CPU *cpu;

.............

cpu = X86_CPU(object_new(TYPE_X86_CPU)); // instantiate a CPU object from the registered x86 CPU type

..................

}

object的管理代码在(object.c中)

object_new ==> object_new_with_type==> object_initialize_with_type==>

type_initialize==>type_info->Class_init

object_initialize_with_type==> type_info-> instance_init

Qemu采用了类似面向对象的方式来管理虚拟机中的设备;2.1节将分析这种机制。

(3) Machine 类别:

(hw/pc_piix.c)

/* Machine descriptor for the "pc-1.3" (alias "pc") board.
 * .init builds the virtual hardware; registered below by pc_machine_init(). */
static QEMUMachine pc_machine_v1_3 = {

.name = "pc-1.3",

.alias = "pc",

.desc = "Standard PC",

.init = pc_init_pci, // builds the virtual machine's hardware

.max_cpus = 255,

.is_default = 1, // used when no -M option is given

.default_machine_opts = KVM_MACHINE_OPTIONS,

};

/* MODULE_INIT_MACHINE hook: registers the PC machine type(s). */
static void pc_machine_init(void)

{

qemu_register_machine(&pc_machine_v1_3);

.................

}

machine_init(pc_machine_init);

machine->init(main.c) ==> pc_init_pci==>pc_init1

(4) Object 与 Objectclass

Object用于记录设备对象的信息,而object class则记录设备的类别信息。

object_initialize_with_type ==> x86_cpu_initfn(Object * object)

/* Base object of the QEMU Object Model: every device instance starts
 * with this header. The class pointer carries per-type information,
 * the parent pointer implements inheritance. */
struct Object

{

/*< private >*/

ObjectClass *class;// pointer to this object's class (type) information

QTAILQ_HEAD(, ObjectProperty) properties; // an object may carry multiple properties

uint32_t ref;// reference count

Object *parent; // pointer to the parent object, used to implement inheritance

};

a. type_initialize 中将分配class:

ti->class =g_malloc0(ti->class_size);

ti->class->type = ti;

b. object_initialize_with_type 中 obj->class = type->class;

1.1.3Qemu 调用kvm内核模块流程

(1) KVM初始化

configure_accelerator--tcg ==> kvm_init

==>qemu_open("/dev/kvm", O_RDWR); //KVM访问句柄

==>kvm_ioctl(s, KVM_GET_API_VERSION, 0);

==>kvm_ioctl(s,KVM_CREATE_VM, 0); //创建virtual machine访问句柄

==> kvm_arch_init ==>

kvm_vm_ioctl(s,KVM_SET_IDENTITY_MAP_ADDR, &identity_base

kvm_vm_ioctl(s,KVM_SET_TSS_ADDR, identity_base + 0x1000)

kvm_vm_ioctl(s,KVM_SET_NR_MMU_PAGES, shadow_mem)

==>kvm_irqchip_create(s) ==> kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP)

(2) CPU虚拟化访问接口

pc_init1 ==> pc_cpus_init(hw/pc.c)==> pc_new_cpu ==>cpu_x86_init==>x86_cpu_realize(hw/helper.c)==>x86_cpu_realize(target-i386/cpus.c)==> qemu_init_vcpu==>qemu_kvm_start_vcpu

qemu_kvm_start_vcpu

/*
 * Spawn the per-vCPU thread for KVM execution and block until the
 * thread has finished creating the vCPU (env->created is set by
 * qemu_kvm_cpu_thread_fn under qemu_global_mutex).
 * NOTE: the quoted original had the fused token "voidqemu_kvm_start_vcpu";
 * the missing space is restored here.
 */
static void qemu_kvm_start_vcpu(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    cpu->thread = g_malloc0(sizeof(QemuThread));
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);
    /* start the vCPU thread running qemu_kvm_cpu_thread_fn */
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
                       QEMU_THREAD_JOINABLE);
    /* wait until the new thread signals that vCPU creation completed */
    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

qemu_kvm_cpu_thread_fn => kvm_init_vcpu(kvm-all.c)

/* Per-vCPU thread body: creates the kernel vCPU, signals the spawning
 * thread, then loops forever doing KVM_RUN / VM-Exit handling. */
static void *qemu_kvm_cpu_thread_fn(void*arg)

{

CPUArchState *env = arg;

CPUState *cpu = ENV_GET_CPU(env);

int r;

qemu_mutex_lock(&qemu_global_mutex);

qemu_thread_get_self(cpu->thread);

env->thread_id = qemu_get_thread_id();

cpu_single_env = env;

r= kvm_init_vcpu(env); // create the kernel-side vCPU (KVM_CREATE_VCPU etc.)

if (r < 0) {

fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));

exit(1);

}

qemu_kvm_init_cpu_signals(env);

/* signal CPU creation */

env->created = 1;

qemu_cond_signal(&qemu_cpu_cond);

while (1) {

// runs only while the VM is in the running state (main ==> vm_start puts the VM into running)

if (cpu_can_run(env)) {

r = kvm_cpu_exec(env); // enter the guest; returns on VM-Exit

if (r == EXCP_DEBUG) {

cpu_handle_guest_debug(env);

}

}

qemu_kvm_wait_io_event(env); // sleep until there is work for this vCPU

}

return NULL;

}

/* Create the kernel vCPU for env and map its shared kvm_run area.
 * (Excerpt — elided parts marked with dots in the original article.) */
int kvm_init_vcpu(CPUArchState *env)

{

.......

ret= kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index); // create the vCPU access fd

if (ret < 0) {

DPRINTF("kvm_create_vcpu failed\n");

goto err;

}

env->kvm_fd = ret;

env->kvm_state = s;

env->kvm_vcpu_dirty = 1;

mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE,0); // size of the kernel's per-vCPU kvm_run region

......

env->kvm_run = mmap(NULL, mmap_size,PROT_READ | PROT_WRITE, MAP_SHARED,

env->kvm_fd, 0); // map the kernel kvm_run structure into user space

......

ret = kvm_arch_init_vcpu(env); // query the kernel's CPU-virtualization capabilities

......

}

/* Main vCPU execution loop: repeatedly VM-Enter via KVM_RUN and handle
 * each VM-Exit reason until the guest halts or a fatal error occurs.
 * (Excerpt — elided parts marked with dots in the original article.) */
int kvm_cpu_exec(CPUArchState *env)

{

......

do {

......

kvm_arch_pre_run(env, run);

if (env->exit_request) {

qemu_cpu_kick_self();

}

qemu_mutex_unlock_iothread();

// CPU performs VM-Entry; when this ioctl returns the CPU took a VM-Exit or the VM-Entry failed

run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);

qemu_mutex_lock_iothread();

......//

// dispatch on the VM-Exit reason

switch (run->exit_reason) {

case KVM_EXIT_IO: // VM-Exit caused by a port I/O operation

kvm_handle_io(run->io.port,

(uint8_t *)run +run->io.data_offset,

run->io.direction,

run->io.size,

run->io.count);

ret = 0;

break;

case KVM_EXIT_MMIO: // VM-Exit caused by MMIO

cpu_physical_memory_rw(run->mmio.phys_addr,

run->mmio.data,

run->mmio.len,

run->mmio.is_write);

ret = 0;

break;

case KVM_EXIT_IRQ_WINDOW_OPEN:

ret = EXCP_INTERRUPT;

break;

case KVM_EXIT_SHUTDOWN:

qemu_system_reset_request();

ret = EXCP_INTERRUPT;

break;

case KVM_EXIT_UNKNOWN:

fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64"\n",

(uint64_t)run->hw.hardware_exit_reason);

ret = -1;

break;

case KVM_EXIT_INTERNAL_ERROR: // internal CPU error reported by KVM

ret = kvm_handle_internal_error(env,run);

break;

default:

ret = kvm_arch_handle_exit(env,run);

break;

}

}while (ret == 0); // after handling the VM-Exit, loop back for another VM-Entry unless halted or a fatal error occurred

if (ret < 0) {

cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);

vm_stop(RUN_STATE_INTERNAL_ERROR);

}

env->exit_request = 0;

return ret;

}



小结Qemu访问KVM的句柄有:

(1) KVM访问句柄 (2)虚拟机访问句柄 (3) VCPU访问句柄

(2)内存虚拟化访问接口

内存管理结构初始化

kvm_init ==> memory_listener_register(&kvm_memory_listener,NULL);

/* Callbacks through which the memory core notifies KVM of address-space
 * changes; region_add/region_del ultimately issue
 * KVM_SET_USER_MEMORY_REGION ioctls. */
static MemoryListener kvm_memory_listener ={

.begin = kvm_begin,

.commit = kvm_commit,

.region_add = kvm_region_add, // new guest memory region -> tell the kernel

.region_del = kvm_region_del,

.region_nop = kvm_region_nop,

.log_start = kvm_log_start, // dirty-page logging control (for migration)

.log_stop = kvm_log_stop,

.log_sync = kvm_log_sync,

.log_global_start = kvm_log_global_start,

.log_global_stop = kvm_log_global_stop,

.eventfd_add = kvm_eventfd_add, // ioeventfd registration with KVM

.eventfd_del = kvm_eventfd_del,

.priority = 10,

};

memory_region_add_subregion ==> listener_add_address_space ==>region_add

kvm_region_add==> kvm_set_phys_mem==>kvm_set_user_memory_region ==>

kvm_vm_ioctl(s,KVM_SET_USER_MEMORY_REGION, &mem);

虚拟机内存初始化:

pc_init1==>pc_memory_init==>memory_region_add_subregion(memory.c) 添加内存区域到虚拟机的内存管理结构; 第3章将分析内存虚拟化。

1.1.4Qemu IO管理

main==>cpu_exec_init_all()

/* One-time setup of the system memory map and the MMIO dispatch tables. */
void cpu_exec_init_all(void)

{

memory_map_init(); // build system memory management structures

io_mem_init(); // set up MMIO regions (memory_region_init_io)

}

X86 有两种硬件访问方式PIO 与 MMIO, 下面分别讲解

(1) PIO

isa_cirrus_vga采用IO port方式访问

IO port的注册

vga_initfn (cirrus_vga.c) ==> cirrus_init_common ==> register_ioport_read(ioport.c)

/* Register a read handler for the port range [start, start+length).
 * (Excerpt — `bsize` is derived from `size` in code elided by the article.)
 * Returns 0 on success; hw_error() aborts on conflicting opaque pointers. */
int register_ioport_read(pio_addr_t start,int length, int size,

IOPortReadFunc *func,void *opaque)

{

......

for(i = start; i < start + length; ++i) {

ioport_read_table[bsize][i] = func; // dispatch table indexed by access size, then port

if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)

hw_error("register_ioport_read: invalid opaque for address0x%x",

i);

ioport_opaque[i] = opaque;

}

return 0;

}

当虚拟机由IO port引起VM-Exit时

kvm_handle_io==> cpu_inl (ioport.c)==> ioport_read

/* Dispatch a port read: look up the handler registered for this access
 * size (index: 0=byte, 1=word, 2=long) and port address, falling back
 * to the default handlers when none was registered. */
static uint32_t ioport_read(int index,uint32_t address)

{

static IOPortReadFunc * const default_func[3] = {

default_ioport_readb,

default_ioport_readw,

default_ioport_readl

};

IOPortReadFunc *func = ioport_read_table[index][address];

if (!func)

func = default_func[index];

return func(ioport_opaque[address], address);

}

(2)MMIO

/*
 * Common Cirrus VGA setup (excerpt): creates the device's low-memory
 * MMIO container and attaches the cirrus_vga_mem_ops-backed region.
 * NOTE: the quoted original had the fused token "intdevice_id" and lost
 * the return type during paste; both restored here.
 */
static void cirrus_init_common(CirrusVGAState *s, int device_id, int is_pci,
                               MemoryRegion *system_memory)
{
    /* ... */
    memory_region_init(&s->low_mem_container,
                       "cirrus-lowmem-container", 0x20000);
    /* MMIO region: guest accesses are routed to cirrus_vga_mem_ops */
    memory_region_init_io(&s->low_mem, &cirrus_vga_mem_ops, s,
                          "cirrus-low-memory", 0x20000);
    memory_region_add_subregion(&s->low_mem_container, 0, &s->low_mem);
    /* ... */
}

定义mmio的read,write

static const MemoryRegionOpscirrus_vga_mem_ops = {

.read = cirrus_vga_mem_read,

.write = cirrus_vga_mem_write,

.endianness = DEVICE_LITTLE_ENDIAN,

.impl = {

.min_access_size = 1,

.max_access_size = 1,

},

};

当虚拟机由MMIO引起VM-Exit时

cpu_physical_memory_rw(exec.c)==>io_mem_read(memory.c)==>

memory_region_dispatch_read==> access_with_adjusted_size

static uint64_tmemory_region_dispatch_read1(MemoryRegion *mr,

target_phys_addr_t addr,

unsigned size)

{

.......

access_with_adjusted_size(addr, &data, size,

mr->ops->impl.min_access_size,

mr->ops->impl.max_access_size,

memory_region_read_accessor, mr);

return data;

}

memory_region_read_accessor ==>mr->ops->read

第5.1节将详细介绍io的管理框架

1.1.5Qemu IO thread

IO thread 用来管理虚拟机的IO 读写,如对block设备的访问。5.4节将做详细介绍

/* One iteration of the main (I/O) loop: compute the poll timeout, fill
 * the fd sets from registered IO handlers, wait for events, then
 * dispatch handlers, timers and bottom-halves.
 * (Excerpt — nfds/rfds/wfds/xfds are file-scope variables not shown.) */
int main_loop_wait(int nonblocking)

{

int ret;

uint32_t timeout = UINT32_MAX;

if (nonblocking) {

timeout = 0;

}else {

qemu_bh_update_timeout(&timeout); // shrink timeout if bottom-halves are pending

}

/* poll any events */

/* XXX: separate device handlers from system ones */

nfds = -1;

FD_ZERO(&rfds);

FD_ZERO(&wfds);

FD_ZERO(&xfds);

#ifdef CONFIG_SLIRP

slirp_update_timeout(&timeout);

slirp_select_fill(&nfds, &rfds, &wfds, &xfds);

#endif

qemu_iohandler_fill(&nfds, &rfds, &wfds, &xfds); // add all io_handlers fds to the select sets

ret = os_host_main_loop_wait(timeout); // block until an fd is ready or timeout expires

qemu_iohandler_poll(&rfds, &wfds, &xfds, ret); // invoke callbacks for ready fds

#ifdef CONFIG_SLIRP

slirp_select_poll(&rfds, &wfds, &xfds, (ret < 0));

#endif

qemu_run_all_timers(); // fire expired timers

/* Check bottom-halves last in case any of the earlier events triggered

them. */

qemu_bh_poll();

return ret;

}

Qemu中常用的IO描述符有下面几类:

· block io:虚拟磁盘相关的io,为了保证高性能,主要使用aio;
· qemu_notify_event
例子:qemu的时钟模拟利用了linux kernel的signalfd, 定期产生SIGALRM信号(qemu-timer.c);
· eventfd:主要用于qemu和kvm之间的notifier, 比如qemu的模拟设备可以通过notifier向kvm发送一个模拟中断,kvm也可以通过notifier向qemu报告guest的各种状态;

address_space_update_topology==>address_space_update_ioeventfds==>address_space_add_del_ioeventfds==>MEMORY_LISTENER_CALL==>eventfd_add(kvm_mem_ioeventfd_add)==>kvm_vm_ioctl(kvm_state,KVM_IOEVENTFD,
&iofd);

· socket:用于虚拟机迁移,qmp管理等
该函数同时还负责轮询系统中所有的定时器,并调用定时器的回调函数;

IO Handler
用来表示一个IO描述符,其结构定义如下;iohandler.c中定义了一个全局的链表io_handlers,并提供qemu_set_fd_handler()和qemu_set_fd_handler2()函数将一个fd加入到这个链表QLIST_INSERT_HEAD; 在IO thread主循环中qemu_iohandler_fill()函数负责将io_handlers链表中的所有描述符,加入select测试集合。

IO thread同步

Qemu IO thread和vcputhread使用一个全局共享线程锁来保证同步,函数qemu_mutex_lock_iothread()和qemu_mutex_unlock_iothread()分别用来获取和释放该锁

1.1.6 Qemu的模块

下面的表格是本系列文章将会分析到的代码和其对应的模块:

模块名与描述

文件

章节

参数管理与main函数

Vl.c

Qemu-config.c

Arch_init.c

Qemu-opt.c

1.1

8.3

Kvm访问接口层

Target-i386\Kvm.c

Kvm-all.c

1.2

2章

设备对象模型

Qdev.c;

qdev-propreties.c

module.c

2.1

Machine与cpu管理

Hw\pc_piix.c

Hw\pc.c

Target-i386\Machine.c

cpu_exec.c

第2章

中断与时间管理

Hw\kvm\(ioapic.c, i8259.c,i8254.c, apic.c clockc)

第4章

内存管理

Memory.c

Memory-mapping.c

Exec.c

第3章

硬件辅助虚拟化

Hw\(pci.c, pcie.c, pci-bridge.c, piix_pci.c)

Hw\ide\(core.c,pci.c,piix.c, piix.c)

5.1

5.2

5.3

半虚拟化

Hw\(virtio.c, virtio-pci.c, virtio-ballon.c)

6章

直接io

Hw\kvm\pci-assign.c)

7章

块设备

Block.c

Blockdev.c

Block\raw-posix.c

5.4

异步io

Aio.c

posix-aio-compat.c

iohandler.c

main-loop.c

5.4

字符设备

Qemu-char.c

8.1

管理模块

Qmp.c; hmp.c

qdev-monitor.c

Monitor.c

Vmsave.c

8.1

8.2

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: