OpenStack Create Snapshot源码流程分析,理解创建快照的本质

源代码流程分析
1 nova/compute/api.py
# NOTE(melwitt): We don't check instance lock for snapshot because lock is
#                intended to prevent accidental change/delete of instances
@wrap_check_policy
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
                                vm_states.PAUSED, vm_states.SUSPENDED])
def snapshot(self, context, instance, name, extra_properties=None):
    """Snapshot the given instance.

    Creates the image entry first, marks the instance as having a snapshot
    pending, and then hands the actual work over to the compute service
    through the compute RPC API.

    :param context: request context forwarded to the image and RPC calls
    :param instance: nova.db.sqlalchemy.models.Instance
    :param name: name of the snapshot
    :param extra_properties: dict of extra image properties to include
                             when creating the image.
    :returns: A dict containing image metadata
    """
    # Annotation: create the image entry through the glance API so the
    # snapshot can later be uploaded as an image. Both regular images and
    # snapshots can be uploaded to glance and used to boot instances, but
    # glance labels them with different types to tell them apart
    # (type=image vs. type=snapshot); 'snapshot' is the type passed here.
    image_meta = self._create_image(context, instance, name,
                                    'snapshot',
                                    extra_properties=extra_properties)

    # NOTE(comstud): Any changes to this method should also be made
    # to the snapshot_instance() method in nova/cells/messaging.py
    # Annotation: set the task state to image_snapshot_pending. The save
    # is conditional on the instance currently having no task state.
    instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
    instance.save(expected_task_state=[None])

    # Annotation: invoke snapshot_instance() from nova/compute/rpcapi.py
    # over RPC, passing the id of the image entry created above.
    self.compute_rpcapi.snapshot_instance(context, instance,
                                          image_meta['id'])

    return image_meta
2 nova/compute/rpcapi.py
# 梳理下流程: (1)用户发起create snapshot的请求; (2)nova-api服务接收到这个请求并进行前期处理,即第1节(nova/compute/api.py)中代码的处理流程;
# (3)真正的snapshot操作是需要在nova-compute节点上执行的,所以nova-api需要向nova-compute发送message。
# 由于OpenStack环境中会有很多个nova-compute,所以需要通过server=_compute_host(None, instance)来获取虚拟机所在的host,并向其发送message。
def snapshot_instance(self, ctxt, instance, image_id):
    """Cast a ``snapshot_instance`` message to the instance's compute host.

    The target server is resolved with ``_compute_host(None, instance)``
    and the call is pinned to RPC version 3.0.

    :param ctxt: request context passed along with the cast
    :param instance: instance to snapshot
    :param image_id: id of the image entry the snapshot will be stored in
    """
    target_server = _compute_host(None, instance)
    prepared_client = self.client.prepare(server=target_server,
                                          version='3.0')
    prepared_client.cast(ctxt, 'snapshot_instance',
                         instance=instance,
                         image_id=image_id)
3 nova/virt/libvirt/driver.py 
def snapshot(self, context, instance, image_id, update_task_state):
    """Create snapshot from a running VM instance.

    This command only works with qemu 0.14+

    The disk is extracted into a temporary directory (either with a live
    snapshot or with the cold path, which managed-saves the guest first and
    restores it afterwards) and the resulting file is then uploaded through
    the image API.

    :param context: request context passed to the image API
    :param instance: instance being snapshotted
    :param image_id: id of the image entry that receives the snapshot data
    :param update_task_state: callable invoked with ``task_state`` (and
                              ``expected_state``) as the snapshot advances
    :raises exception.InstanceNotRunning: if the domain cannot be found
    """
    try:
        virt_dom = self._get_domain(instance)
    except exception.InstanceNotFound:
        raise exception.InstanceNotRunning(instance_id=instance['uuid'])

    base_image_ref = instance['image_ref']
    base = compute_utils.get_image_metadata(
        context, self._image_api, base_image_ref, instance)
    snapshot = self._image_api.get(context, image_id)

    disk_path = libvirt_utils.find_disk(virt_dom)
    source_format = libvirt_utils.get_disk_type(disk_path)
    image_format = CONF.libvirt.snapshot_image_format or source_format

    # NOTE(bfilippov): save lvm and rbd as raw
    if image_format in ('lvm', 'rbd'):
        image_format = 'raw'

    metadata = self._create_snapshot_metadata(base,
                                              instance,
                                              image_format,
                                              snapshot['name'])
    snapshot_name = uuid.uuid4().hex
    state = LIBVIRT_POWER_STATE[virt_dom.info()[0]]

    # NOTE(rmk): Live snapshots require QEMU 1.3 and Libvirt 1.0.0.
    #            These restrictions can be relaxed as other configurations
    #            can be validated.
    # NOTE(dgenin): Instances with LVM encrypted ephemeral storage require
    #               cold snapshots. Currently, checking for encryption is
    #               redundant because LVM supports only cold snapshots.
    #               It is necessary in case this situation changes in the
    #               future.
    # Annotation: this is why snapshots are currently cold rather than
    # live. A bug was filed against live snapshots and the community
    # considers them unstable for now, so the cold path is the default and
    # this is enforced by hard-coding. The condition below enables
    # live_snapshot only when it holds; according to the article,
    # MIN_LIBVIRT_LIVESNAPSHOT_VERSION is 1.3.0 while the newest libvirt
    # release at the time of writing was 1.2.11, so the check fails and
    # the cold snapshot path is taken.
    if (self._host.has_min_version(MIN_LIBVIRT_LIVESNAPSHOT_VERSION,
                                   MIN_QEMU_LIVESNAPSHOT_VERSION,
                                   REQ_HYPERVISOR_LIVESNAPSHOT)
            and source_format not in ('lvm', 'rbd')
            and not CONF.ephemeral_storage_encryption.enabled):
        live_snapshot = True
        # Abort is an idempotent operation, so make sure any block
        # jobs which may have failed are ended. This operation also
        # confirms the running instance, as opposed to the system as a
        # whole, has a new enough version of the hypervisor (bug 1193146).
        try:
            virt_dom.blockJobAbort(disk_path, 0)
        except libvirt.libvirtError as ex:
            # Only an "unsupported configuration" error disables the live
            # path; any other libvirt error is deliberately ignored here.
            if ex.get_error_code() == libvirt.VIR_ERR_CONFIG_UNSUPPORTED:
                live_snapshot = False
    else:
        live_snapshot = False

    # NOTE(rmk): We cannot perform live snapshots when a managedSave
    #            file is present, so we will use the cold/legacy method
    #            for instances which are shutdown.
    if state == power_state.SHUTDOWN:
        live_snapshot = False

    # NOTE(dkang): managedSave does not work for LXC
    # Annotation: when live_snapshot is False, two things happen before
    # the snapshot is taken:
    #   (1) _detach_pci_devices detaches the PCI devices attached to the
    #       guest;
    #   (2) _detach_sriov_ports detaches the guest's SR-IOV devices, e.g.
    #       SR-IOV capable network interfaces.
    # The guest is then managed-saved.
    if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
        if state in (power_state.RUNNING, power_state.PAUSED):
            self._detach_pci_devices(
                virt_dom, pci_manager.get_instance_pci_devs(instance))
            self._detach_sriov_ports(instance, virt_dom)
            virt_dom.managedSave(0)

    # Annotation: pick the snapshot backend matching the guest's storage;
    # snapshotting differs per backend. For the local file system the
    # default is qcow2.
    snapshot_backend = self.image_backend.snapshot(instance,
                                                   disk_path,
                                                   image_type=source_format)

    if live_snapshot:
        LOG.info(_LI("Beginning live snapshot process"),
                 instance=instance)
    else:
        LOG.info(_LI("Beginning cold snapshot process"),
                 instance=instance)

    # Annotation: move the task state to image_pending_upload, since the
    # snapshot has to be uploaded once it has been produced.
    update_task_state(task_state=task_states.IMAGE_PENDING_UPLOAD)

    # Annotation: the snapshot procedure is currently:
    #   (1) create a temporary directory under the configured snapshots
    #       directory, e.g. nova/instances/snapshots/tmptHr585;
    #   (2) write the snapshot into that directory, see
    #       snapshot_backend.snapshot_extract(out_path, image_format);
    #   (3) upload the result through the glance API, see
    #       self._image_api.update.
    snapshot_directory = CONF.libvirt.snapshots_directory
    fileutils.ensure_tree(snapshot_directory)
    with utils.tempdir(dir=snapshot_directory) as tmpdir:
        try:
            out_path = os.path.join(tmpdir, snapshot_name)
            if live_snapshot:
                # NOTE(xqueralt): libvirt needs o+x in the temp directory
                os.chmod(tmpdir, 0o701)
                self._live_snapshot(context, instance, virt_dom, disk_path,
                                    out_path, image_format, base)
            else:
                # Annotation: per the article, for a qcow2 disk this boils
                # down to running
                #   qemu-img convert -f qcow2 -O qcow2 disk_path out_path
                # which is what actually produces the snapshot file.
                snapshot_backend.snapshot_extract(out_path, image_format)
        finally:
            new_dom = None
            # NOTE(dkang): because previous managedSave is not called
            #              for LXC, _create_domain must not be called.
            if CONF.libvirt.virt_type != 'lxc' and not live_snapshot:
                # Annotation: restore the guest to the state it was in
                # before the snapshot was taken.
                if state == power_state.RUNNING:
                    new_dom = self._create_domain(domain=virt_dom)
                elif state == power_state.PAUSED:
                    new_dom = self._create_domain(
                        domain=virt_dom,
                        launch_flags=libvirt.VIR_DOMAIN_START_PAUSED)
                if new_dom is not None:
                    self._attach_pci_devices(
                        new_dom,
                        pci_manager.get_instance_pci_devs(instance))
                    self._attach_sriov_ports(context, instance, new_dom)
            LOG.info(_LI("Snapshot extracted, beginning image upload"),
                     instance=instance)

        # Upload that image to the image service. This has to stay inside
        # the tempdir context so that out_path still exists.
        update_task_state(task_state=task_states.IMAGE_UPLOADING,
                          expected_state=task_states.IMAGE_PENDING_UPLOAD)
        # Annotation: upload the generated snapshot file to glance.
        with libvirt_utils.file_open(out_path) as image_file:
            self._image_api.update(context,
                                   image_id,
                                   metadata,
                                   image_file)
            LOG.info(_LI("Snapshot image upload complete"),
                     instance=instance)
结论:
目前OpenStack默认的快照方式都是cold snapshot:首先关机,然后执行如下命令生成一个镜像文件,接着重新开机,最后调用glance api将镜像上传。
qemu-img convert -f qcow2 -O qcow2 <disk_path> <out_path> 
所以目前并不是真正意义的快照,其实和关闭虚拟机,拷贝一份,再上传没有本质区别。

原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/57944.html

(0)
上一篇 2021年8月9日
下一篇 2021年8月9日

相关推荐

发表回复

登录后才能评论