forked from firecracker-microvm/firecracker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_snapshot_restore_cross_kernel.py
187 lines (150 loc) · 6.44 KB
/
test_snapshot_restore_cross_kernel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test to restore snapshots across kernel versions."""
import json
import logging
import os
import pathlib
import re
import shutil
import pytest
from framework.artifacts import (
Artifact,
ArtifactType,
Snapshot,
create_net_devices_configuration,
)
from framework.builder import MicrovmBuilder
from framework.defs import DEFAULT_TEST_SESSION_ROOT_PATH, FC_WORKSPACE_DIR
from framework.utils import (
generate_mmds_get_request,
generate_mmds_session_token,
guest_run_fio_iteration,
populate_data_store,
)
from framework.utils_cpuid import CpuVendor, get_cpu_vendor
from framework.utils_vsock import check_vsock_device
from integration_tests.functional.test_balloon import (
MB_TO_PAGES,
get_stable_rss_mem_by_pid,
make_guest_dirty_memory,
)
# Network interface configurations handed to every Snapshot built in this
# module; requested once, at import time, with a count of 4.
# NOTE(review): the restore test reads `snapshot.net_ifaces[3]` for its MMDS
# check, so this count presumably has to stay at 4 or more -- confirm before
# changing it.
net_ifaces = create_net_devices_configuration(4)
def _test_balloon(microvm):
    """Check that inflating the balloon lowers the Firecracker process RSS.

    Three stable RSS readings are taken for the process identified by
    `microvm.jailer_clone_pid`: a baseline, one after dirtying guest memory
    and one after patching the balloon to 200 MiB. The second reading must
    exceed both the first and the third.
    """
    pid = microvm.jailer_clone_pid

    rss_baseline = get_stable_rss_mem_by_pid(pid)

    # Dirty 300MB of pages so that there is memory for the balloon to reclaim.
    make_guest_dirty_memory(microvm.ssh, amount=300 * MB_TO_PAGES)
    rss_after_dirtying = get_stable_rss_mem_by_pid(pid)
    assert rss_after_dirtying > rss_baseline

    # Inflate the balloon to get 200MB back from the guest.
    patch_response = microvm.balloon.patch(amount_mib=200)
    patch_accepted = microvm.api_session.is_status_no_content(
        patch_response.status_code
    )
    assert patch_accepted

    # The inflated balloon has to show up as a drop in RSS.
    rss_after_inflation = get_stable_rss_mem_by_pid(pid)
    assert rss_after_dirtying > rss_after_inflation
def _get_snapshot_files_paths(snapshot_dir):
    """Stage the files of one snapshot directory and report where they went.

    Every entry of `snapshot_dir` is copied out of it: the `.id_rsa` entry
    goes to the local path of an SSH key `Artifact`, any other entry goes
    directly under `Artifact.LOCAL_ARTIFACT_DIR`.

    Returns a `(mem, vmstate, disk, ssh_key)` tuple. The first three are the
    destination paths of the `.mem`, `.vmstate` and `.ext4` entries and
    `ssh_key` is the `Artifact` created for the key. Asserts that each copy
    landed and that all four items were found.
    """
    slot_by_suffix = {".mem": "mem", ".vmstate": "vmstate", ".ext4": "disk"}
    staged = dict.fromkeys(slot_by_suffix.values())
    key_artifact = None

    for entry in os.listdir(snapshot_dir):
        destination = os.path.join(Artifact.LOCAL_ARTIFACT_DIR, entry)

        if entry.endswith(".id_rsa"):
            # The key is placed at the artifact's own local path, whose parent
            # directory may not exist yet.
            key_artifact = Artifact(
                None,
                os.path.basename(entry),
                ArtifactType.SSH_KEY,
                DEFAULT_TEST_SESSION_ROOT_PATH,
            )
            destination = key_artifact.local_path()
            key_parent = pathlib.Path(os.path.dirname(destination))
            key_parent.mkdir(parents=True, exist_ok=True)
        else:
            for suffix, slot in slot_by_suffix.items():
                if entry.endswith(suffix):
                    staged[slot] = destination
                    break

        # Copy to default root session (unrecognised entries are copied too).
        origin = os.path.join(snapshot_dir, entry)
        shutil.copy(origin, destination)
        assert os.path.isfile(destination)

    mem, vmstate, disk = staged["mem"], staged["vmstate"], staged["disk"]

    # Ensure all required snapshot files are present inside the dir.
    assert all((mem, vmstate, disk, key_artifact))

    # Change ssh key permissions.
    os.chmod(key_artifact.local_path(), 0o400)

    return mem, vmstate, disk, key_artifact
def _test_mmds(vm, mmds_net_iface):
    """Check that the MMDS data store can be read from inside the guest.

    The data store is populated through `populate_data_store`, SSH is pointed
    at `mmds_net_iface.guest_ip`, a route is added in the guest for that
    interface, and a token-authenticated GET against 169.254.169.254 must
    return exactly the JSON document that was stored.
    """
    expected_store = {"latest": {"meta-data": {"ami-id": "ami-12345678"}}}
    populate_data_store(vm, expected_store)

    mmds_address = "169.254.169.254"
    guest_ip = mmds_net_iface.guest_ip
    vm.ssh_config["hostname"] = guest_ip

    # Insert new rule into the routing table of the guest.
    route_cmd = f"ip route add {guest_ip} dev {mmds_net_iface.dev_name}"
    route_exit_code, _, _ = vm.ssh.execute_command(route_cmd)
    assert route_exit_code == 0

    # The base microVM had MMDS version 2 configured, which was persisted
    # across the snapshot-restore, so a session token is requested first.
    session_token = generate_mmds_session_token(vm.ssh, mmds_address, token_ttl=60)
    get_cmd = generate_mmds_get_request(mmds_address, token=session_token)
    _, get_stdout, _ = vm.ssh.execute_command(get_cmd)
    fetched_store = json.load(get_stdout)
    assert fetched_store == expected_store
@pytest.mark.nonci
@pytest.mark.parametrize(
    "cpu_template",
    ["C3", "T2", "T2S", "None"] if get_cpu_vendor() == CpuVendor.INTEL else ["None"],
)
def test_snap_restore_from_artifacts(
    bin_cloner_path, bin_vsock_path, test_fc_session_root_path, cpu_template
):
    """
    Restore from snapshots obtained with all supported guest kernel versions.

    The snapshot artifacts have been generated through the
    `create_snapshot_artifacts` devtool command. The base microVM snapshotted
    has been built from the config file at
    ~/firecracker/tools/create_snapshot_artifact/complex_vm_config.json.

    For every artifact directory matching `cpu_template`, a microVM is
    restored and its net, MMDS, balloon, vsock and block devices are
    exercised. The restored microVM is killed even when one of those checks
    fails, so a failing iteration does not leave a Firecracker process behind.
    """
    logger = logging.getLogger("cross_kernel_snapshot_restore")
    builder = MicrovmBuilder(bin_cloner_path)

    snapshot_root_name = "snapshot_artifacts"
    snapshot_root_dir = os.path.join(FC_WORKSPACE_DIR, snapshot_root_name)
    pathlib.Path(Artifact.LOCAL_ARTIFACT_DIR).mkdir(parents=True, exist_ok=True)

    # Iterate through all subdirectories based on CPU template
    # in the snapshot root dir.
    subdir_filter = r".*_" + re.escape(cpu_template) + r"_guest_snapshot"
    snap_subdirs = [
        d for d in os.listdir(snapshot_root_dir) if re.match(subdir_filter, d)
    ]
    if not snap_subdirs:
        # Without this the test passes silently having checked nothing.
        logger.warning(
            "No snapshot artifacts matching %r found in %s.",
            subdir_filter,
            snapshot_root_dir,
        )

    for subdir_name in snap_subdirs:
        snapshot_dir = os.path.join(snapshot_root_dir, subdir_name)
        assert os.path.isdir(snapshot_dir)

        logger.info("Working with snapshot artifacts in %s.", snapshot_dir)
        mem, vmstate, disk, ssh_key = _get_snapshot_files_paths(snapshot_dir)

        logger.info("Creating snapshot from artifacts...")
        snapshot = Snapshot(mem, vmstate, [disk], net_ifaces, ssh_key)

        logger.info("Loading microVM from snapshot...")
        vm, _ = builder.build_from_snapshot(snapshot, resume=True, diff_snapshots=False)

        # From here on the microVM exists, so make sure it is always killed.
        try:
            # Ensure microVM is running.
            response = vm.machine_cfg.get()
            assert vm.api_session.is_status_ok(response.status_code)
            assert vm.state == "Running"

            # Test that net devices have connectivity after restore.
            for iface in snapshot.net_ifaces:
                logger.info("Testing net device %s...", iface.dev_name)
                vm.ssh_config["hostname"] = iface.guest_ip
                exit_code, _, _ = vm.ssh.execute_command("sync")
                assert exit_code == 0

            logger.info("Testing data store behavior...")
            _test_mmds(vm, snapshot.net_ifaces[3])

            logger.info("Testing balloon device...")
            _test_balloon(vm)

            logger.info("Testing vsock device...")
            check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh)

            # Run fio on the guest.
            # TODO: check the result of FIO or use fsck to check that the root
            # device is not corrupted. No obvious errors will be returned here.
            guest_run_fio_iteration(vm.ssh, 0)
        finally:
            vm.kill()