456fe99fa6
The balloon driver scrubs memory pages before giving them back to the hypervisor. Normally this is a good thing, as it avoids leaking the VM's memory data into Xen and other domains. But during initial startup, when maxmem is bigger than initial memory, Populate-on-Demand (PoD) is in use on HVM and PVH. This means that every page on the initial balloon down needs to be first mapped by Xen into the VM's memory (as it wasn't populated before - and in fact didn't contain any data), scrubbed by the kernel and then given back to Xen. This is a great waste of time. Such an operation with default settings (initial memory 400M, maxmem 4000M) can take a few seconds, delaying every VM startup (including DispVM). In extreme situations, such as running inside nested virtualization, the effect is much worse. Avoid this problem by disabling memory scrubbing during initial boot and re-enabling it as soon as user space kicks in - in the initramfs, before mounting the root filesystem, to be sure it is enabled before memory contains any kind of secrets. This commit handles only one case - when the kernel is managed by the VM itself. It is critical to enable the initramfs module whenever the xen_scrub_pages=0 kernel option is given, so make them depend on the same condition and ship them in the same package. Fixes QubesOS/qubes-issues#1963
113 lines
3.3 KiB
Bash
113 lines
3.3 KiB
Bash
#!/bin/sh
# Qubes VM initramfs init script: early environment setup.

echo "Qubes initramfs script here:"

# Create the mount points and bring up the kernel pseudo-filesystems that the
# rest of the script reads from (/proc, /sys) and waits on (/dev).
mkdir -p /proc /sys /dev
for early_fs in proc:/proc sysfs:/sys devtmpfs:/dev; do
    # each entry is "<fstype>:<mountpoint>"; the source name equals the fstype
    mount -t "${early_fs%%:*}" "${early_fs%%:*}" "${early_fs#*:}"
done

# re-enable xen-balloon pages scrubbing, after initial balloon down
# (only when the kernel exposes a writable knob for it)
scrub_knob=/sys/devices/system/xen_memory/xen_memory0/scrub_pages
if [ -w "$scrub_knob" ]; then
    echo 1 > "$scrub_knob"
fi

# The dmroot node is created later by this script; report it if something
# already provided it. Only a message is printed, execution continues.
if [ -e /dev/mapper/dmroot ]; then
    echo "Qubes: FATAL error: /dev/mapper/dmroot already exists?!"
fi

# Load the Xen block frontend: try the "xenblk" name first, then
# "xen-blkfront"; complain (without aborting) when neither loads.
if ! /sbin/modprobe xenblk && ! /sbin/modprobe xen-blkfront; then
    echo "Qubes: Cannot load Xen Block Frontend..."
fi
# Print all arguments, joined by spaces, as one line on stderr and terminate
# the script with exit status 1.
# Arguments: $@ - words of the error message
die() {
    # printf instead of echo: the message is emitted verbatim even when it
    # starts with an option-like word (-n, -e) or contains backslashes.
    printf '%s\n' "$*" >&2
    exit 1
}
echo "Waiting for /dev/xvda* devices..."
while ! [ -e /dev/xvda ]; do sleep 0.1; done

# Select the block device that holds the root filesystem and store its
# kernel name (without the /dev/ prefix) in ROOT_DEV.
# prefer partition if exists
if [ -b /dev/xvda1 ]; then
    if [ -d /dev/disk/by-partlabel ]; then
        # The link name carries the space of "Root filesystem" encoded as
        # a literal \x20. An absent link yields an empty ROOT_DEV, which is
        # handled by the fallback below.
        root_link=$(readlink "/dev/disk/by-partlabel/Root\\x20filesystem")
        ROOT_DEV=${root_link##*/}
    else
        # No by-partlabel directory: look the partition name up in sysfs and
        # take the partition's kernel name from the matching uevent path.
        # "[0-9][0-9]*" keeps multi-digit partition numbers intact.
        ROOT_DEV=$(grep -l "PARTNAME=Root filesystem" /sys/block/xvda/xvda*/uevent | \
            grep -o "xvda[0-9][0-9]*")
    fi
    if [ -z "$ROOT_DEV" ]; then
        # fallback to third partition
        ROOT_DEV=xvda3
    fi
else
    # unpartitioned disk: the filesystem lives directly on xvda
    ROOT_DEV=xvda
fi
SWAP_SIZE=$(( 1024 * 1024 * 2 )) # sectors, 1GB

# Prepare the volatile device (xvdc) and provide /dev/mapper/dmroot.
#  - read-only root device: xvdc gets a swap partition plus a second partition
#    used as copy-on-write store for a device-mapper snapshot of the root
#    device; dmroot is that snapshot.
#  - writable root device: xvdc only gets the swap partition and dmroot is a
#    symlink to the root device itself.
# Every fatal error goes through die (message on stderr, exit status 1).
if [ "$(cat "/sys/class/block/$ROOT_DEV/ro")" = 1 ]; then
    echo "Qubes: Doing COW setup for AppVM..."

    while ! [ -e /dev/xvdc ]; do sleep 0.1; done
    VOLATILE_SIZE=$(cat /sys/class/block/xvdc/size) # sectors
    ROOT_SIZE=$(cat "/sys/class/block/$ROOT_DEV/size") # sectors
    if [ "$VOLATILE_SIZE" -lt "$SWAP_SIZE" ]; then
        die "volatile.img smaller than 1GB, cannot continue"
    fi
    # partition 1: swap of SWAP_SIZE sectors, partition 2: rest of the device
    if ! /sbin/sfdisk -q --unit S /dev/xvdc >/dev/null <<EOF
1,$SWAP_SIZE,S
,,L
EOF
    then
        die "Qubes: failed to setup partitions on volatile device"
    fi
    while ! [ -e /dev/xvdc1 ]; do sleep 0.1; done
    /sbin/mkswap /dev/xvdc1
    while ! [ -e /dev/xvdc2 ]; do sleep 0.1; done

    # dm table: snapshot of the root device covering all of its sectors, with
    # xvdc2 as COW store, non-persistent (N), chunk size 16 sectors
    echo "0 $ROOT_SIZE snapshot /dev/$ROOT_DEV /dev/xvdc2 N 16" | \
        /sbin/dmsetup create dmroot || die "Qubes: FATAL: cannot create dmroot!"
    /sbin/dmsetup mknodes dmroot
    echo "Qubes: done."
else
    echo "Qubes: Doing R/W setup for TemplateVM..."
    while ! [ -e /dev/xvdc ]; do sleep 0.1; done
    # single partition: swap of SWAP_SIZE sectors
    if ! /sbin/sfdisk -q --unit S /dev/xvdc >/dev/null <<EOF
1,$SWAP_SIZE,S
EOF
    then
        die "Qubes: failed to setup partitions on volatile device"
    fi
    while ! [ -e /dev/xvdc1 ]; do sleep 0.1; done
    /sbin/mkswap /dev/xvdc1
    # root device is writable: use it directly under the dmroot name
    ln -s "../$ROOT_DEV" /dev/mapper/dmroot
    echo "Qubes: done."
fi
# Mount the root filesystem read-only, make the modules of the running kernel
# available inside it, then hand control over to the real init.
/sbin/modprobe ext4

mkdir -p /sysroot
# Nothing below can work without the root filesystem, so stop here with a
# clear message instead of failing later in switch_root.
mount /dev/mapper/dmroot /sysroot -o ro ||
    die "Qubes: FATAL: cannot mount root filesystem"
NEWROOT=/sysroot

kver=$(uname -r)
if ! [ -d "$NEWROOT/lib/modules/$kver/kernel" ]; then
    # The root filesystem has no modules for the running kernel; take them
    # from the xvdd device instead.
    echo "Waiting for /dev/xvdd device..."
    while ! [ -e /dev/xvdd ]; do sleep 0.1; done

    # Mount only `uname -r` subdirectory, to leave the rest of /lib/modules writable
    mkdir -p /tmp/modules
    mount -n -t ext3 /dev/xvdd /tmp/modules
    if ! [ -d "$NEWROOT/lib/modules/$kver" ]; then
        # the bind-mount target is missing: briefly remount the root
        # read-write to create it
        mount "$NEWROOT" -o remount,rw
        mkdir -p "$NEWROOT/lib/modules/$kver"
        mount "$NEWROOT" -o remount,ro
    fi
    mount --bind "/tmp/modules/$kver" "$NEWROOT/lib/modules/$kver"
    # only the bind mount inside the new root is kept; drop the temporary
    # mount point
    umount /tmp/modules
    rmdir /tmp/modules
fi

umount /dev /sys /proc

exec /sbin/switch_root "$NEWROOT" /sbin/init