Merge pull request #29 from nix-community/static_ips

kexec: restore static ips after kexec
This commit is contained in:
Jörg Thalheim 2022-11-27 20:05:22 +01:00 committed by GitHub
commit eb802f5652
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 345 additions and 83 deletions

View file

@ -29,8 +29,8 @@ build_kexec_bundle() {
# Build the kexec installer tarball for one nixpkgs tag and architecture and
# print the path of the resulting tarball on stdout.
#   $1 tag:  nixpkgs git ref, fetched as an archive tarball from GitHub
#   $2 arch: passed to nix-build as the `system` argument
#   $3 tmp:  directory in which the nix-build result symlink is created
# NOTE(review): this text comes from a rendered diff, so the pre-rename and
# post-rename variants of the test/build lines both appear below; presumably only
# the ./nix/kexec-installer/ paths belong to the new revision - confirm against
# the real file.
build_kexec_installer() {
declare -r tag=$1 arch=$2 tmp=$3
# run the test once we have kvm support in github actions
# ignore=$(nix-build ./nix/kexec-installer-test.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch")
out=$(nix-build '<nixpkgs/nixos>' -o "$tmp/kexec-installer-$arch" -I nixos-config=./nix/kexec-installer.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch" -A config.system.build.kexecTarball)
# ignore=$(nix-build ./nix/kexec-installer/test.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch")
out=$(nix-build '<nixpkgs/nixos>' -o "$tmp/kexec-installer-$arch" -I nixos-config=./nix/kexec-installer/module.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch" -A config.system.build.kexecTarball)
# The tarball lives inside the build output under a name that embeds the arch.
echo "$out/nixos-kexec-installer-$arch.tar.gz"
}

View file

@ -1,75 +0,0 @@
# NixOS VM test for the kexec installer.
# node1 is a regular EFI/systemd-boot machine; node2 is built from
# ./kexec-installer.nix and only supplies the kexec tarball that node1 unpacks
# and boots into.
{ pkgs ? import <nixpkgs> {} }:
let
makeTest = import (pkgs.path + "/nixos/tests/make-test-python.nix");
# Apply the test driver to the package set and system this file was called with.
makeTest' = args: makeTest args {
inherit pkgs;
inherit (pkgs) system;
};
in makeTest' {
name = "kexec-installer";
meta = with pkgs.lib.maintainers; {
maintainers = [ mic92 ];
};
nodes = {
# Machine that performs the kexec; sshd is enabled so host keys exist to compare.
node1 = { ... }: {
virtualisation.vlans = [ ];
virtualisation.memorySize = 2 * 1024 + 512;
virtualisation.diskSize = 4 * 1024;
virtualisation.useBootLoader = true;
virtualisation.useEFIBoot = true;
boot.loader.systemd-boot.enable = true;
boot.loader.efi.canTouchEfiVariables = true;
services.openssh.enable = true;
};
# Installer image; pkgs.hello is used by the test script as a marker that is
# absent on node1 before the kexec and present afterwards.
node2 = { pkgs, modulesPath, ... }: {
virtualisation.vlans = [ ];
environment.systemPackages = [ pkgs.hello ];
imports = [
./kexec-installer.nix
];
};
};
# Python test script: plain kexec reboot first, then kexec into node2's image
# and check that the ssh host key and root's authorized_keys are unchanged.
testScript = { nodes, ... }: ''
# Test whether reboot via kexec works.
node1.wait_for_unit("multi-user.target")
node1.succeed('kexec --load /run/current-system/kernel --initrd /run/current-system/initrd --command-line "$(</proc/cmdline)"')
node1.execute("systemctl kexec >&2 &", check_return=False)
node1.connected = False
node1.connect()
node1.wait_for_unit("multi-user.target")
# Check if the machine with netboot-minimal.nix profile boots up
node2.wait_for_unit("multi-user.target")
node2.shutdown()
node1.wait_for_unit("sshd.service")
host_ed25519_before = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub")
node1.succeed('ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -q -N ""')
root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub')
# Kexec node1 to the toplevel of node2 via the kexec-boot script
node1.succeed('touch /run/foo')
node1.fail('hello')
node1.succeed('tar -xf ${nodes.node2.config.system.build.kexecTarball}/nixos-kexec-installer-${pkgs.system}.tar.gz -C /root')
node1.execute('/root/kexec/run')
# wait for machine to kexec
node1.execute('sleep 9999', check_return=False)
node1.succeed('! test -e /run/foo')
node1.succeed('hello')
node1.succeed('[ "$(hostname)" = "node2" ]')
node1.wait_for_unit("sshd.service")
host_ed25519_after = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub")
assert host_ed25519_before == host_ed25519_after, f"{host_ed25519_before} != {host_ed25519_after}"
root_ed25519_after = node1.succeed("cat /root/.ssh/authorized_keys")
assert root_ed25519_before == root_ed25519_after, f"{root_ed25519_before} != {root_ed25519_after}"
node1.shutdown()
'';
}

View file

@ -1,5 +1,9 @@
{ config, lib, modulesPath, pkgs, ... }:
{
let
restoreNetwork = pkgs.writers.writePython3 "restore-network" {
flakeIgnore = ["E501"];
} ./restore_routes.py;
in {
imports = [
(modulesPath + "/installer/netboot/netboot-minimal.nix")
];
@ -32,17 +36,27 @@
for p in /etc/ssh/ssh_host_*; do
cp -a "$p" ssh
done
# Save the current addresses and routes as JSON so they end up in the extra
# initrd archive and can be restored after kexec.
if type -p ip &>/dev/null; then
  ip --json addr > addrs.json
  ip -4 --json route > routes-v4.json
  ip -6 --json route > routes-v6.json
else
  # Warnings belong on stderr; the previous "2>&1" left them on stdout.
  echo "Skip saving static network addresses because no iproute2 binary is available." >&2
  echo "The image can only rely on DHCP to get network access after reboot!" >&2
fi
find | cpio -o -H newc | gzip -9 > ../extra.gz
popd
cat "''${SCRIPT_DIR}/initrd" extra.gz > final-initrd
cat extra.gz >> "''${SCRIPT_DIR}/initrd"
rm -r "$INITRD_TMP"
"$SCRIPT_DIR/kexec" --load "''${SCRIPT_DIR}/bzImage" \
--initrd=final-initrd \
--initrd="''${SCRIPT_DIR}/initrd" \
--command-line "init=${config.system.build.toplevel}/init ${toString config.boot.kernelParams}"
# kexec will map the new kernel in memory so we can remove the kernel at this point
rm -r "$INITRD_TMP"
# Disconnect our background kexec from the terminal
echo "machine will boot into nixos in 6s..."
if [[ -e /dev/kmsg ]]; then
@ -76,17 +90,42 @@
# Not really needed. Saves a few bytes and the only service we are running is sshd, which we want to be reachable.
networking.firewall.enable = false;
systemd.network.enable = true;
networking.dhcpcd.enable = false;
# for detection if we are on kexec
environment.etc.is_kexec.text = "true";
# One-shot unit that runs the restore-network script on the JSON dumps in
# /root/network and lets it write networkd units to /etc/systemd/network.
# It is skipped unless all three dumps exist.
systemd.services.restoreNetwork = {
  wantedBy = [ "multi-user.target" ];
  # Pull in network-pre.target and finish before it is reached.
  wants = [ "network-pre.target" ];
  before = [ "network-pre.target" ];
  unitConfig = {
    ConditionPathExists = [
      "/root/network/addrs.json"
      "/root/network/routes-v4.json"
      "/root/network/routes-v6.json"
    ];
  };
  serviceConfig.Type = "oneshot";
  serviceConfig.RemainAfterExit = true;
  serviceConfig.ExecStart = [
    "${restoreNetwork} /root/network/addrs.json /root/network/routes-v4.json /root/network/routes-v6.json /etc/systemd/network"
  ];
};
# Restore ssh host and user keys if they are available.
# This avoids warnings of unknown ssh keys.
# Also copy the saved network state (*.json) for the restoreNetwork service.
boot.initrd.postMountCommands = ''
  mkdir -m 700 -p /mnt-root/root/.ssh
  mkdir -m 755 -p /mnt-root/etc/ssh
  mkdir -m 755 -p /mnt-root/root/network
  if [[ -f ssh/authorized_keys ]]; then
    install -m 400 ssh/authorized_keys /mnt-root/root/.ssh
  fi
  install -m 400 ssh/ssh_host_* /mnt-root/etc/ssh
  # The JSON dumps are optional: they are not written when the old system
  # had no iproute2 binary, in which case the glob stays unexpanded.
  for f in *.json; do
    if [ -e "$f" ]; then
      cp "$f" /mnt-root/root/network/
    fi
  done
'';
}

View file

@ -0,0 +1,111 @@
import json
import sys
from pathlib import Path
from typing import Any
def filter_interfaces(network: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Select the interfaces that need a generated network unit.

    Args:
        network: Interface entries as loaded from the `ip --json addr` dump.

    Returns:
        A new list with one shallow copy per kept interface whose
        ``addr_info`` contains only the static, non-link-local addresses.
        Loopback devices and devices without a MAC address are dropped, as are
        interfaces that have neither a static nor a dynamic address.  The
        input entries are left unmodified.
    """
    output = []
    for net in network:
        if net.get("link_type") == "loopback":
            continue
        if not net.get("address"):
            # We need a MAC address to match devices reliably
            continue
        static_addrs = []
        has_dynamic_address = False
        # A missing "addr_info" key is treated like an empty address list.
        for addr in net.get("addr_info", []):
            # no link-local ipv4/ipv6
            if addr.get("scope") == "link":
                continue
            # do not explicitly configure addresses from dhcp or router advertisement
            if addr.get("dynamic", False):
                has_dynamic_address = True
                continue
            static_addrs.append(addr)
        if static_addrs or has_dynamic_address:
            # Copy instead of rewriting the caller's dict in place.
            output.append({**net, "addr_info": static_addrs})
    return output
def filter_routes(routes: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the routes whose "protocol" is not dhcp, kernel or ra.

    Those protocols mark routes that come from addresses with subnets, DHCP
    and router advertisements; all remaining routes are kept in input order.
    """
    skipped_protocols = ("dhcp", "kernel", "ra")
    return [
        entry for entry in routes if entry.get("protocol") not in skipped_protocols
    ]
def generate_networkd_units(
    interfaces: list[dict[str, Any]], routes: list[dict[str, Any]], directory: Path
) -> None:
    """Write one ``<ifname>.network`` unit per interface into ``directory``.

    Args:
        interfaces: Interface entries; each needs "ifname", "address" (the MAC
            used in the [Match] section) and "addr_info" entries carrying
            "local" and "prefixlen".
        routes: Route entries; every route whose "dev" equals the interface
            name becomes a [Route] section.  Routes without a "dev" key
            belong to no interface and are skipped.
        directory: Target directory, created together with missing parents.
    """
    directory.mkdir(parents=True, exist_ok=True)
    for interface in interfaces:
        name = f"{interface['ifname']}.network"
        addresses = [
            f"Address = {addr['local']}/{addr['prefixlen']}"
            for addr in interface["addr_info"]
        ]

        route_sections = []
        for route in routes:
            # .get(): device-less routes would otherwise raise KeyError.
            if route.get("dev") != interface["ifname"]:
                continue

            route_section = "[Route]\n"
            if route["dst"] != "default":
                # can be skipped for default routes
                route_section += f"Destination = {route['dst']}\n"
            gateway = route.get("gateway")
            if gateway:
                route_section += f"Gateway = {gateway}\n"
            # we may ignore on-link default routes here, but I don't see how
            # they would be useful for internet connectivity anyway
            route_sections.append(route_section)

        # FIXME in some networks we might not want to trust dhcp or router advertisements
        unit = (
            "\n"
            "[Match]\n"
            f"MACAddress = {interface['address']}\n"
            "[Network]\n"
            "DHCP = yes\n"
            "IPv6AcceptRA = yes\n"
        )
        unit += "\n".join(addresses)
        unit += "\n" + "\n".join(route_sections)
        (directory / name).write_text(unit)
def main() -> None:
    """Command-line entry point.

    Expects four arguments: the address dump, the IPv4 route dump, the IPv6
    route dump (all JSON files) and the directory for the generated networkd
    units.  Prints a usage line to stderr and exits with status 1 when fewer
    arguments are given.
    """
    if len(sys.argv) < 5:
        print(
            f"USAGE: {sys.argv[0]} addresses routes-v4 routes-v6 networkd-directory",
            file=sys.stderr,
        )
        sys.exit(1)

    addr_file, v4_file, v6_file, unit_dir = sys.argv[1:5]

    # Load all three dumps, in argument order, before any filtering happens.
    raw_addresses, raw_v4, raw_v6 = (
        json.loads(Path(dump).read_text()) for dump in (addr_file, v4_file, v6_file)
    )

    kept_interfaces = filter_interfaces(raw_addresses)
    kept_routes = filter_routes(raw_v4) + filter_routes(raw_v6)
    generate_networkd_units(kept_interfaces, kept_routes, Path(unit_dir))


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,187 @@
# NixOS VM test for the kexec installer, including restoration of static
# addresses and routes after the kexec.
{ pkgs ? import <nixpkgs> {} }:
let
makeTest = import (pkgs.path + "/nixos/tests/make-test-python.nix");
# Apply the test driver to the package set and system this file was called with.
makeTest' = args: makeTest args {
inherit pkgs;
inherit (pkgs) system;
};
in makeTest' {
name = "kexec-installer";
meta = with pkgs.lib.maintainers; {
maintainers = [ mic92 ];
};
nodes = {
# Machine that performs the kexec: an EFI/systemd-boot VM on vlan 1 whose eth1
# carries static addresses and routes in addition to DHCP and router
# advertisements.
node1 = { modulesPath, ... }: {
virtualisation.vlans = [ 1 ];
environment.noXlibs = false; # avoid recompilation
imports = [
(modulesPath + "/profiles/minimal.nix")
];
virtualisation.memorySize = 2 * 1024 + 512;
virtualisation.diskSize = 4 * 1024;
virtualisation.useBootLoader = true;
virtualisation.useEFIBoot = true;
boot.loader.systemd-boot.enable = true;
boot.loader.efi.canTouchEfiVariables = true;
services.openssh.enable = true;
networking = {
useNetworkd = true;
useDHCP = false;
};
systemd.network = {
networks = {
# systemd-networkd will load the first network unit file
# that matches, ordered lexicographically by filename.
# /etc/systemd/network/{40-eth1,99-main}.network already
# exist. This network unit must be the one loaded for the test,
# which is why it is given a name that sorts first.
"01-eth1" = {
name = "eth1";
address = [
# Some static addresses that we want to see in the kexeced image
"192.168.42.1/24"
"42::1/64"
];
routes = [
# Some static routes that we want to see in the kexeced image
{ routeConfig = { Destination = "192.168.43.0/24"; }; }
{ routeConfig = { Destination = "192.168.44.0/24"; Gateway = "192.168.43.1"; }; }
{ routeConfig = { Destination = "43::0/64"; }; }
{ routeConfig = { Destination = "44::1/64"; Gateway = "43::1"; }; }
];
networkConfig = {
DHCP = "yes";
IPv6AcceptRA = true;
};
};
};
};
};
# Installer system built from ./module.nix; the test script unpacks its
# kexecTarball on node1. pkgs.hello serves as a marker: `hello` must fail on
# node1 before the kexec and succeed afterwards.
node2 = { pkgs, modulesPath, ... }: {
environment.systemPackages = [ pkgs.hello ];
imports = [
./module.nix
];
};
# Peer on vlan 1 that provides the dynamic side of the network: a DHCPv4
# server on 10.0.0.1/24 and IPv6 router advertisements for 2001:db8::/64.
router = { config, pkgs, ... }: {
virtualisation.vlans = [ 1 ];
networking = {
useNetworkd = true;
useDHCP = false;
firewall.enable = false;
};
systemd.network = {
networks = {
# systemd-networkd will load the first network unit file
# that matches, ordered lexicographically by filename.
# /etc/systemd/network/{40-eth1,99-main}.network already
# exist. This network unit must be the one loaded for the test,
# which is why it is given a name that sorts first.
"01-eth1" = {
name = "eth1";
address = [
"2001:db8::1/64"
];
ipv6Prefixes = [
{ ipv6PrefixConfig = { Prefix = "2001:db8::/64"; AddressAutoconfiguration = true; OnLink = true; }; }
];
ipv6RoutePrefixes = [
{ ipv6RoutePrefixConfig = { Route = "::/0"; LifetimeSec = 3600; }; }
];
networkConfig = {
DHCPServer = true;
Address = "10.0.0.1/24";
IPv6SendRA = true;
};
# NOTE(review): PoolSize = 1 presumably limits the server to a single lease
# (enough for node1) - confirm against systemd.network(5).
dhcpServerConfig = {
PoolOffset = 100;
PoolSize = 1;
EmitRouter = true;
};
};
};
};
};
};
# Python test script. Sequence: wait for the router and for node1 to reach it,
# record the ssh host key and root's authorized_keys, kexec node1 into node2's
# installer tarball, then check that the keys are unchanged, the router is
# reachable again and node1's static addresses and routes were restored.
testScript = { nodes, ... }: ''
# Test whether reboot via kexec works.
router.wait_for_unit("network-online.target")
router.succeed("ip addr >&2")
router.succeed("ip route >&2")
router.succeed("ip -6 route >&2")
router.succeed("networkctl status eth1 >&2")
node1.wait_until_succeeds("ping -c1 10.0.0.1")
node1.wait_until_succeeds("ping -c1 2001:db8::1")
node1.succeed("ip addr >&2")
node1.succeed("ip route >&2")
node1.succeed("ip -6 route >&2")
node1.succeed("networkctl status eth1 >&2")
host_ed25519_before = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub")
node1.succeed('ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -q -N ""')
root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub')
# Kexec node1 to the toplevel of node2 via the kexec-boot script
node1.succeed('touch /run/foo')
node1.fail('hello')
node1.succeed('tar -xf ${nodes.node2.config.system.build.kexecTarball}/nixos-kexec-installer-${pkgs.system}.tar.gz -C /root')
node1.execute('/root/kexec/run')
# wait for machine to kexec
node1.execute('sleep 9999', check_return=False)
node1.succeed('! test -e /run/foo')
node1.succeed('hello')
node1.succeed('[ "$(hostname)" = "node2" ]')
node1.wait_for_unit("sshd.service")
host_ed25519_after = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub")
assert host_ed25519_before == host_ed25519_after, f"{host_ed25519_before} != {host_ed25519_after}"
root_ed25519_after = node1.succeed("cat /root/.ssh/authorized_keys")
assert root_ed25519_before == root_ed25519_after, f"{root_ed25519_before} != {root_ed25519_after}"
# See if we can reach the router after kexec
node1.wait_for_unit("restoreNetwork.service")
node1.wait_until_succeeds("cat /etc/systemd/network/eth1.network >&2")
node1.wait_until_succeeds("ping -c1 10.0.0.1")
node1.wait_until_succeeds("ping -c1 2001:db8::1")
# Check if static addresses have been restored
node1.wait_until_succeeds("ping -c1 42::1")
node1.wait_until_succeeds("ping -c1 192.168.42.1")
out = node1.wait_until_succeeds("ip route get 192.168.43.2")
print(out)
assert "192.168.43.2 dev eth1" in out
out = node1.wait_until_succeeds("ip route get 192.168.44.2")
print(out)
assert "192.168.44.2 via 192.168.43.1" in out
out = node1.wait_until_succeeds("ip route get 43::2")
print(out)
assert "43::2 from :: dev eth1" in out
out = node1.wait_until_succeeds("ip route get 44::2")
print(out)
assert "44::2 from :: via 43::1" in out
node1.succeed("ip addr >&2")
node1.succeed("ip route >&2")
node1.succeed("ip -6 route >&2")
node1.succeed("networkctl status eth1 >&2")
node1.shutdown()
'';
}