diff --git a/build-images.sh b/build-images.sh index ed7f013..54e1c5f 100755 --- a/build-images.sh +++ b/build-images.sh @@ -29,8 +29,8 @@ build_kexec_bundle() { build_kexec_installer() { declare -r tag=$1 arch=$2 tmp=$3 # run the test once we have kvm support in github actions - # ignore=$(nix-build ./nix/kexec-installer-test.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch") - out=$(nix-build '' -o "$tmp/kexec-installer-$arch" -I nixos-config=./nix/kexec-installer.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch" -A config.system.build.kexecTarball) + # ignore=$(nix-build ./nix/kexec-installer/test.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch") + out=$(nix-build '' -o "$tmp/kexec-installer-$arch" -I nixos-config=./nix/kexec-installer/module.nix -I "nixpkgs=https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz" --argstr system "$arch" -A config.system.build.kexecTarball) echo "$out/nixos-kexec-installer-$arch.tar.gz" } diff --git a/nix/kexec-installer-test.nix b/nix/kexec-installer-test.nix deleted file mode 100644 index 9b273cc..0000000 --- a/nix/kexec-installer-test.nix +++ /dev/null @@ -1,75 +0,0 @@ -{ pkgs ? import {} }: - -let - makeTest = import (pkgs.path + "/nixos/tests/make-test-python.nix"); - makeTest' = args: makeTest args { - inherit pkgs; - inherit (pkgs) system; - }; - -in makeTest' { - name = "kexec-installer"; - meta = with pkgs.lib.maintainers; { - maintainers = [ mic92 ]; - }; - - nodes = { - node1 = { ... }: { - virtualisation.vlans = [ ]; - virtualisation.memorySize = 2 * 1024 + 512; - virtualisation.diskSize = 4 * 1024; - virtualisation.useBootLoader = true; - virtualisation.useEFIBoot = true; - boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; - services.openssh.enable = true; - }; - - node2 = { pkgs, modulesPath, ... 
}: { - virtualisation.vlans = [ ]; - environment.systemPackages = [ pkgs.hello ]; - imports = [ - ./kexec-installer.nix - ]; - }; - }; - - testScript = { nodes, ... }: '' - # Test whether reboot via kexec works. - node1.wait_for_unit("multi-user.target") - node1.succeed('kexec --load /run/current-system/kernel --initrd /run/current-system/initrd --command-line "$(&2 &", check_return=False) - node1.connected = False - node1.connect() - node1.wait_for_unit("multi-user.target") - - # Check if the machine with netboot-minimal.nix profile boots up - node2.wait_for_unit("multi-user.target") - node2.shutdown() - - node1.wait_for_unit("sshd.service") - host_ed25519_before = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") - - node1.succeed('ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -q -N ""') - root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub') - # Kexec node1 to the toplevel of node2 via the kexec-boot script - node1.succeed('touch /run/foo') - node1.fail('hello') - node1.succeed('tar -xf ${nodes.node2.config.system.build.kexecTarball}/nixos-kexec-installer-${pkgs.system}.tar.gz -C /root') - node1.execute('/root/kexec/run') - # wait for machine to kexec - node1.execute('sleep 9999', check_return=False) - node1.succeed('! 
test -e /run/foo') - node1.succeed('hello') - node1.succeed('[ "$(hostname)" = "node2" ]') - node1.wait_for_unit("sshd.service") - - host_ed25519_after = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") - assert host_ed25519_before == host_ed25519_after, f"{host_ed25519_before} != {host_ed25519_after}" - - root_ed25519_after = node1.succeed("cat /root/.ssh/authorized_keys") - assert root_ed25519_before == root_ed25519_after, f"{root_ed25519_before} != {root_ed25519_after}" - - node1.shutdown() - ''; -} diff --git a/nix/kexec-installer.nix b/nix/kexec-installer/module.nix similarity index 70% rename from nix/kexec-installer.nix rename to nix/kexec-installer/module.nix index 1a9151b..86f376d 100644 --- a/nix/kexec-installer.nix +++ b/nix/kexec-installer/module.nix @@ -1,5 +1,9 @@ { config, lib, modulesPath, pkgs, ... }: -{ +let + restoreNetwork = pkgs.writers.writePython3 "restore-network" { + flakeIgnore = ["E501"]; + } ./restore_routes.py; +in { imports = [ (modulesPath + "/installer/netboot/netboot-minimal.nix") ]; @@ -32,17 +36,27 @@ for p in /etc/ssh/ssh_host_*; do cp -a "$p" ssh done + + # save the networking config for later use + if type -p ip &>/dev/null; then + ip --json addr > addrs.json + + ip -4 --json route > routes-v4.json + ip -6 --json route > routes-v6.json + else + echo "Skip saving static network addresses because no iproute2 binary is available." 2>&1 + echo "The image can depends only on DHCP to get network after reboot!" 
2>&1 + fi + find | cpio -o -H newc | gzip -9 > ../extra.gz popd - cat "''${SCRIPT_DIR}/initrd" extra.gz > final-initrd + cat extra.gz >> "''${SCRIPT_DIR}/initrd" + rm -r "$INITRD_TMP" "$SCRIPT_DIR/kexec" --load "''${SCRIPT_DIR}/bzImage" \ - --initrd=final-initrd \ + --initrd="''${SCRIPT_DIR}/initrd" \ --command-line "init=${config.system.build.toplevel}/init ${toString config.boot.kernelParams}" - # kexec will map the new kernel in memory so we can remove the kernel at this point - rm -r "$INITRD_TMP" - # Disconnect our background kexec from the terminal echo "machine will boot into nixos in in 6s..." if [[ -e /dev/kmsg ]]; then @@ -76,17 +90,42 @@ # Not really needed. Saves a few bytes and the only service we are running is sshd, which we want to be reachable. networking.firewall.enable = false; + systemd.network.enable = true; + networking.dhcpcd.enable = false; + # for detection if we are on kexec environment.etc.is_kexec.text = "true"; + systemd.services.restoreNetwork = { + before = [ "network-pre.target" ]; + wants = [ "network-pre.target" ]; + wantedBy = [ "multi-user.target" ]; + + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = [ + "${restoreNetwork} /root/network/addrs.json /root/network/routes-v4.json /root/network/routes-v6.json /etc/systemd/network" + ]; + }; + + unitConfig.ConditionPathExists = [ + "/root/network/addrs.json" + "/root/network/routes-v4.json" + "/root/network/routes-v6.json" + ]; + }; + # Restore ssh host and user keys if they are available. # This avoids warnings of unknown ssh keys. 
boot.initrd.postMountCommands = '' mkdir -m 700 -p /mnt-root/root/.ssh mkdir -m 755 -p /mnt-root/etc/ssh + mkdir -m 755 -p /mnt-root/root/network if [[ -f ssh/authorized_keys ]]; then install -m 400 ssh/authorized_keys /mnt-root/root/.ssh fi install -m 400 ssh/ssh_host_* /mnt-root/etc/ssh + cp *.json /mnt-root/root/network/ ''; } diff --git a/nix/kexec-installer/restore_routes.py b/nix/kexec-installer/restore_routes.py new file mode 100644 index 0000000..9578f10 --- /dev/null +++ b/nix/kexec-installer/restore_routes.py @@ -0,0 +1,111 @@ +import json +import sys +from pathlib import Path +from typing import Any + + +def filter_interfaces(network: list[dict[str, Any]]) -> list[dict[str, Any]]: + output = [] + for net in network: + if net.get("link_type") == "loopback": + continue + if not net.get("address"): + # We need a mac address to match devices reliably + continue + addr_info = [] + has_dynamic_address = False + for addr in net["addr_info"]: + # no link-local ipv4/ipv6 + if addr.get("scope") == "link": + continue + # do not explicitly configure addresses from dhcp or router advertisement + if addr.get("dynamic", False): + has_dynamic_address = True + continue + else: + addr_info.append(addr) + if addr_info != [] or has_dynamic_address: + net["addr_info"] = addr_info + output.append(net) + + return output + + +def filter_routes(routes: list[dict[str, Any]]) -> list[dict[str, Any]]: + filtered = [] + for route in routes: + # Filter out routes set by addresses with subnets, dhcp and router advertisement + if route.get("protocol") in ["dhcp", "kernel", "ra"]: + continue + filtered.append(route) + + return filtered + + +def generate_networkd_units( + interfaces: list[dict[str, Any]], routes: list[dict[str, Any]], directory: Path +) -> None: + directory.mkdir(exist_ok=True) + for interface in interfaces: + name = f"{interface['ifname']}.network" + addresses = [ + f"Address = {addr['local']}/{addr['prefixlen']}" + for addr in interface["addr_info"] + ] + + 
+ route_sections = [] + for route in routes: + if route["dev"] != interface["ifname"]: + continue + + route_section = "[Route]\n" + if route["dst"] != "default": + # can be skipped for default routes + route_section += f"Destination = {route['dst']}\n" + gateway = route.get("gateway") + if gateway: + route_section += f"Gateway = {gateway}\n" + + # we may ignore on-link default routes here, but I don't see how + # they would be useful for internet connectivity anyway + route_sections.append(route_section) + + # FIXME in some networks we might not want to trust dhcp or router advertisements + unit = f""" +[Match] +MACAddress = {interface["address"]} + +[Network] +DHCP = yes +IPv6AcceptRA = yes +""" + unit += "\n".join(addresses) + unit += "\n" + "\n".join(route_sections) + (directory / name).write_text(unit) + + +def main() -> None: + if len(sys.argv) < 5: + print( + f"USAGE: {sys.argv[0]} addresses routes-v4 routes-v6 networkd-directory", + file=sys.stderr, + ) + sys.exit(1) + + with open(sys.argv[1]) as f: + addresses = json.load(f) + with open(sys.argv[2]) as f: + v4_routes = json.load(f) + with open(sys.argv[3]) as f: + v6_routes = json.load(f) + + networkd_directory = Path(sys.argv[4]) + + relevant_interfaces = filter_interfaces(addresses) + relevant_routes = filter_routes(v4_routes) + filter_routes(v6_routes) + + generate_networkd_units(relevant_interfaces, relevant_routes, networkd_directory) + + +if __name__ == "__main__": + main() diff --git a/nix/kexec-installer/test.nix b/nix/kexec-installer/test.nix new file mode 100644 index 0000000..2d46285 --- /dev/null +++ b/nix/kexec-installer/test.nix @@ -0,0 +1,187 @@ +{ pkgs ? import {} }: + +let + makeTest = import (pkgs.path + "/nixos/tests/make-test-python.nix"); + makeTest' = args: makeTest args { + inherit pkgs; + inherit (pkgs) system; + }; + +in makeTest' { + name = "kexec-installer"; + meta = with pkgs.lib.maintainers; { + maintainers = [ mic92 ]; + }; + + nodes = { + node1 = { modulesPath, ... 
}: { + virtualisation.vlans = [ 1 ]; + environment.noXlibs = false; # avoid recompilation + imports = [ + (modulesPath + "/profiles/minimal.nix") + ]; + + virtualisation.memorySize = 2 * 1024 + 512; + virtualisation.diskSize = 4 * 1024; + virtualisation.useBootLoader = true; + virtualisation.useEFIBoot = true; + boot.loader.systemd-boot.enable = true; + boot.loader.efi.canTouchEfiVariables = true; + services.openssh.enable = true; + + networking = { + useNetworkd = true; + useDHCP = false; + }; + + systemd.network = { + networks = { + # systemd-networkd will load the first network unit file + # that matches, ordered lexicographically by filename. + # /etc/systemd/network/{40-eth1,99-main}.network already + # exists. This network unit must be loaded for the test, + # however, hence why this network is named such. + "01-eth1" = { + name = "eth1"; + address = [ + # Some static addresses that we want to see in the kexeced image + "192.168.42.1/24" + "42::1/64" + ]; + routes = [ + # Some static routes that we want to see in the kexeced image + { routeConfig = { Destination = "192.168.43.0/24"; }; } + { routeConfig = { Destination = "192.168.44.0/24"; Gateway = "192.168.43.1"; }; } + { routeConfig = { Destination = "43::0/64"; }; } + { routeConfig = { Destination = "44::1/64"; Gateway = "43::1"; }; } + ]; + networkConfig = { + DHCP = "yes"; + IPv6AcceptRA = true; + }; + }; + }; + }; + }; + + node2 = { pkgs, modulesPath, ... }: { + environment.systemPackages = [ pkgs.hello ]; + imports = [ + ./module.nix + ]; + }; + + router = { config, pkgs, ... }: { + virtualisation.vlans = [ 1 ]; + networking = { + useNetworkd = true; + useDHCP = false; + firewall.enable = false; + }; + systemd.network = { + networks = { + # systemd-networkd will load the first network unit file + # that matches, ordered lexicographically by filename. + # /etc/systemd/network/{40-eth1,99-main}.network already + # exists. 
This network unit must be loaded for the test, + # however, hence why this network is named such. + "01-eth1" = { + name = "eth1"; + address = [ + "2001:db8::1/64" + ]; + ipv6Prefixes = [ + { ipv6PrefixConfig = { Prefix = "2001:db8::/64"; AddressAutoconfiguration = true; OnLink = true; }; } + ]; + ipv6RoutePrefixes = [ + { ipv6RoutePrefixConfig = { Route = "::/0"; LifetimeSec = 3600; }; } + ]; + networkConfig = { + DHCPServer = true; + Address = "10.0.0.1/24"; + IPv6SendRA = true; + }; + dhcpServerConfig = { + PoolOffset = 100; + PoolSize = 1; + EmitRouter = true; + }; + }; + }; + }; + }; + + }; + + testScript = { nodes, ... }: '' + # Test whether reboot via kexec works. + + router.wait_for_unit("network-online.target") + router.succeed("ip addr >&2") + router.succeed("ip route >&2") + router.succeed("ip -6 route >&2") + router.succeed("networkctl status eth1 >&2") + + node1.wait_until_succeeds("ping -c1 10.0.0.1") + node1.wait_until_succeeds("ping -c1 2001:db8::1") + node1.succeed("ip addr >&2") + node1.succeed("ip route >&2") + node1.succeed("ip -6 route >&2") + node1.succeed("networkctl status eth1 >&2") + + host_ed25519_before = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") + node1.succeed('ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -q -N ""') + root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub') + + # Kexec node1 to the toplevel of node2 via the kexec-boot script + node1.succeed('touch /run/foo') + node1.fail('hello') + node1.succeed('tar -xf ${nodes.node2.config.system.build.kexecTarball}/nixos-kexec-installer-${pkgs.system}.tar.gz -C /root') + node1.execute('/root/kexec/run') + # wait for machine to kexec + node1.execute('sleep 9999', check_return=False) + node1.succeed('! 
test -e /run/foo') + node1.succeed('hello') + node1.succeed('[ "$(hostname)" = "node2" ]') + node1.wait_for_unit("sshd.service") + + host_ed25519_after = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") + assert host_ed25519_before == host_ed25519_after, f"{host_ed25519_before} != {host_ed25519_after}" + + root_ed25519_after = node1.succeed("cat /root/.ssh/authorized_keys") + assert root_ed25519_before == root_ed25519_after, f"{root_ed25519_before} != {root_ed25519_after}" + + # See if we can reach the router after kexec + node1.wait_for_unit("restoreNetwork.service") + node1.wait_until_succeeds("cat /etc/systemd/network/eth1.network >&2") + node1.wait_until_succeeds("ping -c1 10.0.0.1") + node1.wait_until_succeeds("ping -c1 2001:db8::1") + + # Check if static addresses have been restored + node1.wait_until_succeeds("ping -c1 42::1") + node1.wait_until_succeeds("ping -c1 192.168.42.1") + + out = node1.wait_until_succeeds("ip route get 192.168.43.2") + print(out) + assert "192.168.43.2 dev eth1" in out + + out = node1.wait_until_succeeds("ip route get 192.168.44.2") + print(out) + assert "192.168.44.2 via 192.168.43.1" in out + + out = node1.wait_until_succeeds("ip route get 43::2") + print(out) + assert "43::2 from :: dev eth1" in out + + out = node1.wait_until_succeeds("ip route get 44::2") + print(out) + assert "44::2 from :: via 43::1" in out + + node1.succeed("ip addr >&2") + node1.succeed("ip route >&2") + node1.succeed("ip -6 route >&2") + node1.succeed("networkctl status eth1 >&2") + + node1.shutdown() + ''; +}