From f3854ade971bfba5e57dc5b8a64263e124cd2f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sun, 14 May 2023 21:04:01 +0200 Subject: [PATCH 1/6] flake.lock: Update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flake lock file updates: • Updated input 'nixos-2211': 'github:NixOS/nixpkgs/3d302c67ab8647327dba84fbdb443cdbf0e82744' (2023-04-19) → 'github:NixOS/nixpkgs/c6d2f3dc0d3efd4285eebe4f8a36a47ba438138e' (2023-05-14) • Updated input 'nixos-unstable': 'github:NixOS/nixpkgs/cafa2f02fbbcade5c5c257c190061da555d90913' (2023-04-19) → 'github:NixOS/nixpkgs/3e2728f51a8ed5287f2577737c6b5505d56de3e9' (2023-05-14) flake.lock: Update Flake lock file updates: • Updated input 'nixos-unstable': 'github:B4dM4n/nixpkgs/c51fe112cc73f87f282de24aa0fd929e72bb6ed0' (2023-05-12) → 'github:NixOS/nixpkgs/3007746b3f5bfcb49e102b517bca891822a41b31' (2023-05-14) --- flake.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index f3cc6b0..badd496 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixos-2211": { "locked": { - "lastModified": 1681932375, - "narHash": "sha256-tSXbYmpnKSSWpzOrs27ie8X3I0yqKA6AuCzCYNtwbCU=", + "lastModified": 1684141842, + "narHash": "sha256-sbdzOwBDcyzz/Dr1ztdF+tElMyM/cgx+4XxVgz+NLRM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "3d302c67ab8647327dba84fbdb443cdbf0e82744", + "rev": "2eb0795720849ae19c068e39b17362d3ebcd585c", "type": "github" }, "original": { @@ -18,11 +18,11 @@ }, "nixos-unstable": { "locked": { - "lastModified": 1681914506, - "narHash": "sha256-frb95rhVUKAeRdHKfD2vbO1kv8U+G9JMAoLHLCRPNa4=", + "lastModified": 1684106623, + "narHash": "sha256-Fxw/lWpx+Cr1sJQ+1msdPBnrRuO0ll1eT48+ym0oqDg=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "cafa2f02fbbcade5c5c257c190061da555d90913", + "rev": "8fb30f6066a87a91c21241f1993a26ff57005486", "type": "github" }, "original": { From 8d32c7409c1d8ebcc26b890c595735dab52e069d 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Sun, 14 May 2023 21:31:12 +0200 Subject: [PATCH 2/6] kexec-installer/test: use our kexecTarball directly Before, we were importing the configuration into a NixOS module. Now we are actually using our unmodified tarball. This saves some rebuilds and is closer to what we ship. --- flake.nix | 8 +- nix/kexec-installer/ssh-keys/id_ed25519 | 7 + nix/kexec-installer/ssh-keys/id_ed25519.pub | 1 + nix/kexec-installer/test.nix | 229 ++++++++++---------- 4 files changed, 126 insertions(+), 119 deletions(-) create mode 100644 nix/kexec-installer/ssh-keys/id_ed25519 create mode 100644 nix/kexec-installer/ssh-keys/id_ed25519.pub diff --git a/flake.nix b/flake.nix index b62f6e0..b5595bc 100644 --- a/flake.nix +++ b/flake.nix @@ -40,14 +40,18 @@ checks.x86_64-linux = let pkgs = nixos-unstable.legacyPackages.x86_64-linux; in { - kexec-installer-unstable = pkgs.callPackage ./nix/kexec-installer/test.nix {}; + kexec-installer-unstable = pkgs.callPackage ./nix/kexec-installer/test.nix { + kexecTarball = self.packages.x86_64-linux.kexec-installer-nixos-unstable-noninteractive; + }; shellcheck = pkgs.runCommand "shellcheck" { nativeBuildInputs = [ pkgs.shellcheck ]; } '' shellcheck ${(pkgs.nixos [self.nixosModules.kexec-installer]).config.system.build.kexecRun} touch $out ''; - kexec-installer-2211 = nixos-2211.legacyPackages.x86_64-linux.callPackage ./nix/kexec-installer/test.nix {}; + kexec-installer-2211 = nixos-2211.legacyPackages.x86_64-linux.callPackage ./nix/kexec-installer/test.nix { + kexecTarball = self.packages.x86_64-linux.kexec-installer-nixos-2211-noninteractive; + }; }; }; } diff --git a/nix/kexec-installer/ssh-keys/id_ed25519 b/nix/kexec-installer/ssh-keys/id_ed25519 new file mode 100644 index 0000000..ae86675 --- /dev/null +++ b/nix/kexec-installer/ssh-keys/id_ed25519 @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
+QyNTUxOQAAACA8wk9uIqPk7FZFhRs0ZQ4Q/b0Rd//Rpq2i9e3v33+WwgAAAJjeXdO33l3T +twAAAAtzc2gtZWQyNTUxOQAAACA8wk9uIqPk7FZFhRs0ZQ4Q/b0Rd//Rpq2i9e3v33+Wwg +AAAEBiNUp5mUe87gWrXbjd36dqt/6waDLdoYV1woR8in4ehDzCT24io+TsVkWFGzRlDhD9 +vRF3/9GmraL17e/ff5bCAAAAE2pvZXJnQHR1cmluZ21hY2hpbmUBAg== +-----END OPENSSH PRIVATE KEY----- diff --git a/nix/kexec-installer/ssh-keys/id_ed25519.pub b/nix/kexec-installer/ssh-keys/id_ed25519.pub new file mode 100644 index 0000000..a6d72ad --- /dev/null +++ b/nix/kexec-installer/ssh-keys/id_ed25519.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDzCT24io+TsVkWFGzRlDhD9vRF3/9GmraL17e/ff5bC joerg@turingmachine diff --git a/nix/kexec-installer/test.nix b/nix/kexec-installer/test.nix index 659edfe..b690cbf 100644 --- a/nix/kexec-installer/test.nix +++ b/nix/kexec-installer/test.nix @@ -1,4 +1,6 @@ -{ pkgs ? import {} }: +{ pkgs +, kexecTarball +}: let makeTest = import (pkgs.path + "/nixos/tests/make-test-python.nix"); @@ -6,8 +8,8 @@ let inherit pkgs; inherit (pkgs) system; }; - -in makeTest' { +in +makeTest' { name = "kexec-installer"; meta = with pkgs.lib.maintainers; { maintainers = [ mic92 ]; @@ -15,18 +17,20 @@ in makeTest' { nodes = { node1 = { modulesPath, ... 
}: { - virtualisation.vlans = [ 1 ]; + virtualisation.vlans = [ ]; environment.noXlibs = false; # avoid recompilation imports = [ (modulesPath + "/profiles/minimal.nix") ]; - virtualisation.memorySize = 1024 + 512; + system.extraDependencies = [ kexecTarball ]; + virtualisation.memorySize = 1 * 1024 + 512; virtualisation.diskSize = 4 * 1024; - virtualisation.useBootLoader = true; - virtualisation.useEFIBoot = true; - boot.loader.systemd-boot.enable = true; - boot.loader.efi.canTouchEfiVariables = true; + virtualisation.forwardPorts = [{ + host.port = 2222; + guest.port = 22; + }]; + services.openssh.enable = true; networking = { @@ -34,6 +38,8 @@ in makeTest' { useDHCP = false; }; + users.users.root.openssh.authorizedKeys.keyFiles = [ ./ssh-keys/id_ed25519.pub ]; + systemd.network = { networks = { # systemd-networkd will load the first network unit file @@ -41,12 +47,12 @@ in makeTest' { # /etc/systemd/network/{40-eth1,99-main}.network already # exists. This network unit must be loaded for the test, # however, hence why this network is named such. - "01-eth1" = { - name = "eth1"; + + "01-eth0" = { + name = "eth0"; address = [ # Some static addresses that we want to see in the kexeced image - "192.168.42.1/24" - "42::1/64" + "192.168.42.1/24" "42::1/64" ]; routes = [ # Some static routes that we want to see in the kexeced image @@ -55,138 +61,127 @@ in makeTest' { { routeConfig = { Destination = "43::0/64"; }; } { routeConfig = { Destination = "44::1/64"; Gateway = "43::1"; }; } ]; - networkConfig = { - DHCP = "yes"; - IPv6AcceptRA = true; - }; + networkConfig = { DHCP = "yes"; IPv6AcceptRA = true; }; }; }; }; }; - - node2 = { pkgs, modulesPath, ... }: { - environment.systemPackages = [ pkgs.hello ]; - imports = [ - ./module.nix - ../noninteractive.nix - ]; - }; - - router = { config, pkgs, ... 
}: { - virtualisation.vlans = [ 1 ]; - networking = { - useNetworkd = true; - useDHCP = false; - firewall.enable = false; - }; - systemd.network = { - networks = { - # systemd-networkd will load the first network unit file - # that matches, ordered lexiographically by filename. - # /etc/systemd/network/{40-eth1,99-main}.network already - # exists. This network unit must be loaded for the test, - # however, hence why this network is named such. - "01-eth1" = { - name = "eth1"; - address = [ - "2001:db8::1/64" - ]; - ipv6Prefixes = [ - { ipv6PrefixConfig = { Prefix = "2001:db8::/64"; AddressAutoconfiguration = true; OnLink = true; }; } - ]; - # does not work in 22.11 - #ipv6RoutePrefixes = [ { ipv6RoutePrefixConfig = { Route = "::/0"; LifetimeSec = 3600; }; }]; - extraConfig = '' - [IPv6RoutePrefix] - Route = ::/0 - LifetimeSec = 3600 - ''; - networkConfig = { - DHCPServer = true; - Address = "10.0.0.1/24"; - IPv6SendRA = true; - }; - dhcpServerConfig = { - PoolOffset = 100; - PoolSize = 1; - EmitRouter = true; - }; - }; - }; - }; - }; - }; - testScript = { nodes, ... }: '' - # Test whether reboot via kexec works. 
+ testScript = '' + import time + import subprocess + import socket + import http.server + from threading import Thread + from typing import Optional - router.wait_for_unit("network-online.target") - router.succeed("ip addr >&2") - router.succeed("ip route >&2") - router.succeed("ip -6 route >&2") - router.succeed("networkctl status eth1 >&2") + start_all() + + class DualStackServer(http.server.HTTPServer): + def server_bind(self): + self.socket.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) + return super().server_bind() + DualStackServer.address_family = socket.AF_INET6 + httpd = DualStackServer(("::", 0), http.server.SimpleHTTPRequestHandler) + + http.server.HTTPServer.address_family = socket.AF_INET6 + port = httpd.server_port + def serve_forever(httpd): + with httpd: + httpd.serve_forever() + thread = Thread(target=serve_forever, args=(httpd, )) + thread.setDaemon(True) + thread.start() + + node1.wait_until_succeeds(f"curl -v -I http://10.0.2.2:{port}") + node1.wait_until_succeeds(f"curl -v -I http://[fec0::2]:{port}") - node1.wait_until_succeeds("ping -c1 10.0.0.1") - node1.wait_until_succeeds("ping -c1 2001:db8::1") node1.succeed("ip addr >&2") node1.succeed("ip route >&2") node1.succeed("ip -6 route >&2") - node1.succeed("networkctl status eth1 >&2") + node1.succeed("networkctl status eth0 >&2") - host_ed25519_before = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") + def ssh(cmd: list[str], check: bool = True, stdout: Optional[int] = None) -> subprocess.CompletedProcess: + ssh_cmd = [ + "${pkgs.openssh}/bin/ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=1", + "-i", "${./ssh-keys/id_ed25519}", + "-p", "2222", + "root@127.0.0.1", + "--" + ] + cmd + print(" ".join(ssh_cmd)) + return subprocess.run(ssh_cmd, + text=True, + check=check, + stdout=stdout) + + + while not ssh(["true"], check=False).returncode == 0: + time.sleep(1) + ssh(["cp", "--version"]) + + host_ed25519_before = node1.succeed("cat 
/etc/ssh/ssh_host_ed25519_key.pub").strip() node1.succeed('ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -q -N ""') - root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub') + root_ed25519_before = node1.succeed('tee /root/.ssh/authorized_keys < /root/.ssh/id_ed25519.pub').strip() # Kexec node1 to the toplevel of node2 via the kexec-boot script node1.succeed('touch /run/foo') - node1.fail('hello') - node1.succeed('tar -xf ${nodes.node2.system.build.kexecTarball}/nixos-kexec-installer-${pkgs.system}.tar.gz -C /root') - node1.execute('/root/kexec/run') - # wait for machine to kexec - node1.execute('sleep 9999', check_return=False) - node1.succeed('! test -e /run/foo') - node1.succeed('hello') - node1.succeed('[ "$(hostname)" = "node2" ]') - node1.wait_for_unit("sshd.service") + node1.fail('parted --version >&2') + node1.succeed('tar -xf ${kexecTarball}/nixos-kexec-installer-noninteractive-${pkgs.system}.tar.gz -C /root') + node1.execute('/root/kexec/run >&2') - host_ed25519_after = node1.succeed("cat /etc/ssh/ssh_host_ed25519_key.pub") - assert host_ed25519_before == host_ed25519_after, f"{host_ed25519_before} != {host_ed25519_after}" + # wait for kexec to finish + while ssh(["true"], check=False).returncode == 0: + print("Waiting for kexec to finish...") + time.sleep(1) - root_ed25519_after = node1.succeed("cat /root/.ssh/authorized_keys") - assert root_ed25519_before == root_ed25519_after, f"{root_ed25519_before} != {root_ed25519_after}" + while ssh(["true"], check=False).returncode != 0: + print("Waiting for node2 to come up...") + time.sleep(1) - # See if we can reach the router after kexec - node1.wait_for_unit("restore-network.service") - node1.wait_until_succeeds("cat /etc/systemd/network/eth1.network >&2") - node1.wait_until_succeeds("ping -c1 10.0.0.1") - node1.wait_until_succeeds("ping -c1 2001:db8::1") + print(ssh(["ip", "addr"])) + print(ssh(["ip", "route"])) + print(ssh(["ip", "-6", "route"])) + 
print(ssh(["networkctl", "status"])) - # Check if static addresses have been restored - node1.wait_until_succeeds("ping -c1 42::1") - node1.wait_until_succeeds("ping -c1 192.168.42.1") + assert ssh(["ls", "-la", "/run/foo"], check=False).returncode != 0, "kexeced node1 still has /run/foo" + print(ssh(["parted", "--version"])) + host = ssh(["hostname"], stdout=subprocess.PIPE).stdout.strip() + assert host == "nixos", f"hostname is {host}, not nixos" - out = node1.wait_until_succeeds("ip route get 192.168.43.2") + host_ed25519_after = ssh(["cat", "/etc/ssh/ssh_host_ed25519_key.pub"], stdout=subprocess.PIPE).stdout.strip() + assert host_ed25519_before == host_ed25519_after, f"'{host_ed25519_before}' != '{host_ed25519_after}'" + + root_ed25519_after = ssh(["cat", "/root/.ssh/authorized_keys"], stdout=subprocess.PIPE).stdout.strip() + assert root_ed25519_before in root_ed25519_after, f"'{root_ed25519_before}' not included in '{root_ed25519_after}'" + + print(ssh(["cat", "/etc/systemd/network/eth0.network"])) + ssh(["curl", "-v", "-I", f"http://10.0.2.2:{port}"]) + ssh(["curl", "-v", "-I", f"http://[fec0::2]:{port}"]) + + ## Check if static addresses have been restored + ssh(["ping", "-c1", "42::1"]) + ssh(["ping", "-c1", "192.168.42.1"]) + + out = ssh(["ip", "route", "get", "192.168.43.2"], stdout=subprocess.PIPE).stdout print(out) - assert "192.168.43.2 dev eth1" in out + assert "192.168.43.2 dev" in out, f"route to `192.168.43.2 dev` not found: {out}" - out = node1.wait_until_succeeds("ip route get 192.168.44.2") + out = ssh(["ip", "route", "get", "192.168.44.2"], stdout=subprocess.PIPE).stdout print(out) - assert "192.168.44.2 via 192.168.43.1" in out + assert "192.168.44.2 via 192.168.43.1" in out, f"route to `192.168.44.2 via 192.168.43.1` not found: {out}" - out = node1.wait_until_succeeds("ip route get 43::2") + out = ssh(["ip", "route", "get", "43::2"], stdout=subprocess.PIPE).stdout print(out) - assert "43::2 from :: dev eth1" in out + assert "43::2 from :: 
dev" in out, f"route `43::2 from dev` not found: {out}" - out = node1.wait_until_succeeds("ip route get 44::2") + out = ssh(["ip", "route", "get", "44::2"], stdout=subprocess.PIPE).stdout print(out) - assert "44::2 from :: via 43::1" in out + assert "44::2 from :: via 43::1" in out, f"route to `44::2 from :: via 43::1` not found: {out}" - node1.succeed("ip addr >&2") - node1.succeed("ip route >&2") - node1.succeed("ip -6 route >&2") - node1.succeed("networkctl status eth1 >&2") - - node1.shutdown() + node1.crash() ''; } From 3db198a018f694842f095febbda6a9d3562964cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 15 May 2023 11:15:15 +0200 Subject: [PATCH 3/6] kexec-installer: print dmesg if kexec --load fails In some cases dmesg contains interesting errors --- nix/kexec-installer/kexec-run.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nix/kexec-installer/kexec-run.sh b/nix/kexec-installer/kexec-run.sh index c56fb7c..09d9d33 100644 --- a/nix/kexec-installer/kexec-run.sh +++ b/nix/kexec-installer/kexec-run.sh @@ -53,9 +53,13 @@ done find . | cpio -o -H newc | gzip -9 >> "$SCRIPT_DIR/initrd" # Dropped --kexec-syscall-auto because it broke on GCP... -"$SCRIPT_DIR/kexec" --load "$SCRIPT_DIR/bzImage" \ +if ! "$SCRIPT_DIR/kexec" --load "$SCRIPT_DIR/bzImage" \ --initrd="$SCRIPT_DIR/initrd" --no-checks \ - --command-line "init=$init $kernelParams" + --command-line "init=$init $kernelParams"; then + echo "kexec failed, dumping dmesg" + dmesg | tail -n 100 + exit 1 +fi # Disconnect our background kexec from the terminal echo "machine will boot into nixos in in 6s..." 
From 4ad9c55cf2fbbdc95c3d22a50a44b16247814f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 15 May 2023 11:39:00 +0200 Subject: [PATCH 4/6] kexec-installer: use latest kernel compatible with zfs --- nix/kexec-installer/module.nix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nix/kexec-installer/module.nix b/nix/kexec-installer/module.nix index df2a7d7..0a852bc 100644 --- a/nix/kexec-installer/module.nix +++ b/nix/kexec-installer/module.nix @@ -24,6 +24,9 @@ in { # We are stateless, so just default to latest. system.stateVersion = config.system.nixos.version; + # use latest kernel we can support to get more hardware support + boot.kernelPackages = config.boot.zfs.package.latestCompatibleLinuxPackages; + # This is a variant of the upstream kexecScript that also allows embedding # a ssh key. system.build.kexecRun = pkgs.runCommand "kexec-run" {} '' From 8451894552d16950ed4de40ebb751bd9df5614c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 15 May 2023 12:04:37 +0200 Subject: [PATCH 5/6] kexec-installer: add smoketest for binaries --- nix/kexec-installer/module.nix | 4 ++++ nix/kexec-installer/test.nix | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/nix/kexec-installer/module.nix b/nix/kexec-installer/module.nix index 0a852bc..98f11bb 100644 --- a/nix/kexec-installer/module.nix +++ b/nix/kexec-installer/module.nix @@ -47,6 +47,10 @@ in { cp "${config.system.build.kexecRun}" kexec/run cp "${pkgs.pkgsStatic.kexec-tools}/bin/kexec" kexec/kexec cp "${iprouteStatic}/bin/ip" kexec/ip + ${lib.optionalString (pkgs.hostPlatform == pkgs.buildPlatform) '' + kexec/ip -V + kexec/kexec --version + ''} tar -czvf $out/${config.system.kexec-installer.name}-${pkgs.stdenv.hostPlatform.system}.tar.gz kexec ''; diff --git a/nix/kexec-installer/test.nix b/nix/kexec-installer/test.nix index b690cbf..431480c 100644 --- a/nix/kexec-installer/test.nix +++ b/nix/kexec-installer/test.nix @@ -131,7 +131,9 @@ 
makeTest' { node1.succeed('touch /run/foo') node1.fail('parted --version >&2') node1.succeed('tar -xf ${kexecTarball}/nixos-kexec-installer-noninteractive-${pkgs.system}.tar.gz -C /root') - node1.execute('/root/kexec/run >&2') + node1.succeed('/root/kexec/ip -V >&2') + node1.succeed('/root/kexec/kexec --version >&2') + node1.succeed('/root/kexec/run >&2') # wait for kexec to finish while ssh(["true"], check=False).returncode == 0: From d7311ed7f88b34047ef01753898446357dae3f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 15 May 2023 13:32:57 +0200 Subject: [PATCH 6/6] add missing parted package back to 22.11 release --- nix/noninteractive.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nix/noninteractive.nix b/nix/noninteractive.nix index 3a832f2..bcdad55 100644 --- a/nix/noninteractive.nix +++ b/nix/noninteractive.nix @@ -18,7 +18,10 @@ programs.nano.syntaxHighlight = lib.mkForce false; # prevents nano, strace - environment.defaultPackages = lib.mkForce [ pkgs.rsync ]; + environment.defaultPackages = lib.mkForce [ + pkgs.rsync + pkgs.parted + ]; # zfs support is accidentally disabled by excluding base.nix, re-enable it boot = {