Skip to content

Commit

Permalink
hosts(pve-epyc): auto restart vGPU manager if mdev is not registered
Browse files Browse the repository at this point in the history
  • Loading branch information
xddxdd committed Jan 8, 2025
1 parent e4fc2d7 commit 3488698
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions nixos/hardware/nvidia/vgpu-extension.nix
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,36 @@ in
};
};

# One-shot gate unit: ordered after nvidia-vgpu-mgr.service and before the
# Proxmox units listed below, which also require it. It keeps restarting the
# vGPU manager until the kernel log shows that an mdev device was registered.
systemd.services.nvidia-vgpu-watchdog =
  let
    # Units that are both ordered after this watchdog and require it.
    gatedUnits = [
      "pvedaemon.service"
      "pve-guests.service"
    ];
  in
  {
    serviceConfig = {
      Type = "oneshot";
    };

    wantedBy = [ "multi-user.target" ];
    after = [ "nvidia-vgpu-mgr.service" ];
    before = gatedUnits;
    requiredBy = gatedUnits;

    # Tools the script calls: dmesg (util-linux) and systemctl (systemd).
    path = [
      pkgs.util-linux
      pkgs.systemd
    ];

    # Each pass looks in the kernel log for an "nvidia" line that contains
    # "MDEV: Registered". If one is found the loop ends and the unit exits 0;
    # otherwise nvidia-vgpu-mgr is restarted and the check repeats 5 seconds
    # later.
    # NOTE(review): the loop has no retry cap, so the gated units wait for as
    # long as registration never shows up -- confirm whether a start timeout
    # is wanted for this oneshot unit.
    script = ''
      while true; do
      (dmesg | grep nvidia | grep "MDEV: Registered") && break
      echo "Restarting VGPU manager"
      systemctl restart nvidia-vgpu-mgr
      sleep 5
      done
      exit 0
    '';
  };

# tmpfiles.d "L" (symlink) rule: /usr/share/nvidia/vgpu/vgpuConfig.xml points
# at the vgpuConfig.xml found under share/nvidia/vgpu in nvidia_x11.bin.
# NOTE(review): presumably the vGPU tooling looks for the file at this fixed
# path -- confirm against the consumer.
systemd.tmpfiles.rules = [
"L /usr/share/nvidia/vgpu/vgpuConfig.xml - - - - ${nvidia_x11.bin}/share/nvidia/vgpu/vgpuConfig.xml"
];
Expand Down

0 comments on commit 3488698

Please sign in to comment.