Program.cs 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. using System;
  2. using System.Diagnostics;
  3. using System.IO;
  4. using System.Net.Http;
  5. using System.Threading;
  6. using IvfTl.Watchdog.Core;
  7. namespace IvfTl.Watchdog
  8. {
  9. /// <summary>
  10. /// control 崩溃看门狗(D3-05)。常驻探活 control /ping,崩溃自动重拉(读 DPAPI 缓存凭据)。
  11. /// 不依赖 operate 是否在跑。可 --pause/--resume(维护让路)、--stop(停这次)、--install/--uninstall(开机自启)。
  12. /// </summary>
  13. public static class Program
  14. {
  /// <summary>Name of the named mutex RunLoop creates to allow only one resident watchdog.</summary>
  private const string MutexName = @"Global\ivf_tl_watchdog_singleton";

  /// <summary>Name of the named event RunLoop waits on and SignalStop sets.</summary>
  private const string StopEventName = @"Global\ivf_tl_watchdog_stop";

  /// <summary>Registry key, in reg.exe syntax, that holds the auto-start entry.</summary>
  private const string RunKey = @"HKCU\Software\Microsoft\Windows\CurrentVersion\Run";

  /// <summary>Value name of the auto-start entry under <see cref="RunKey"/>.</summary>
  private const string RunValueName = "ivf_tl_watchdog";

  /// <summary>Minimum number of seconds between two relaunch attempts (guards against a crash storm).</summary>
  private const int RelaunchCooldownSec = 30;

  /// <summary>Once this many consecutive relaunches have been made, the long cooldown applies and a warning is logged.</summary>
  private const int MaxConsecutiveFailures = 5;

  /// <summary>Cooldown in seconds used after <see cref="MaxConsecutiveFailures"/> is reached (five minutes).</summary>
  private const int LongCooldownSec = 5 * 60;

  /// <summary>Shared HTTP client for the /ping probe; each request times out after two seconds.</summary>
  private static readonly HttpClient _http = new HttpClient() { Timeout = TimeSpan.FromSeconds(2) };
  /// <summary>
  /// Entry point. Parses the command line and dispatches on the parsed command:
  /// install / uninstall the auto-start entry, pause / resume relaunching, signal a
  /// resident instance to stop, or (any other command value) run the resident probe loop.
  /// </summary>
  /// <param name="args">Raw command-line arguments.</param>
  /// <returns>The exit code of the selected command, or 1 when an exception escaped it.</returns>
  [STAThread]
  public static int Main(string[] args)
  {
      try
      {
          // Parse inside the try block so that a failure while parsing the arguments is
          // logged and mapped to exit code 1 like every other fatal error.
          var a = WatchdogArgs.Parse(args);
          switch (a.Command)
          {
              case WatchdogCommand.Install:
                  return DoInstall(a);

              case WatchdogCommand.Uninstall:
                  return DoUninstall();

              case WatchdogCommand.Pause:
                  WatchdogPaths.SetPaused(true);
                  Log("已暂停看门狗(只探活不重拉);--resume 恢复");
                  return 0;

              case WatchdogCommand.Resume:
                  WatchdogPaths.SetPaused(false);
                  Log("已恢复看门狗守护");
                  return 0;

              case WatchdogCommand.Stop:
                  SignalStop();
                  Log("已向常驻看门狗发送停止信号");
                  return 0;

              default:
                  return RunLoop(a);
          }
      }
      catch (Exception ex)
      {
          Log("看门狗致命异常:" + ex);
          return 1;
      }
  }
  /// <summary>
  /// Resident probe loop. Takes the singleton mutex, then repeatedly probes control and
  /// relaunches it from cached credentials when the probe fails and the relaunch decision
  /// allows it. Returns when the named stop event is signalled.
  /// </summary>
  /// <param name="a">Parsed arguments; Port and IntervalSec are read here.</param>
  /// <returns>Always 0: either another instance already runs, or a stop signal was received.</returns>
  private static int RunLoop(WatchdogArgs a)
  {
      bool isNew;
      using (var mutex = new Mutex(true, MutexName, out isNew))
      {
          if (!isNew) { Log("已有看门狗实例在运行,本进程退出"); return 0; }

          using (var stopEvent = new EventWaitHandle(false, EventResetMode.ManualReset, StopEventName))
          {
              Log($"看门狗启动 port={a.Port} interval={a.IntervalSec}s controlExe={ResolveControlExe()}");

              // Multiply in 64-bit and clamp: an int overflow would yield a negative timeout,
              // which WaitOne rejects with ArgumentOutOfRangeException.
              long requestedMs = Math.Max(1L, (long)a.IntervalSec) * 1000L;
              int intervalMs = (int)Math.Min((long)int.MaxValue, requestedMs);

              DateTime lastRelaunchUtc = DateTime.MinValue;
              int consecutiveFailures = 0;
              bool? lastAlive = null;        // log liveness only when it changes
              string lastSkipReason = null;  // last "not relaunching" reason already logged

              while (true)
              {
                  // Check the stop signal first (set by --stop / --uninstall) and exit at once.
                  if (stopEvent.WaitOne(0)) { Log("收到停止信号,看门狗退出"); return 0; }

                  bool alive = IsControlAlive(a.Port);
                  if (alive != lastAlive)
                  {
                      Log(alive ? "control 存活(探活正常)" : "control 探活失败(不在)");
                      lastAlive = alive;
                  }

                  if (alive)
                  {
                      consecutiveFailures = 0;
                      lastSkipReason = null; // a later outage must report its reason again
                  }
                  else
                  {
                      bool paused = WatchdogPaths.IsPaused();
                      bool stopped = WatchdogPaths.IsDeliberatelyStopped();
                      int cooldown = consecutiveFailures >= MaxConsecutiveFailures ? LongCooldownSec : RelaunchCooldownSec;
                      bool cooldownActive = (DateTime.UtcNow - lastRelaunchUtc).TotalSeconds < cooldown;

                      if (RelaunchDecision.ShouldRelaunch(alive, paused, stopped, cooldownActive))
                      {
                          var creds = CredentialStore.Load();
                          if (creds == null)
                          {
                              // The file-exists flag is part of the key, so a change in it is logged again.
                              bool credsFileExists = File.Exists(WatchdogPaths.CredsFile);
                              LogOnReasonChange(
                                  ref lastSkipReason,
                                  "no-creds:" + credsFileExists,
                                  $"control 不在,但无可用缓存凭据(creds.dat 存在={credsFileExists});等 operate 首次拉起 control 后再守护");
                          }
                          else
                          {
                              lastSkipReason = null;
                              lastRelaunchUtc = DateTime.UtcNow;
                              consecutiveFailures++;
                              bool ok = Relaunch(creds, a.Port);
                              if (ok && consecutiveFailures >= MaxConsecutiveFailures)
                                  Log($"⚠ control 已连续 {consecutiveFailures} 次重拉仍未稳定,冷却拉长至 {LongCooldownSec}s,请人工检查");
                          }
                      }
                      else if (paused)
                      {
                          LogOnReasonChange(ref lastSkipReason, "paused", "control 不在,但看门狗处于暂停(--resume 恢复),不重拉");
                      }
                      else if (stopped)
                      {
                          LogOnReasonChange(ref lastSkipReason, "stopped", "control 不在,但为故意停机(受护栏 /shutdown),不重拉");
                      }
                      else
                      {
                          // Neither paused nor stopped (e.g. cooldown still running): stay silent.
                          lastSkipReason = null;
                      }
                  }

                  // Sleep one interval; a stop signal during the wait wakes us up immediately.
                  if (stopEvent.WaitOne(intervalMs)) { Log("收到停止信号,看门狗退出"); return 0; }
              }
          }
      }
  }

  /// <summary>
  /// Logs <paramref name="message"/> only when <paramref name="reason"/> differs from the
  /// reason logged last, so an unchanged condition is not written on every probe interval.
  /// </summary>
  private static void LogOnReasonChange(ref string lastReason, string reason, string message)
  {
      if (reason == lastReason) return;
      lastReason = reason;
      Log(message);
  }
  /// <summary>
  /// Liveness probe: issues GET http://127.0.0.1:{port}/ping through the shared client.
  /// </summary>
  /// <param name="port">Local TCP port to probe.</param>
  /// <returns>
  /// True when the response has a success status code; false for any other status or when
  /// the request throws (connection refused, timeout, ...).
  /// </returns>
  private static bool IsControlAlive(int port)
  {
      try
      {
          // Dispose the response: this runs every interval for the lifetime of the process.
          using (var resp = _http.GetAsync($"http://127.0.0.1:{port}/ping").GetAwaiter().GetResult())
          {
              return resp.IsSuccessStatusCode;
          }
      }
      catch
      {
          // Any failure to get a response counts as "not alive".
          return false;
      }
  }
  /// <summary>
  /// Starts the control executable with the cached credentials and the given port.
  /// </summary>
  /// <param name="c">Cached credentials; Account, Password and CacheDisk are passed as arguments.</param>
  /// <param name="port">Port handed to control via --port.</param>
  /// <returns>True when the process start call succeeded; false when the executable is missing or the start threw.</returns>
  /// <remarks>
  /// NOTE(review): the password travels on the command line, where other local processes may
  /// be able to read it. That is the argument format this code already used; changing it
  /// would need a matching change in control, so it is only flagged here.
  /// </remarks>
  private static bool Relaunch(Credentials c, int port)
  {
      string exe = ResolveControlExe();
      if (!File.Exists(exe)) { Log("找不到 control.exe:" + exe); return false; }
      try
      {
          // Values containing whitespace or quotes are quoted so each stays a single argument;
          // plain values are emitted unchanged. Assumes control receives its arguments through
          // the standard Windows command-line splitting - TODO confirm.
          string arguments =
              "--account=" + QuoteArgument(c.Account) +
              " --password=" + QuoteArgument(c.Password) +
              " --cacheDisk=" + QuoteArgument(c.CacheDisk) +
              " --port=" + port;

          var psi = new ProcessStartInfo
          {
              FileName = exe,
              Arguments = arguments,
              UseShellExecute = true, // ShellExecute is needed to start a requireAdministrator child
              WindowStyle = ProcessWindowStyle.Hidden
          };

          // Only the local Process handle is released here; the started process keeps running.
          using (Process started = Process.Start(psi))
          {
          }

          Log($"control 不在 → 已重拉 control.exe(port={port})");
          return true;
      }
      catch (Exception ex) { Log("重拉 control 失败:" + ex.Message); return false; }
  }

  /// <summary>
  /// Formats a value for a Windows command line. Values without space, tab or double quote
  /// are returned as-is; others are wrapped in double quotes with embedded quotes escaped
  /// and backslashes doubled where they precede a quote.
  /// </summary>
  private static string QuoteArgument(object value)
  {
      string text = value == null ? string.Empty : value.ToString();
      if (text.IndexOfAny(new[] { ' ', '\t', '"' }) < 0) return text;

      var sb = new System.Text.StringBuilder();
      sb.Append('"');
      int pendingBackslashes = 0;
      foreach (char ch in text)
      {
          if (ch == '\\')
          {
              pendingBackslashes++;
              continue;
          }

          if (ch == '"')
          {
              // Backslashes directly before a quote must be doubled, plus one to escape the quote.
              sb.Append('\\', pendingBackslashes * 2 + 1);
          }
          else
          {
              sb.Append('\\', pendingBackslashes);
          }

          sb.Append(ch);
          pendingBackslashes = 0;
      }

      // Trailing backslashes sit before the closing quote, so they are doubled as well.
      sb.Append('\\', pendingBackslashes * 2);
      sb.Append('"');
      return sb.ToString();
  }
  /// <summary>
  /// Path of the control executable: the file "ivf_tl_ControlHost.exe" inside the
  /// application's base directory.
  /// </summary>
  /// <returns>The combined path; the file is not checked for existence here.</returns>
  private static string ResolveControlExe()
  {
      string baseDirectory = AppContext.BaseDirectory;
      return Path.Combine(baseDirectory, "ivf_tl_ControlHost.exe");
  }
  134. // ── 控制面 ──────────────────────────────────────────────
  /// <summary>
  /// Registers the watchdog for auto-start by writing a REG_SZ value under the Run key
  /// through reg.exe. The value is this executable's quoted path plus the current
  /// --port / --interval settings.
  /// </summary>
  /// <param name="a">Parsed arguments; Port and IntervalSec are baked into the auto-start command.</param>
  /// <returns>The reg.exe exit code (0 on success), or -1 when reg.exe could not be run.</returns>
  private static int DoInstall(WatchdogArgs a)
  {
      string exe;
      using (var self = Process.GetCurrentProcess())
      {
          exe = self.MainModule.FileName;
      }

      string cmd = $"\"{exe}\" --port={a.Port} --interval={a.IntervalSec}";

      // The command itself contains double quotes. Inside the quoted /d argument they must be
      // escaped as \" - otherwise an executable path with spaces splits the argument and
      // reg.exe rejects the call, and without spaces the quotes are silently lost.
      string escapedCmd = cmd.Replace("\"", "\\\"");

      int rc = RunReg($"add \"{RunKey}\" /v {RunValueName} /t REG_SZ /d \"{escapedCmd}\" /f");
      Log(rc == 0 ? $"已写开机自启:{cmd}" : "写开机自启失败 rc=" + rc);
      return rc;
  }
  /// <summary>
  /// Uninstalls the watchdog in one step: signals a resident instance to stop, then deletes
  /// the auto-start value through reg.exe.
  /// </summary>
  /// <returns>Always 0; a failed delete is only logged, since the value may not have existed.</returns>
  private static int DoUninstall()
  {
      // Stop the resident instance before removing the auto-start entry.
      SignalStop();

      int exitCode = RunReg("delete \"" + RunKey + "\" /v " + RunValueName + " /f");

      string outcome;
      if (exitCode == 0)
      {
          outcome = "已删除开机自启项,看门狗已卸载";
      }
      else
      {
          outcome = "删除开机自启项(可能本就不存在) rc=" + exitCode;
      }

      Log(outcome);
      return 0;
  }
  /// <summary>
  /// Signals a resident watchdog to stop by opening the existing named stop event and
  /// setting it. When the event cannot be opened, or any other error occurs, the failure
  /// is logged and not propagated.
  /// </summary>
  private static void SignalStop()
  {
      try
      {
          using (EventWaitHandle residentStopEvent = EventWaitHandle.OpenExisting(StopEventName))
          {
              residentStopEvent.Set();
          }
      }
      catch (WaitHandleCannotBeOpenedException)
      {
          // The named event does not exist, so no resident instance is waiting on it.
          Log("无常驻看门狗在运行(无需停止)");
      }
      catch (Exception error)
      {
          Log("发送停止信号异常:" + error.Message);
      }
  }
  /// <summary>
  /// Runs reg.exe with the given argument string, without a window, and waits for it to exit.
  /// </summary>
  /// <param name="arguments">Complete reg.exe argument string, already quoted/escaped by the caller.</param>
  /// <returns>The reg.exe exit code, or -1 when starting or waiting for the process threw.</returns>
  private static int RunReg(string arguments)
  {
      try
      {
          var psi = new ProcessStartInfo("reg.exe", arguments) { UseShellExecute = false, CreateNoWindow = true };

          // Dispose the Process object so its OS handle is released once the exit code is read.
          using (var p = Process.Start(psi))
          {
              p.WaitForExit();
              return p.ExitCode;
          }
      }
      catch (Exception ex) { Log("reg.exe 执行异常:" + ex.Message); return -1; }
  }
  172. // ── 日志 ────────────────────────────────────────────────
  /// <summary>Serializes the file appends done by <see cref="Log"/>.</summary>
  private static readonly object _logLock = new object();

  /// <summary>
  /// Writes a local-time-stamped line to watchdog.log in the watchdog data directory and to
  /// the console. Both writes are best-effort: a failure in either one is ignored so that
  /// logging can never take the watchdog down.
  /// </summary>
  /// <param name="msg">Message text appended after the timestamp.</param>
  private static void Log(string msg)
  {
      string entry = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss.fff") + " " + msg;

      try
      {
          lock (_logLock)
          {
              WatchdogPaths.EnsureDataDir();
              string logFile = Path.Combine(WatchdogPaths.DataDir, "watchdog.log");
              File.AppendAllText(logFile, entry + Environment.NewLine);
          }
      }
      catch
      {
          // File logging is best-effort.
      }

      try
      {
          Console.WriteLine(entry);
      }
      catch
      {
          // Console output is best-effort as well.
      }
  }
  188. }
  189. }