fix: 修复服务停止和启动中的子进程泄漏及 IPv6 健康检查问题
- 修复 taskkill 缺少 /T 标志导致子进程残留
- 使用 psutil 收集并终止所有子进程
- 新增 check_http_multi 支持 localhost/IPv4/IPv6 健康检查
- 修复 Vite 仅监听 IPv6 导致健康检查超时问题
- restart 命令等待时间从 2 秒增加到 3 秒

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+37
-12
@@ -167,6 +167,18 @@ class HealthChecker:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def check_http_multi(
|
||||
port: int, path: str, service_config: ServiceConfig, timeout: float = 2.0
|
||||
) -> bool:
|
||||
"""HTTP 健康检查(自动尝试 IPv4 和 IPv6)"""
|
||||
hosts = ["localhost", "127.0.0.1", "[::1]"]
|
||||
for host in hosts:
|
||||
url = f"http://{host}:{port}{path}"
|
||||
if HealthChecker.check_http(url, service_config, timeout):
|
||||
return True
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def wait_for_ready(
|
||||
cls, config: ServiceConfig, timeout: int = 60, interval: int = 2
|
||||
@@ -180,9 +192,8 @@ class HealthChecker:
|
||||
while time.time() - start_time < timeout:
|
||||
# 先检查端口
|
||||
if cls.check_port(config.port):
|
||||
# 端口通了,再进行 HTTP 检查
|
||||
url = f"http://127.0.0.1:{config.port}{config.health_check_path}"
|
||||
if cls.check_http(url, config):
|
||||
# 端口通了,再进行 HTTP 检查(支持 IPv4 和 IPv6)
|
||||
if cls.check_http_multi(config.port, config.health_check_path, config):
|
||||
print(f"\r{Colors.INFO} 等待服务就绪... [OK]")
|
||||
return True
|
||||
|
||||
@@ -458,7 +469,7 @@ class ProcessManager:
|
||||
|
||||
@classmethod
|
||||
def stop(cls, config: ServiceConfig, timeout: int = 10) -> bool:
|
||||
"""优雅停止服务"""
|
||||
"""优雅停止服务(包括所有子进程)"""
|
||||
pid = cls._read_pid(config)
|
||||
if pid is None:
|
||||
print(f"{Colors.WARN} 未找到 {config.name} 的 PID 文件")
|
||||
@@ -472,13 +483,23 @@ class ProcessManager:
|
||||
print(f"{Colors.INFO} 停止 {config.name} (PID: {pid})...")
|
||||
|
||||
try:
|
||||
process = psutil.Process(pid)
|
||||
parent = psutil.Process(pid)
|
||||
|
||||
# 发送终止信号
|
||||
# 收集所有子进程
|
||||
children = parent.children(recursive=True)
|
||||
|
||||
# 先发送终止信号给子进程
|
||||
for child in children:
|
||||
try:
|
||||
child.terminate()
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
pass
|
||||
|
||||
# 再终止父进程
|
||||
if platform.system() == "Windows":
|
||||
subprocess.run(["taskkill", "/PID", str(pid)], capture_output=True)
|
||||
else:
|
||||
process.terminate()
|
||||
parent.terminate()
|
||||
|
||||
# 等待进程退出
|
||||
for _ in range(timeout * 2):
|
||||
@@ -488,15 +509,19 @@ class ProcessManager:
|
||||
return True
|
||||
time.sleep(0.5)
|
||||
|
||||
# 超时,强制杀死
|
||||
# 超时,强制杀死父进程和子进程
|
||||
print(f"{Colors.WARN} {config.name} 未响应,强制终止...")
|
||||
for child in children:
|
||||
try:
|
||||
child.kill()
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
pass
|
||||
if platform.system() == "Windows":
|
||||
subprocess.run(
|
||||
["taskkill", "/F", "/PID", str(pid)], capture_output=True
|
||||
)
|
||||
else:
|
||||
# Windows 上 signal 没有 SIGKILL,用 SIGTERM 替代
|
||||
process.kill()
|
||||
parent.kill()
|
||||
|
||||
time.sleep(1)
|
||||
cls._write_pid(config, None)
|
||||
@@ -723,11 +748,11 @@ class ServiceManager:
|
||||
"""重启服务"""
|
||||
if service_key and service_key != "all":
|
||||
self.stop(service_key)
|
||||
time.sleep(2)
|
||||
time.sleep(3)
|
||||
self.start(service_key)
|
||||
else:
|
||||
self._stop_all()
|
||||
time.sleep(2)
|
||||
time.sleep(3)
|
||||
self._start_all()
|
||||
|
||||
def status(self):
|
||||
|
||||
Reference in New Issue
Block a user