feat: add simple files

This commit is contained in:
chuan
2026-03-27 17:08:58 +08:00
Unverified
parent f213aa3edf
commit a50fd78652
8 changed files with 594 additions and 9 deletions
Generated
+287
View File
@@ -2,6 +2,293 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "anstream"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
[[package]]
name = "anstyle-parse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "clap"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "colorchoice"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indexmap"
version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "876ac351060d4f882bb1032b6369eb0aef79ad9df1ea8bc404874d8cc3d0cd98"
dependencies = [
"serde_core",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "toml"
version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8195ca05e4eb728f4ba94f3e3291661320af739c4e43779cbdfae82ab239fcc"
dependencies = [
"indexmap",
"serde_core",
"serde_spanned",
"toml_datetime",
"toml_parser",
"toml_writer",
"winnow",
]
[[package]]
name = "toml_datetime"
version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_parser"
version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011"
dependencies = [
"winnow",
]
[[package]]
name = "toml_writer"
version = "1.1.0+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d282ade6016312faf3e41e57ebbba0c073e4056dab1232ab1cb624199648f8ed"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "watchdog"
version = "0.1.0"
dependencies = [
"clap",
"serde",
"toml",
]
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
[[package]]
name = "winnow"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8"
+3
View File
@@ -4,3 +4,6 @@ version = "0.1.0"
edition = "2024"
[dependencies]
# Constrained to the 1.x series (Cargo.lock resolves 1.1.0). A bare "*" would
# accept any future major release, including ones with breaking API changes.
toml = "1"
clap = { version = "4", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
+1 -2
View File
@@ -17,5 +17,4 @@ sudo apt install -y gcc-aarch64-linux-gnu
rustup target add aarch64-unknown-linux-musl
sudo apt install -y musl-tools gcc-aarch64-linux-gnu
```
```
+190
View File
@@ -0,0 +1,190 @@
# Watchdog 项目 PRD
## 1. 项目概述
本项目目标是实现一个运行于 Linux 平台的守护型看门狗程序,用于监控一个或多个目标程序的运行状态,并在目标程序异常退出、未启动或持续不稳定时,按照预定义策略执行重启与回退。该程序首先服务于 Linux arm64 部署场景,要求支持在 x64 开发环境中交叉编译到 arm64 设备运行。系统设计时需保留未来扩展到 Windows 的可能,但第一版本不承担 Windows 兼容实现。
该项目不负责业务程序本身的逻辑,不承担升级平台、远程编排、图形界面或复杂运维平台职责。其定位是一个本地进程守护与回退执行器,提供稳定、可预测、可测试的最小核心能力。
## 2. 项目目标
第一版本的核心目标如下:
1. 支持监控多个目标程序。
2. 支持通过轮询方式检测程序是否处于有效运行状态。
3. 支持在目标程序未运行时自动拉起。
4. 支持对频繁崩溃或持续启动失败的程序执行 fallback。
5. fallback 顺序固定,但每一级 fallback 的目标内容由配置指定。
6. fallback 为临时运行时切换,不持久化到磁盘。
7. 支持固定 CLI 子命令执行。
8. 支持 stdout/stderr 日志输出。
9. 支持单元测试,保证核心状态机与策略逻辑可验证。
## 3. 非目标
第一版本明确不做以下内容:
- 不支持任意 shell 指令编排。
- 不支持动态插件系统。
- 不支持配置热更新。
- 不支持远程管理与网络 API。
- 不支持安装包管理、升级平台和复杂版本治理。
- 不支持集成测试框架自动化,仅要求单元测试。
- 不支持所有极端失败场景的复杂恢复决策;若最终 fallback 目标仍无法启动,则系统持续尝试最后一级 fallback 即可。
## 4. 核心业务场景
### 4.1 目标程序未启动
看门狗轮询检测目标程序状态;若检测失败,系统尝试执行该目标当前激活版本的启动命令。
### 4.2 目标程序启动后短时间退出
若程序启动后未达到“稳定运行判定窗口”(当前约定为 5 秒)即退出,则该次行为视为启动失败,不计入 crash。
### 4.3 目标程序稳定运行后退出
若程序已连续存活至少 5 秒,再发生退出,则视为一次 crash 事件。看门狗根据配置统计检测窗口内 crash 次数,达到阈值后触发 fallback。
### 4.4 目标程序持续不稳定
当一个目标在给定检测窗口内达到阈值,例如默认 1 分钟内 3 次 crash,则切换到固定顺序中的下一级 fallback 目标。
### 4.5 fallback 已触发
一旦某目标切换到 fallback,除非整个 watchdog 进程重启,否则不主动恢复到主目标。系统继续守护当前 fallback 目标;若仍失败,则继续向后切换,直到最终 fallback 目标。最终 fallback 目标若仍无法启动,则持续尝试该最终目标。
## 5. 功能需求
### 5.1 多目标监控
系统需支持多个目标程序并发受管。第一版本允许采用单线程轮询模型,即每一轮按顺序遍历所有目标,分别执行检测、重启和 fallback 决策。每个目标需维护独立运行状态,互不影响。
### 5.2 启动方式
所有目标及 fallback 均采用 `exec` 方式启动。即配置明确给出:
- 可执行文件路径
- 参数列表
- 可选工作目录
第一版本不通过 shell 解释器启动,不依赖 `sh -c` 或类似方式,以降低转义复杂度、不确定性和命令注入风险。
### 5.3 检测机制
检测机制采用“provider + 配置项”的方式实现。第一版本不构建高度抽象的通用组合 DSL,而是定义若干固定检测方法,每个方法对应明确配置字段。一个目标可配置多个检测方法;所有检测方法均通过时,视为目标当前健康。
已确认需要支持或预留的检测维度包括:
- 进程是否存在
- 程序文件是否存在
- PID 文件是否存在
- 指定文件是否存在
其中“程序文件是否存在”检测的是可执行文件在磁盘上是否存在,它只是检测条件之一,不等同于“进程是否存在”。
### 5.4 稳定运行与 crash 判定
系统需定义稳定运行窗口。当前约定为 5 秒:
- 启动后若存活不足 5 秒即退出,视为启动失败。
- 启动后若连续运行至少 5 秒再退出,视为 crash。
看门狗需统计检测窗口中的 crash 事件次数。默认策略为“1 分钟内 3 次 crash 触发 fallback”,该阈值与窗口长度应允许通过配置覆盖。
### 5.5 fallback 机制
fallback 顺序固定,第一版本定义为:
1. 主目标
2. 上一个版本
3. 更新后的出厂版本
4. 出厂版本
配置文件负责指定每一级实际启动目标,不负责改写顺序。fallback 的本质是运行时切换当前激活目标,不修改持久化配置、不覆盖原始程序文件、不执行安装动作。切换后仅影响当前 watchdog 生命周期内的行为。
### 5.6 CLI 子命令
程序需支持固定 CLI 子命令。第一版本至少应考虑以下命令集合:
- `run`:启动 watchdog 主循环
- `check`:执行一次检测
- `start`:手动启动指定目标
- `check-env`:检测运行环境
是否进一步开放 fallback 相关手工命令可在后续迭代决定,但不作为第一版本必需能力。
### 5.7 日志
第一版本日志输出目标为 stdout/stderr。日志需要覆盖以下关键信息:
- watchdog 启动与退出
- 每个目标的检测结果
- 启动尝试
- 启动失败
- 稳定运行判定通过
- crash 事件记录
- fallback 切换
- 最终 fallback 持续尝试
后续如需写文件或接入 journald,应通过独立日志层扩展,而不影响核心状态机。
### 5.8 测试
必须提供单元测试,覆盖以下核心逻辑:
- 启动成功与启动失败判定
- 稳定运行 5 秒后的 crash 判定
- 检测窗口内 crash 计数
- fallback 触发阈值
- fallback 顺序推进
- 多目标状态相互隔离
## 6. 状态机要求
每个目标必须维护独立状态。最少应包含以下运行态信息:
- 当前激活层级(主目标或某一级 fallback)
- 当前进程状态
- 最近一次启动时间
- 是否已达到稳定运行门槛
- 检测窗口内 crash 记录
- 当前是否处于 fallback 模式
watchdog 主循环根据检测结果和状态决定下一步动作:
1. 检测通过:保持当前状态。
2. 检测失败且当前无进程:尝试启动当前激活目标。
3. 启动后未满 5 秒退出:记为启动失败。
4. 启动后满 5 秒退出:记为一次 crash。
5. crash 在窗口内达到阈值:切换到下一级 fallback。
6. 已在最终 fallback:持续尝试最终 fallback,不再设计额外终止分支。
## 7. 架构原则
第一版本应坚持以下架构原则:
- Linux 优先,接口设计与平台实现分离,为未来 Windows 预留空间。
- 业务状态机与系统调用分离。
- 检测器、启动器、fallback 决策解耦。
- 多目标共享框架、独立状态。
- 配置驱动目标定义,避免业务逻辑硬编码。
- 第一版本优先简单可维护,不提前引入过度抽象。
## 8. 实施计划
### 阶段一:基础框架
完成项目骨架、CLI 入口、日志初始化、配置加载与最小多目标数据模型。
### 阶段二:核心运行闭环
实现目标轮询、exec 启动、基础检测器、稳定窗口判定、crash 记录。
### 阶段三:fallback 机制
实现固定顺序 fallback、目标切换、最终 fallback 持续尝试。
### 阶段四:测试与部署
补充关键单元测试,完善 x64 到 arm64 musl 交叉编译与部署脚本。
## 9. 成功标准
第一版本交付时,应满足以下标准:
1. 能在 Linux arm64 目标机上稳定运行。
2. 能同时监控多个目标程序。
3. 能对未启动程序自动拉起。
4. 能识别“启动失败”和“稳定运行后 crash”。
5. 能在检测窗口内依据 crash 次数触发 fallback。
6. 能按照固定顺序切换 fallback 目标。
7. 能输出足够定位问题的日志。
8. 核心状态机具备单元测试覆盖。
## 10. 结论
该项目第一版本定位为一个本地、轻量、配置驱动的多目标 watchdog。其核心价值在于以明确、稳定、低复杂度的方式实现进程守护、稳定性判定与临时 fallback 切换。设计上应优先保证 Linux 场景落地与 arm64 部署稳定性,同时通过合理模块边界为后续平台扩展、检测器扩展与日志扩展保留空间。
+54
View File
@@ -0,0 +1,54 @@
use std::fs;
use std::path::Path;
use crate::config::Config;
/// Reads `watchdog.toml` from the current working directory, parses it into a
/// [`Config`], and validates the result.
///
/// # Errors
///
/// Returns an error when the file does not exist, cannot be read, contains
/// only whitespace, is not valid TOML for [`Config`], or fails
/// [`Config::validate`].
fn load_file() -> Result<Config, Box<dyn std::error::Error>> {
    let path = Path::new("watchdog.toml");
    if !path.exists() {
        let message = format!("missing config file: {}", path.display());
        return Err(message.into());
    }

    let raw = fs::read_to_string(path)?;
    if raw.trim().is_empty() {
        return Err("watchdog.toml exists but is empty".into());
    }

    let parsed: Config = toml::from_str(&raw)?;
    parsed.validate()?;
    Ok(parsed)
}
/// Entry point for the `run` subcommand.
///
/// Currently a stub: it only prints a notice to stdout.
pub fn run() {
    let notice = "run: not implemented yet";
    println!("{notice}");
}
/// Entry point for the `check` subcommand.
///
/// Loads the configuration and prints which service would be checked and the
/// command that would be run; the command itself is not executed.
///
/// # Errors
///
/// Propagates any error from loading or validating the configuration.
pub fn check() -> Result<(), Box<dyn std::error::Error>> {
    let cfg = load_file()?;
    let (service, command) = (&cfg.service_name, &cfg.check_command);

    println!("checking service: {service}");
    println!("would run: {command}");
    Ok(())
}
/// Entry point for the `start` subcommand.
///
/// Currently a stub: it only prints a notice to stdout.
pub fn start() {
    let notice = "start: not implemented yet";
    println!("{notice}");
}
/// Entry point for the `check-env` subcommand.
///
/// Loads and validates the configuration, then prints a success line, the
/// `Debug` representation of the whole config, and each field on its own line.
///
/// # Errors
///
/// Propagates any error from loading or validating the configuration.
pub fn check_env() -> Result<(), Box<dyn std::error::Error>> {
    let cfg = load_file()?;

    println!("environment ok: config parsed successfully");
    println!("config = {cfg:?}");

    let interval = cfg.interval;
    let service_name = &cfg.service_name;
    let check_command = &cfg.check_command;
    println!("interval = {interval}");
    println!("service_name = {service_name}");
    println!("check_command = {check_command}");

    Ok(())
}
+26
View File
@@ -0,0 +1,26 @@
use serde::Deserialize;
/// Watchdog settings deserialized from the configuration file.
///
/// All three fields are required by the derive; `validate` additionally
/// rejects a zero interval and blank strings.
#[derive(Deserialize, Debug)]
pub struct Config {
/// Must be greater than 0.
/// NOTE(review): presumably the polling interval in seconds — confirm the unit.
pub interval: u64,
/// Name of the monitored service; must not be blank.
pub service_name: String,
/// Command associated with the check; must not be blank.
/// NOTE(review): currently only printed, never executed — confirm intended use.
pub check_command: String,
}
impl Config {
    /// Checks that every field holds a usable value.
    ///
    /// Fields are examined in declaration order and only the first problem
    /// found is reported.
    ///
    /// # Errors
    ///
    /// Returns an error when `interval` is 0, or when `service_name` or
    /// `check_command` is empty or whitespace-only.
    pub fn validate(&self) -> Result<(), Box<dyn std::error::Error>> {
        let problem = if self.interval == 0 {
            Some("interval must be greater than 0")
        } else if self.service_name.trim().is_empty() {
            Some("service_name must not be empty")
        } else if self.check_command.trim().is_empty() {
            Some("check_command must not be empty")
        } else {
            None
        };

        match problem {
            Some(message) => Err(message.into()),
            None => Ok(()),
        }
    }
}
+30 -7
View File
@@ -1,10 +1,33 @@
fn main() {
println!("Hello, world!");
use clap::{Parser, Subcommand};
let a = 1;
let b = 2;
mod commands;
mod config;
let c = a + b;
println!("The sum of {} and {} is {}", a, b, c);
// Top-level command-line interface, parsed via clap's derive API.
// Plain `//` comments are used here on purpose: clap's derive macros read
// `///` doc comments as help text.
#[derive(Parser, Debug)]
#[command(name = "watchdog")]
#[command(about = "A lightweight watchdog service", long_about = None)]
struct Cli {
// The subcommand selected on the command line; dispatched in `main`.
#[command(subcommand)]
command: Command,
}
// Subcommands accepted by the `watchdog` binary.
//
// The `///` comment on each variant is picked up by clap's derive macro and
// shown as that subcommand's description in `--help` output.
#[derive(Subcommand, Debug)]
enum Command {
    /// Start the watchdog main loop
    Run,
    /// Run the configured check once
    Check,
    /// Manually start the target
    Start,
    /// Check the runtime environment and configuration
    CheckEnv,
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
match cli.command {
Command::Run => commands::run(),
Command::Check => commands::check()?,
Command::Start => commands::start(),
Command::CheckEnv => commands::check_env()?,
}
Ok(())
}
+3
View File
@@ -0,0 +1,3 @@
interval = 60
service_name = "demo"
check_command = "echo hello"