fix(proxy): harden takeover-residue recovery across config-dir switches

Changing app_config_dir relocates the SQLite database, so a restart
triggered while proxy takeover is active used to strand the live
configs: the new instance read a fresh DB with no live backups, the
first-run import then persisted the PROXY_MANAGED placeholder as the
`default` provider, and the no-backup recovery path wrote that
placeholder right back to the live files — leaving Claude/Codex/Gemini
pointed at a dead local proxy with no automatic way out.

Three orthogonal fixes, defense in depth:

- restart_app now awaits cleanup_before_exit() before app.restart().
  Since #4069 the ExitRequested handler intentionally defers restart
  requests to Tauri's default re-exec without custom cleanup, which is
  correct for same-DB restarts but not for this command's dir-change
  use case: only the old instance holds the backups needed to restore
  the taken-over live files, so it must restore them while its event
  loop is still alive.
- import_default_config refuses to import a live config that is under
  proxy takeover (placeholder detected), instead of persisting it as
  the current provider.
- restore_live_from_ssot_for_app validates that the current provider's
  settings_config does not itself contain takeover placeholders before
  writing it back; polluted SSOT now falls through to the placeholder
  cleanup fallback.

Regression tests cover the import guard and the no-backup recovery
path (the latter fails before this change by writing PROXY_MANAGED
back to live).
This commit is contained in:
Jason
2026-06-11 19:24:20 +08:00
Unverified
parent a3598fd976
commit 98f4788458
5 changed files with 119 additions and 1 deletions
+6
View File
@@ -79,6 +79,12 @@ pub async fn restart_app(app: AppHandle) -> Result<bool, String> {
// 在后台延迟重启,让函数有时间返回响应
tauri::async_runtime::spawn(async move {
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
// app.restart() 走 RESTART_EXIT_CODE 路径,ExitRequested 处理器会直接
// 放行给 Tauri 默认 re-exec,不执行代理/Live 清理。但本命令用于
// app_config_dir 变更后的重启:新实例会切到新数据库,拿不到旧库里的
// Live 备份,无法恢复被接管的 Live 配置。因此必须趁旧实例的事件循环
// 仍存活,在这里同步完成恢复(保留代理状态,新实例启动时自动重新接管)。
crate::cleanup_before_exit(&app).await;
app.restart();
});
Ok(true)
+16
View File
@@ -1133,6 +1133,22 @@ pub fn import_default_config(state: &AppState, app_type: AppType) -> Result<bool
return Ok(false);
}
// 拒绝把"被代理接管的 Live"导入为供应商:接管期间 Live 里只有
// PROXY_MANAGED 占位符和本地代理地址,不是用户的真实配置。一旦导入,
// 它会成为 current provider(SSOT),后续"无备份恢复"路径会把占位符
// 当真实配置写回 Live,永久卡在已失效的本地代理上。
// 典型触发场景:代理接管开启时切换 app_config_dir 并重启,新数据库首启导入。
if state
.proxy_service
.detect_takeover_in_live_config_for_app(&app_type)
{
return Err(AppError::localized(
"provider.import.live_taken_over",
"Live 配置当前处于代理接管状态(包含占位符),不能导入为供应商。请先关闭代理接管或恢复 Live 配置后重试。",
"The live config is currently taken over by the proxy (contains placeholders) and cannot be imported as a provider. Disable proxy takeover or restore the live config first.",
));
}
let settings_config = match app_type {
AppType::Codex => crate::codex_config::read_codex_live_settings()?,
AppType::Claude => {
+11 -1
View File
@@ -1632,7 +1632,7 @@ impl ProxyService {
///
/// 返回值:
/// - Ok(true):已成功写回
/// - Ok(false):缺少当前供应商/供应商不存在,无法写回
/// - Ok(false):缺少当前供应商/供应商不存在/供应商本身含占位符,无法写回
fn restore_live_from_ssot_for_app(&self, app_type: &AppType) -> Result<bool, String> {
let current_id = crate::settings::get_effective_current_provider(&self.db, app_type)
.map_err(|e| format!("获取 {app_type:?} 当前供应商失败: {e}"))?;
@@ -1650,6 +1650,16 @@ impl ProxyService {
return Ok(false);
};
// 供应商配置本身含接管占位符时不可写回(历史异常:接管期间 Live 被
// 误导入成了供应商)。写回只会把占位符固化进 Live;返回 Ok(false)
// 让调用方落到"清理占位符"兜底。
if Self::live_has_proxy_placeholder_for_app(app_type, &provider.settings_config) {
log::warn!(
"{app_type:?} 当前供应商配置含代理接管占位符(疑似接管期间被导入的残留),跳过 SSOT 写回,改走占位符清理"
);
return Ok(false);
}
write_live_with_common_config(self.db.as_ref(), app_type, provider)
.map_err(|e| format!("写入 {app_type:?} Live 配置失败: {e}"))?;
+27
View File
@@ -588,3 +588,30 @@ fn switch_provider_codex_missing_auth_returns_error_and_keeps_state() {
"current provider should remain empty or be the attempted id on failure, got: {current_id:?}"
);
}
/// Regression: a Codex live config that is under proxy takeover must be rejected by the
/// default-config import, and the rejected import must not leave any provider behind.
#[test]
fn import_refuses_live_config_under_proxy_takeover() {
    let _guard = test_mutex().lock().expect("acquire test mutex");
    reset_test_fs();
    ensure_test_home();

    // Seed a taken-over Codex live config: the auth entry carries the PROXY_MANAGED
    // placeholder instead of the user's real configuration.
    let placeholder_auth = json!({"OPENAI_API_KEY": "PROXY_MANAGED"});
    let live_toml = r#"model = "gpt-5"
"#;
    write_codex_live_atomic(&placeholder_auth, Some(live_toml))
        .expect("seed taken-over codex live");

    let state = create_test_state().expect("create test state");

    // The import itself has to fail ...
    let _rejection = import_default_config_test_hook(&state, AppType::Codex)
        .expect_err("importing a taken-over live config must fail");

    // ... and nothing may have been persisted for Codex as a side effect.
    let codex_providers = state
        .db
        .get_all_providers(AppType::Codex.as_str())
        .expect("get codex providers");
    assert!(
        codex_providers.is_empty(),
        "taken-over live import must not create providers"
    );
}
+59
View File
@@ -1911,3 +1911,62 @@ fn provider_service_delete_current_provider_returns_error() {
other => panic!("expected Config/Message error, got {other:?}"),
}
}
/// Regression: when the DB holds no live backup and the current provider's stored settings
/// are themselves the takeover placeholders, crash recovery must not write those
/// placeholders back into the Claude live settings file.
#[test]
fn recover_from_crash_without_backup_cleans_placeholder_instead_of_writing_it_back() {
    let _guard = test_mutex().lock().expect("acquire test mutex");
    reset_test_fs();
    let _home = ensure_test_home();

    // Taken-over Claude live config with no backup in the DB (simulates the first start
    // on a fresh database after app_config_dir was switched).
    let proxied_live = json!({
        "env": {
            "ANTHROPIC_BASE_URL": "http://127.0.0.1:15721",
            "ANTHROPIC_AUTH_TOKEN": "PROXY_MANAGED"
        }
    });
    let live_path = get_claude_settings_path();
    let live_dir = live_path.parent().expect("settings dir");
    std::fs::create_dir_all(live_dir).expect("create dir");
    let live_text =
        serde_json::to_string_pretty(&proxied_live).expect("serialize taken over live");
    std::fs::write(&live_path, live_text).expect("write taken over live");

    let state = create_test_state().expect("create test state");

    // Simulate the historical anomaly: the taken-over live config was already imported
    // as the current provider, i.e. the SSOT is polluted with placeholders.
    let polluted_provider = Provider::with_id(
        "default".to_string(),
        "default".to_string(),
        proxied_live,
        None,
    );
    state
        .db
        .save_provider(AppType::Claude.as_str(), &polluted_provider)
        .expect("save placeholder provider");
    state
        .db
        .set_current_provider(AppType::Claude.as_str(), "default")
        .expect("set current provider");

    futures::executor::block_on(state.proxy_service.recover_from_crash())
        .expect("recover from crash");

    let live_after: serde_json::Value =
        read_json_file(&live_path).expect("read live settings after recovery");

    // A missing "env" object is treated the same as an empty one: both lookups yield None.
    let env = live_after.get("env");
    let auth_token = env
        .and_then(|vars| vars.get("ANTHROPIC_AUTH_TOKEN"))
        .and_then(|value| value.as_str());
    let base_url = env
        .and_then(|vars| vars.get("ANTHROPIC_BASE_URL"))
        .and_then(|value| value.as_str());

    assert_ne!(
        auth_token,
        Some("PROXY_MANAGED"),
        "recovery must not write the placeholder back to live"
    );
    // An absent (or non-string) base URL is acceptable; a loopback one is not.
    assert!(
        base_url.map_or(true, |url| !url.starts_with("http://127.0.0.1")),
        "recovery must drop the local proxy base URL"
    );
}