mirror of
https://github.com/router-for-me/CLIProxyAPI.git
synced 2026-02-03 13:00:52 +08:00
refactor: improve thinking logic
This commit is contained in:
116
internal/thinking/provider/claude/apply.go
Normal file
116
internal/thinking/provider/claude/apply.go
Normal file
@@ -0,0 +1,116 @@
|
||||
// Package claude implements thinking configuration scaffolding for Claude models.
|
||||
//
|
||||
// Claude models use the thinking.budget_tokens format with values in the range
|
||||
// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
|
||||
// while older models do not.
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
|
||||
package claude
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for Claude models.
|
||||
// This applier is stateless and holds no configuration.
|
||||
type Applier struct{}
|
||||
|
||||
// NewApplier creates a new Claude thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("claude", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Claude request body.
|
||||
//
|
||||
// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig.
|
||||
// ValidateConfig handles:
|
||||
// - Mode conversion (Level→Budget, Auto→Budget)
|
||||
// - Budget clamping to model range
|
||||
// - ZeroAllowed constraint enforcement
|
||||
//
|
||||
// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged.
|
||||
//
|
||||
// Expected output format when enabled:
|
||||
//
|
||||
// {
|
||||
// "thinking": {
|
||||
// "type": "enabled",
|
||||
// "budget_tokens": 16384
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Expected output format when disabled:
|
||||
//
|
||||
// {
|
||||
// "thinking": {
|
||||
// "type": "disabled"
|
||||
// }
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if modelInfo == nil {
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
if modelInfo.Type == "" {
|
||||
modelID := modelInfo.ID
|
||||
if modelID == "" {
|
||||
modelID = "unknown"
|
||||
}
|
||||
return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
|
||||
}
|
||||
return applyCompatibleClaude(body, config)
|
||||
}
|
||||
|
||||
// Only process ModeBudget and ModeNone; other modes pass through
|
||||
// (caller should use ValidateConfig first to normalize modes)
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
// Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced)
|
||||
// Decide enabled/disabled based on budget value
|
||||
if config.Budget == 0 {
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "disabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
case thinking.ModeAuto:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens")
|
||||
return result, nil
|
||||
default:
|
||||
result, _ := sjson.SetBytes(body, "thinking.type", "enabled")
|
||||
result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget)
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
288
internal/thinking/provider/claude/apply_test.go
Normal file
288
internal/thinking/provider/claude/apply_test.go
Normal file
@@ -0,0 +1,288 @@
|
||||
// Package claude implements thinking configuration for Claude models.
|
||||
package claude
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests: Applier Creation and Interface
|
||||
// =============================================================================
|
||||
|
||||
func TestNewApplier(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
if applier == nil {
|
||||
t.Fatal("NewApplier() returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplierImplementsInterface(t *testing.T) {
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests: Budget and Disable Logic (Pre-validated Config)
|
||||
// =============================================================================
|
||||
|
||||
// TestClaudeApplyBudgetAndNone tests budget values and disable modes.
// NOTE: These tests assume config has been pre-validated by ValidateConfig.
// Apply trusts the input and does not perform clamping.
func TestClaudeApplyBudgetAndNone(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildClaudeModelInfo()

	tests := []struct {
		name         string
		config       thinking.ThinkingConfig
		wantType     string // expected value of thinking.type
		wantBudget   int    // expected thinking.budget_tokens (when present)
		wantBudgetOK bool   // whether thinking.budget_tokens should exist at all
	}{
		// Valid pre-validated budget values
		{"budget 16k", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, "enabled", 16384, true},
		{"budget min", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, "enabled", 1024, true},
		{"budget max", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 128000}, "enabled", 128000, true},
		{"budget mid", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "enabled", 50000, true},
		// Disable cases: zero budget or ModeNone must emit type=disabled
		// with no budget_tokens field.
		{"budget zero disables", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "disabled", 0, false},
		{"mode none disables", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "disabled", 0, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			thinkingType := gjson.GetBytes(result, "thinking.type").String()
			if thinkingType != tt.wantType {
				t.Fatalf("thinking.type = %q, want %q", thinkingType, tt.wantType)
			}

			budgetValue := gjson.GetBytes(result, "thinking.budget_tokens")
			if budgetValue.Exists() != tt.wantBudgetOK {
				t.Fatalf("thinking.budget_tokens exists = %v, want %v", budgetValue.Exists(), tt.wantBudgetOK)
			}
			if tt.wantBudgetOK {
				if got := int(budgetValue.Int()); got != tt.wantBudget {
					t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
				}
			}
		})
	}
}
|
||||
|
||||
// TestClaudeApplyPassthroughBudget tests that Apply trusts pre-validated budget values.
// It does NOT perform clamping - that's ValidateConfig's responsibility.
func TestClaudeApplyPassthroughBudget(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildClaudeModelInfo()

	tests := []struct {
		name       string
		config     thinking.ThinkingConfig
		wantBudget int
	}{
		// Apply should pass through the budget value as-is
		// (ValidateConfig would have clamped these, but Apply trusts the input)
		// 500 is below the model Min (1024) and 200000 is above Max (128000);
		// both must survive unchanged.
		{"passes through any budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 500}, 500},
		{"passes through large budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 200000}, 200000},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
				t.Fatalf("thinking.budget_tokens = %d, want %d (passthrough)", got, tt.wantBudget)
			}
		})
	}
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests: Mode Passthrough (Strict Layering)
|
||||
// =============================================================================
|
||||
|
||||
// TestClaudeApplyModePassthrough tests that non-Budget/None modes pass through unchanged.
// Apply expects ValidateConfig to have already converted Level/Auto to Budget.
func TestClaudeApplyModePassthrough(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildClaudeModelInfo()

	tests := []struct {
		name   string
		config thinking.ThinkingConfig
		body   string // incoming request body, expected back verbatim
	}{
		{"ModeLevel passes through", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "high"}, `{"model":"test"}`},
		{"ModeAuto passes through", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, `{"model":"test"}`},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Should return body unchanged (byte-for-byte)
			if string(result) != tt.body {
				t.Fatalf("Apply() = %s, want %s (passthrough)", string(result), tt.body)
			}
		})
	}
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests: Output Format
|
||||
// =============================================================================
|
||||
|
||||
// TestClaudeApplyOutputFormat tests the exact JSON output format.
//
// Claude expects:
//
//	{
//	  "thinking": {
//	    "type": "enabled",
//	    "budget_tokens": 16384
//	  }
//	}
func TestClaudeApplyOutputFormat(t *testing.T) {
	tests := []struct {
		name     string
		config   thinking.ThinkingConfig
		wantJSON string // exact serialized output, including key order
	}{
		{
			"enabled with budget",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
			`{"thinking":{"type":"enabled","budget_tokens":16384}}`,
		},
		{
			"disabled",
			thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0},
			`{"thinking":{"type":"disabled"}}`,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			applier := NewApplier()
			modelInfo := buildClaudeModelInfo()

			result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			// Exact string comparison pins both field values and ordering.
			if string(result) != tt.wantJSON {
				t.Fatalf("Apply() = %s, want %s", result, tt.wantJSON)
			}
		})
	}
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests: Body Merging
|
||||
// =============================================================================
|
||||
|
||||
// TestClaudeApplyWithExistingBody tests applying config to existing request body.
// Apply must merge the thinking block in without disturbing sibling fields.
func TestClaudeApplyWithExistingBody(t *testing.T) {
	tests := []struct {
		name     string
		body     string
		config   thinking.ThinkingConfig
		wantBody string
	}{
		{
			"add to empty body",
			`{}`,
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
			`{"thinking":{"type":"enabled","budget_tokens":16384}}`,
		},
		{
			"preserve existing fields",
			`{"model":"claude-sonnet-4-5","messages":[]}`,
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
			`{"model":"claude-sonnet-4-5","messages":[],"thinking":{"type":"enabled","budget_tokens":8192}}`,
		},
		{
			// A pre-existing thinking block must be overwritten, not merged.
			"override existing thinking",
			`{"thinking":{"type":"enabled","budget_tokens":1000}}`,
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
			`{"thinking":{"type":"enabled","budget_tokens":16384}}`,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			applier := NewApplier()
			modelInfo := buildClaudeModelInfo()

			result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}
			if string(result) != tt.wantBody {
				t.Fatalf("Apply() = %s, want %s", result, tt.wantBody)
			}
		})
	}
}
|
||||
|
||||
// TestClaudeApplyWithNilBody tests handling of nil/empty body.
// Apply normalizes nil, empty, and `{}` bodies into a fresh JSON object
// before writing the thinking block.
func TestClaudeApplyWithNilBody(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildClaudeModelInfo()

	tests := []struct {
		name       string
		body       []byte
		wantBudget int
	}{
		{"nil body", nil, 16384},
		{"empty body", []byte{}, 16384},
		{"empty object", []byte(`{}`), 16384},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}
			result, err := applier.Apply(tt.body, config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			if got := gjson.GetBytes(result, "thinking.type").String(); got != "enabled" {
				t.Fatalf("thinking.type = %q, want %q", got, "enabled")
			}
			if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
				t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
			}
		})
	}
}
|
||||
|
||||
// =============================================================================
|
||||
// Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
func buildClaudeModelInfo() *registry.ModelInfo {
|
||||
return ®istry.ModelInfo{
|
||||
ID: "claude-sonnet-4-5",
|
||||
Thinking: ®istry.ThinkingSupport{
|
||||
Min: 1024,
|
||||
Max: 128000,
|
||||
ZeroAllowed: true,
|
||||
DynamicAllowed: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
138
internal/thinking/provider/codex/apply.go
Normal file
138
internal/thinking/provider/codex/apply.go
Normal file
@@ -0,0 +1,138 @@
|
||||
// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
|
||||
//
|
||||
// Codex models use the reasoning.effort format with discrete levels
|
||||
// (low/medium/high). This is similar to OpenAI but uses nested field
|
||||
// "reasoning.effort" instead of "reasoning_effort".
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
|
||||
package codex
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for Codex models.
//
// Codex-specific behavior:
// - Output format: reasoning.effort (string: low/medium/high/xhigh)
// - Level-only mode: no numeric budget support
// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
//
// The applier is stateless; a single instance is registered at init time.
type Applier struct{}

// Compile-time check that Applier satisfies thinking.ProviderApplier.
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new Codex thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("codex", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Codex request body.
//
// Expected output format:
//
//	{
//	  "reasoning": {
//	    "effort": "high"
//	  }
//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		// No model metadata: nothing to apply.
		return body, nil
	}
	if modelInfo.Thinking == nil {
		if modelInfo.Type == "" {
			modelID := modelInfo.ID
			if modelID == "" {
				modelID = "unknown"
			}
			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
		}
		// User-defined (compatible) model without native thinking metadata.
		return applyCompatibleCodex(body, config)
	}

	// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
	if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	if config.Mode == thinking.ModeLevel {
		result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level))
		return result, nil
	}

	// ModeNone: resolve an effort string via an ordered fallback chain.
	// 1. Budget==0 and the model can express "off" -> LevelNone.
	// 2. An explicit config.Level, if set.
	// 3. The model's first declared level.
	// If none of these yields a value, pass the body through untouched.
	effort := ""
	support := modelInfo.Thinking
	if config.Budget == 0 {
		if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
			effort = string(thinking.LevelNone)
		}
	}
	if effort == "" && config.Level != "" {
		effort = string(config.Level)
	}
	if effort == "" && len(support.Levels) > 0 {
		effort = support.Levels[0]
	}
	if effort == "" {
		return body, nil
	}

	result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
	return result, nil
}
|
||||
|
||||
func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
var effort string
|
||||
switch config.Mode {
|
||||
case thinking.ModeLevel:
|
||||
if config.Level == "" {
|
||||
return body, nil
|
||||
}
|
||||
effort = string(config.Level)
|
||||
case thinking.ModeNone:
|
||||
effort = string(thinking.LevelNone)
|
||||
if config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
case thinking.ModeAuto:
|
||||
// Auto mode for user-defined models: pass through as "auto"
|
||||
effort = string(thinking.LevelAuto)
|
||||
case thinking.ModeBudget:
|
||||
// Budget mode: convert budget to level using threshold mapping
|
||||
level, ok := thinking.ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return body, nil
|
||||
}
|
||||
effort = level
|
||||
default:
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning.effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// hasLevel reports whether target appears in levels, comparing
// case-insensitively and ignoring surrounding whitespace.
func hasLevel(levels []string, target string) bool {
	for i := range levels {
		candidate := strings.TrimSpace(levels[i])
		if strings.EqualFold(candidate, target) {
			return true
		}
	}
	return false
}
|
||||
172
internal/thinking/provider/gemini/apply.go
Normal file
172
internal/thinking/provider/gemini/apply.go
Normal file
@@ -0,0 +1,172 @@
|
||||
// Package gemini implements thinking configuration for Gemini models.
|
||||
//
|
||||
// Gemini models have two formats:
|
||||
// - Gemini 2.5: Uses thinkingBudget (numeric)
|
||||
// - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high)
|
||||
// or thinkingBudget=-1 for auto/dynamic mode
|
||||
//
|
||||
// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels:
|
||||
// - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x)
|
||||
// - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels)
|
||||
// - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5)
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier applies thinking configuration for Gemini models.
|
||||
//
|
||||
// Gemini-specific behavior:
|
||||
// - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed
|
||||
// - Gemini 3.x: thinkingLevel format, cannot be disabled
|
||||
// - Use ThinkingSupport.Levels to decide output format
|
||||
type Applier struct{}
|
||||
|
||||
// NewApplier creates a new Gemini thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("gemini", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Gemini request body.
//
// Expected output format (Gemini 2.5):
//
//	{
//	  "generationConfig": {
//	    "thinkingConfig": {
//	      "thinkingBudget": 8192,
//	      "includeThoughts": true
//	    }
//	  }
//	}
//
// Expected output format (Gemini 3.x):
//
//	{
//	  "generationConfig": {
//	    "thinkingConfig": {
//	      "thinkingLevel": "high",
//	      "includeThoughts": true
//	    }
//	  }
//	}
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
	if modelInfo == nil {
		// No model metadata: nothing to apply.
		return body, nil
	}
	if modelInfo.Thinking == nil {
		if modelInfo.Type == "" {
			modelID := modelInfo.ID
			if modelID == "" {
				modelID = "unknown"
			}
			return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
		}
		// User-defined (compatible) model without native thinking metadata.
		return a.applyCompatible(body, config)
	}

	if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
		return body, nil
	}

	if len(body) == 0 || !gjson.ValidBytes(body) {
		body = []byte(`{}`)
	}

	// Choose format based on config.Mode and model capabilities:
	// - ModeLevel: use Level format (validation will reject unsupported levels)
	// - ModeNone: use Level format if model has Levels, else Budget format
	// - ModeBudget/ModeAuto: use Budget format
	switch config.Mode {
	case thinking.ModeLevel:
		return a.applyLevelFormat(body, config)
	case thinking.ModeNone:
		// ModeNone: route based on model capability (has Levels or not)
		if len(modelInfo.Thinking.Levels) > 0 {
			return a.applyLevelFormat(body, config)
		}
		return a.applyBudgetFormat(body, config)
	default:
		return a.applyBudgetFormat(body, config)
	}
}
|
||||
|
||||
func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
// applyLevelFormat writes the Gemini 3.x thinkingLevel representation.
//
// ModeNone semantics:
// - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models)
// - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
// ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0.
func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
	// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
	result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget")

	if config.Mode == thinking.ModeNone {
		// Hide thought output; still emit the (lowest) level when one is set.
		result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false)
		if config.Level != "" {
			result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
		}
		return result, nil
	}

	// Only handle ModeLevel - budget conversion should be done by upper layer.
	// NOTE: this defensive branch deliberately returns the ORIGINAL body
	// (not result), leaving the request fully untouched.
	if config.Mode != thinking.ModeLevel {
		return body, nil
	}

	level := string(config.Level)
	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level)
	result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true)
	return result, nil
}
|
||||
|
||||
func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel")
|
||||
|
||||
budget := config.Budget
|
||||
// ModeNone semantics:
|
||||
// - ModeNone + Budget=0: completely disable thinking
|
||||
// - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false)
|
||||
// When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone.
|
||||
includeThoughts := false
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
includeThoughts = false
|
||||
case thinking.ModeAuto:
|
||||
includeThoughts = true
|
||||
default:
|
||||
includeThoughts = budget > 0
|
||||
}
|
||||
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts)
|
||||
return result, nil
|
||||
}
|
||||
526
internal/thinking/provider/gemini/apply_test.go
Normal file
526
internal/thinking/provider/gemini/apply_test.go
Normal file
@@ -0,0 +1,526 @@
|
||||
// Package gemini implements thinking configuration for Gemini models.
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestNewApplier(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
if applier == nil {
|
||||
t.Fatal("NewApplier() returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
// parseConfigFromSuffix parses a raw suffix into a ThinkingConfig.
|
||||
// This helper reduces code duplication in end-to-end tests (L1 fix).
|
||||
func parseConfigFromSuffix(rawSuffix string) (thinking.ThinkingConfig, bool) {
|
||||
if budget, ok := thinking.ParseNumericSuffix(rawSuffix); ok {
|
||||
return thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: budget}, true
|
||||
}
|
||||
if level, ok := thinking.ParseLevelSuffix(rawSuffix); ok {
|
||||
return thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: level}, true
|
||||
}
|
||||
if mode, ok := thinking.ParseSpecialSuffix(rawSuffix); ok {
|
||||
config := thinking.ThinkingConfig{Mode: mode}
|
||||
if mode == thinking.ModeAuto {
|
||||
config.Budget = -1
|
||||
}
|
||||
return config, true
|
||||
}
|
||||
return thinking.ThinkingConfig{}, false
|
||||
}
|
||||
|
||||
func TestApplierImplementsInterface(t *testing.T) {
|
||||
// Compile-time check: if Applier doesn't implement the interface, this won't compile
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
}
|
||||
|
||||
// TestGeminiApply tests the Gemini thinking applier.
//
// Gemini-specific behavior:
// - Gemini 2.5: thinkingBudget format (numeric)
// - Gemini 3.x: thinkingLevel format (string)
// - Flash series: ZeroAllowed=true
// - Pro series: ZeroAllowed=false, Min=128
// - CRITICAL: When budget=0/none, set includeThoughts=false
//
// Depends on: Epic 7 Story 7-2, 7-3
func TestGeminiApply(t *testing.T) {
	applier := NewApplier()
	tests := []struct {
		name                string
		model               string
		config              thinking.ThinkingConfig
		wantField           string      // thinkingConfig sub-field to inspect
		wantValue           interface{} // expected value (int, string, or bool)
		wantIncludeThoughts bool        // CRITICAL: includeThoughts field
	}{
		// Gemini 2.5 Flash (ZeroAllowed=true)
		{"flash budget 8k", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"flash zero", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false},
		{"flash none", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false},

		// Gemini 2.5 Pro (ZeroAllowed=false, Min=128)
		{"pro budget 8k", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"pro zero - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 128, false},
		{"pro none - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 128, false},
		{"pro below min", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50}, "thinkingBudget", 128, true},
		{"pro above max", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "thinkingBudget", 32768, true},
		{"pro auto", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Gemini 3 Pro (Level mode, ZeroAllowed=false)
		{"g3-pro high", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"g3-pro low", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true},
		{"g3-pro auto", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Gemini 3 Flash (Level mode, minimal is lowest)
		{"g3-flash high", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"g3-flash medium", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "thinkingLevel", "medium", true},
		{"g3-flash minimal", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Full pipeline: validate (clamp/convert) first, then apply.
			modelInfo := buildGeminiModelInfo(tt.model)
			normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
			switch want := tt.wantValue.(type) {
			case int:
				if int(gotField.Int()) != want {
					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
				}
			case string:
				if gotField.String() != want {
					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
				}
			case bool:
				if gotField.Bool() != want {
					t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want)
				}
			default:
				t.Fatalf("unsupported wantValue type %T", tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}
|
||||
|
||||
// TestGeminiApplyEndToEndBudgetZero tests suffix parsing + validation + apply for budget=0.
|
||||
//
|
||||
// This test covers the complete flow from suffix parsing to Apply output:
|
||||
// - AC#1: ModeBudget+Budget=0 → ModeNone conversion
|
||||
// - AC#3: Gemini 3 ModeNone+Budget>0 → includeThoughts=false + thinkingLevel=low
|
||||
// - AC#4: Gemini 2.5 Pro (0) → clamped to 128 + includeThoughts=false
|
||||
func TestGeminiApplyEndToEndBudgetZero(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantModel string
|
||||
wantField string // "thinkingBudget" or "thinkingLevel"
|
||||
wantValue interface{}
|
||||
wantIncludeThoughts bool
|
||||
}{
|
||||
// AC#4: Gemini 2.5 Pro - Budget format
|
||||
{"gemini-25-pro zero", "gemini-2.5-pro(0)", "gemini-2.5-pro", "thinkingBudget", 128, false},
|
||||
// AC#3: Gemini 3 Pro - Level format, ModeNone clamped to Budget=128, uses lowest level
|
||||
{"gemini-3-pro zero", "gemini-3-pro-preview(0)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
|
||||
{"gemini-3-pro none", "gemini-3-pro-preview(none)", "gemini-3-pro-preview", "thinkingLevel", "low", false},
|
||||
// Gemini 3 Flash - Level format, lowest level is "minimal"
|
||||
{"gemini-3-flash zero", "gemini-3-flash-preview(0)", "gemini-3-flash-preview", "thinkingLevel", "minimal", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
suffix := thinking.ParseSuffix(tt.model)
|
||||
if !suffix.HasSuffix {
|
||||
t.Fatalf("ParseSuffix(%q) HasSuffix = false, want true", tt.model)
|
||||
}
|
||||
if suffix.ModelName != tt.wantModel {
|
||||
t.Fatalf("ParseSuffix(%q) ModelName = %q, want %q", tt.model, suffix.ModelName, tt.wantModel)
|
||||
}
|
||||
|
||||
// Parse suffix value using helper function (L1 fix)
|
||||
config, ok := parseConfigFromSuffix(suffix.RawSuffix)
|
||||
if !ok {
|
||||
t.Fatalf("ParseSuffix(%q) RawSuffix = %q is not a valid suffix", tt.model, suffix.RawSuffix)
|
||||
}
|
||||
|
||||
modelInfo := buildGeminiModelInfo(suffix.ModelName)
|
||||
normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateConfig() error = %v", err)
|
||||
}
|
||||
|
||||
applier := NewApplier()
|
||||
result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
|
||||
// Verify the output field value
|
||||
gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
|
||||
switch want := tt.wantValue.(type) {
|
||||
case int:
|
||||
if int(gotField.Int()) != want {
|
||||
t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
|
||||
}
|
||||
case string:
|
||||
if gotField.String() != want {
|
||||
t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
|
||||
}
|
||||
}
|
||||
|
||||
gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
|
||||
if gotIncludeThoughts != tt.wantIncludeThoughts {
|
||||
t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyEndToEndAuto tests auto mode through both suffix parsing and direct config.
//
// This test covers:
//   - AC#2: Gemini 2.5 auto uses thinkingBudget=-1
//   - AC#3: Gemini 3 auto uses thinkingBudget=-1 (not thinkingLevel)
//   - Suffix parsing path: (auto) and (-1) suffixes
//   - Direct config path: ModeLevel + Level=auto → ModeAuto conversion
func TestGeminiApplyEndToEndAuto(t *testing.T) {
	tests := []struct {
		name                string
		model               string                  // model name (with suffix for parsing, or plain for direct config)
		directConfig        *thinking.ThinkingConfig // if not nil, use direct config instead of suffix parsing
		wantField           string
		wantValue           int
		wantIncludeThoughts bool
	}{
		// Suffix parsing path - Budget-only model (Gemini 2.5)
		{"suffix auto g25", "gemini-2.5-pro(auto)", nil, "thinkingBudget", -1, true},
		{"suffix -1 g25", "gemini-2.5-pro(-1)", nil, "thinkingBudget", -1, true},
		// Suffix parsing path - Hybrid model (Gemini 3)
		{"suffix auto g3", "gemini-3-pro-preview(auto)", nil, "thinkingBudget", -1, true},
		{"suffix -1 g3", "gemini-3-pro-preview(-1)", nil, "thinkingBudget", -1, true},
		// Direct config path - Level=auto → ModeAuto conversion
		{"direct level=auto g25", "gemini-2.5-pro", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true},
		{"direct level=auto g3", "gemini-3-pro-preview", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var config thinking.ThinkingConfig
			var modelName string

			if tt.directConfig != nil {
				// Direct config path
				config = *tt.directConfig
				modelName = tt.model
			} else {
				// Suffix parsing path
				suffix := thinking.ParseSuffix(tt.model)
				if !suffix.HasSuffix {
					t.Fatalf("ParseSuffix(%q) HasSuffix = false", tt.model)
				}
				modelName = suffix.ModelName
				var ok bool
				config, ok = parseConfigFromSuffix(suffix.RawSuffix)
				if !ok {
					t.Fatalf("parseConfigFromSuffix(%q) failed", suffix.RawSuffix)
				}
			}

			modelInfo := buildGeminiModelInfo(modelName)
			normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
			if err != nil {
				t.Fatalf("ValidateConfig() error = %v", err)
			}

			// Verify ModeAuto after validation; both input paths must normalize
			// to ModeAuto before Apply is called.
			if normalized.Mode != thinking.ModeAuto {
				t.Fatalf("ValidateConfig() Mode = %v, want ModeAuto", normalized.Mode)
			}

			applier := NewApplier()
			result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Auto mode always materializes as thinkingBudget=-1, even on
			// level-capable Gemini 3 models.
			gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField)
			if int(gotField.Int()) != tt.wantValue {
				t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}
|
||||
|
||||
func TestGeminiApplyInvalidBody(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildGeminiModelInfo("gemini-2.5-flash")
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateConfig() error = %v", err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body []byte
|
||||
}{
|
||||
{"nil body", nil},
|
||||
{"empty body", []byte{}},
|
||||
{"invalid json", []byte("{\"not json\"")},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := applier.Apply(tt.body, *normalized, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
|
||||
gotBudget := int(gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int())
|
||||
if gotBudget != 8192 {
|
||||
t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192)
|
||||
}
|
||||
|
||||
gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool()
|
||||
if !gotIncludeThoughts {
|
||||
t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyConflictingFields tests that conflicting fields are removed.
//
// When applying Budget format, any existing thinkingLevel should be removed.
// When applying Level format, any existing thinkingBudget should be removed.
func TestGeminiApplyConflictingFields(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name         string
		model        string
		config       thinking.ThinkingConfig
		existingBody string
		wantField    string // expected field to exist
		wantNoField  string // expected field to NOT exist
	}{
		// Budget format should remove existing thinkingLevel
		{
			"budget removes level",
			"gemini-2.5-pro",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
			`{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
		// Level format should remove existing thinkingBudget
		{
			"level removes budget",
			"gemini-3-pro-preview",
			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
			`{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`,
			"thinkingLevel",
			"thinkingBudget",
		},
		// ModeAuto uses budget format, should remove thinkingLevel
		{
			"auto removes level",
			"gemini-3-pro-preview",
			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
			`{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Note: the config is applied directly (no ValidateConfig step);
			// this test targets Apply's field-cleanup behavior only.
			modelInfo := buildGeminiModelInfo(tt.model)
			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Verify expected field exists
			wantPath := "generationConfig.thinkingConfig." + tt.wantField
			if !gjson.GetBytes(result, wantPath).Exists() {
				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
			}

			// Verify conflicting field was removed
			noPath := "generationConfig.thinkingConfig." + tt.wantNoField
			if gjson.GetBytes(result, noPath).Exists() {
				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
			}
		})
	}
}
|
||||
|
||||
// TestGeminiApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
|
||||
func TestGeminiApplyThinkingNotSupported(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
|
||||
// Model with nil Thinking support
|
||||
modelInfo := ®istry.ModelInfo{ID: "gemini-unknown", Thinking: nil}
|
||||
|
||||
_, err := applier.Apply([]byte(`{}`), config, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatal("Apply() expected error for nil Thinking, got nil")
|
||||
}
|
||||
|
||||
// Verify it's the correct error type
|
||||
thinkErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
|
||||
}
|
||||
if thinkErr.Code != thinking.ErrThinkingNotSupported {
|
||||
t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
|
||||
}
|
||||
}
|
||||
|
||||
func buildGeminiModelInfo(modelID string) *registry.ModelInfo {
|
||||
support := ®istry.ThinkingSupport{}
|
||||
switch modelID {
|
||||
case "gemini-2.5-pro":
|
||||
support.Min = 128
|
||||
support.Max = 32768
|
||||
support.ZeroAllowed = false
|
||||
support.DynamicAllowed = true
|
||||
case "gemini-2.5-flash", "gemini-2.5-flash-lite":
|
||||
support.Min = 0
|
||||
support.Max = 24576
|
||||
support.ZeroAllowed = true
|
||||
support.DynamicAllowed = true
|
||||
case "gemini-3-pro-preview":
|
||||
support.Min = 128
|
||||
support.Max = 32768
|
||||
support.ZeroAllowed = false
|
||||
support.DynamicAllowed = true
|
||||
support.Levels = []string{"low", "high"}
|
||||
case "gemini-3-flash-preview":
|
||||
support.Min = 128
|
||||
support.Max = 32768
|
||||
support.ZeroAllowed = false
|
||||
support.DynamicAllowed = true
|
||||
support.Levels = []string{"minimal", "low", "medium", "high"}
|
||||
default:
|
||||
// Unknown model - return nil Thinking to trigger error path
|
||||
return ®istry.ModelInfo{ID: modelID, Thinking: nil}
|
||||
}
|
||||
return ®istry.ModelInfo{
|
||||
ID: modelID,
|
||||
Thinking: support,
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyNilModelInfo tests Apply behavior when modelInfo is nil.
|
||||
// Coverage: apply.go:56-58 (H1)
|
||||
func TestGeminiApplyNilModelInfo(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
result, err := applier.Apply(body, config, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
|
||||
}
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyEmptyModelID tests Apply when modelID is empty.
|
||||
// Coverage: apply.go:61-63 (H2)
|
||||
func TestGeminiApplyEmptyModelID(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
modelInfo := ®istry.ModelInfo{ID: "", Thinking: nil}
|
||||
|
||||
_, err := applier.Apply([]byte(`{}`), config, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatal("Apply() with empty modelID and nil Thinking should error")
|
||||
}
|
||||
thinkErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
|
||||
}
|
||||
if thinkErr.Model != "unknown" {
|
||||
t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyModeBudgetWithLevels tests that ModeBudget is applied with budget format
|
||||
// even for models with Levels. The Apply layer handles ModeBudget by applying thinkingBudget.
|
||||
// Coverage: apply.go:88-90
|
||||
func TestGeminiApplyModeBudgetWithLevels(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildGeminiModelInfo("gemini-3-flash-preview")
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
result, err := applier.Apply(body, config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
// ModeBudget applies budget format
|
||||
budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int()
|
||||
if budget != 8192 {
|
||||
t.Fatalf("Apply() expected thinkingBudget=8192, got: %d", budget)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiApplyUnsupportedMode tests behavior with unsupported Mode types.
|
||||
// Coverage: apply.go:67-69 and 97-98 (H5, L2)
|
||||
func TestGeminiApplyUnsupportedMode(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
config thinking.ThinkingConfig
|
||||
}{
|
||||
{"unknown mode with budget model", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
|
||||
{"unknown mode with level model", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := buildGeminiModelInfo(tt.model)
|
||||
result, err := applier.Apply(body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
// Unsupported modes return original body unchanged
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
128
internal/thinking/provider/geminicli/apply.go
Normal file
128
internal/thinking/provider/geminicli/apply.go
Normal file
@@ -0,0 +1,128 @@
|
||||
// Package geminicli implements thinking configuration for Gemini CLI API format.
|
||||
//
|
||||
// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of
|
||||
// generationConfig.thinkingConfig.* used by standard Gemini API.
|
||||
package geminicli
|
||||
|
||||
import (
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier applies thinking configuration for Gemini CLI API format.
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new Gemini CLI thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
applier := NewApplier()
|
||||
thinking.RegisterProvider("gemini-cli", applier)
|
||||
thinking.RegisterProvider("antigravity", applier)
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to Gemini CLI request body.
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if modelInfo == nil {
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
if modelInfo.Type == "" {
|
||||
modelID := modelInfo.ID
|
||||
if modelID == "" {
|
||||
modelID = "unknown"
|
||||
}
|
||||
return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
|
||||
}
|
||||
return a.applyCompatible(body, config)
|
||||
}
|
||||
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
// ModeAuto: Always use Budget format with thinkingBudget=-1
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
// For non-auto modes, choose format based on model capabilities
|
||||
support := modelInfo.Thinking
|
||||
if len(support.Levels) > 0 {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeAuto {
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") {
|
||||
return a.applyLevelFormat(body, config)
|
||||
}
|
||||
|
||||
return a.applyBudgetFormat(body, config)
|
||||
}
|
||||
|
||||
func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget")
|
||||
|
||||
if config.Mode == thinking.ModeNone {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false)
|
||||
if config.Level != "" {
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level))
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Only handle ModeLevel - budget conversion should be done by upper layer
|
||||
if config.Mode != thinking.ModeLevel {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
level := string(config.Level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
// Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output
|
||||
result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel")
|
||||
|
||||
budget := config.Budget
|
||||
includeThoughts := false
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
includeThoughts = false
|
||||
case thinking.ModeAuto:
|
||||
includeThoughts = true
|
||||
default:
|
||||
includeThoughts = budget > 0
|
||||
}
|
||||
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts)
|
||||
return result, nil
|
||||
}
|
||||
382
internal/thinking/provider/geminicli/apply_test.go
Normal file
382
internal/thinking/provider/geminicli/apply_test.go
Normal file
@@ -0,0 +1,382 @@
|
||||
// Package geminicli implements thinking configuration for Gemini CLI API format.
|
||||
package geminicli
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestNewApplier(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
if applier == nil {
|
||||
t.Fatal("NewApplier() returned nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplierImplementsInterface(t *testing.T) {
|
||||
// Compile-time check: if Applier doesn't implement the interface, this won't compile
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
}
|
||||
|
||||
// TestGeminiCLIApply tests the Gemini CLI thinking applier.
//
// Gemini CLI uses request.generationConfig.thinkingConfig.* path.
// Behavior mirrors Gemini applier but with different JSON path prefix.
func TestGeminiCLIApply(t *testing.T) {
	applier := NewApplier()
	tests := []struct {
		name                string
		model               string
		config              thinking.ThinkingConfig
		wantField           string
		wantValue           interface{}
		wantIncludeThoughts bool
	}{
		// Budget mode (no Levels)
		{"budget 8k", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true},
		{"budget zero", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false},
		{"none mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false},
		{"auto mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},

		// Level mode (has Levels)
		{"level high", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true},
		{"level low", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true},
		{"level minimal", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true},
		// ModeAuto with Levels model still uses thinkingBudget=-1
		{"auto with levels", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiCLIModelInfo(tt.model)
			result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Note the "request." prefix — this is the CLI-specific path.
			gotField := gjson.GetBytes(result, "request.generationConfig.thinkingConfig."+tt.wantField)
			switch want := tt.wantValue.(type) {
			case int:
				if int(gotField.Int()) != want {
					t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want)
				}
			case string:
				if gotField.String() != want {
					t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want)
				}
			case bool:
				if gotField.Bool() != want {
					t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want)
				}
			default:
				t.Fatalf("unsupported wantValue type %T", tt.wantValue)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
			if gotIncludeThoughts != tt.wantIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts)
			}
		})
	}
}
|
||||
|
||||
// TestGeminiCLIApplyModeNoneWithLevel tests ModeNone with Level model.
|
||||
// When ModeNone is used with a model that has Levels, includeThoughts should be false.
|
||||
func TestGeminiCLIApplyModeNoneWithLevel(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeNone, Level: thinking.LevelLow}
|
||||
|
||||
result, err := applier.Apply([]byte(`{}`), config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
|
||||
gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
|
||||
if gotIncludeThoughts != false {
|
||||
t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, false)
|
||||
}
|
||||
|
||||
gotLevel := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel").String()
|
||||
if gotLevel != "low" {
|
||||
t.Fatalf("thinkingLevel = %q, want %q", gotLevel, "low")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiCLIApplyInvalidBody tests Apply behavior with invalid body inputs.
// Nil, empty, and malformed bodies are all replaced with an empty object
// before the thinking configuration is written.
func TestGeminiCLIApplyInvalidBody(t *testing.T) {
	applier := NewApplier()
	modelInfo := buildGeminiCLIModelInfo("gemini-cli-budget")
	config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}

	tests := []struct {
		name string
		body []byte
	}{
		{"nil body", nil},
		{"empty body", []byte{}},
		{"invalid json", []byte("{\"not json\"")},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := applier.Apply(tt.body, config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// The budget must be written regardless of the broken input.
			gotBudget := int(gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Int())
			if gotBudget != 8192 {
				t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192)
			}

			gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool()
			if !gotIncludeThoughts {
				t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true)
			}
		})
	}
}
|
||||
|
||||
// TestGeminiCLIApplyConflictingFields tests that conflicting fields are removed.
//
// When applying Budget format, any existing thinkingLevel should be removed.
// When applying Level format, any existing thinkingBudget should be removed.
func TestGeminiCLIApplyConflictingFields(t *testing.T) {
	applier := NewApplier()

	tests := []struct {
		name         string
		model        string
		config       thinking.ThinkingConfig
		existingBody string
		wantField    string // expected field to exist
		wantNoField  string // expected field to NOT exist
	}{
		// Budget format should remove existing thinkingLevel
		{
			"budget removes level",
			"gemini-cli-budget",
			thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
		// Level format should remove existing thinkingBudget
		{
			"level removes budget",
			"gemini-cli-level",
			thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`,
			"thinkingLevel",
			"thinkingBudget",
		},
		// ModeAuto uses budget format, should remove thinkingLevel
		{
			"auto removes level",
			"gemini-cli-level",
			thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1},
			`{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`,
			"thinkingBudget",
			"thinkingLevel",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			modelInfo := buildGeminiCLIModelInfo(tt.model)
			result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo)
			if err != nil {
				t.Fatalf("Apply() error = %v", err)
			}

			// Verify expected field exists
			wantPath := "request.generationConfig.thinkingConfig." + tt.wantField
			if !gjson.GetBytes(result, wantPath).Exists() {
				t.Fatalf("%s should exist in result: %s", tt.wantField, string(result))
			}

			// Verify conflicting field was removed
			noPath := "request.generationConfig.thinkingConfig." + tt.wantNoField
			if gjson.GetBytes(result, noPath).Exists() {
				t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result))
			}
		})
	}
}
|
||||
|
||||
// TestGeminiCLIApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil.
|
||||
func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
|
||||
// Model with nil Thinking support
|
||||
modelInfo := ®istry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil}
|
||||
|
||||
_, err := applier.Apply([]byte(`{}`), config, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatal("Apply() expected error for nil Thinking, got nil")
|
||||
}
|
||||
|
||||
// Verify it's the correct error type
|
||||
thinkErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
|
||||
}
|
||||
if thinkErr.Code != thinking.ErrThinkingNotSupported {
|
||||
t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiCLIApplyNilModelInfo tests Apply behavior when modelInfo is nil.
|
||||
func TestGeminiCLIApplyNilModelInfo(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
result, err := applier.Apply(body, config, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err)
|
||||
}
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiCLIApplyEmptyModelID tests Apply when modelID is empty.
|
||||
func TestGeminiCLIApplyEmptyModelID(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
modelInfo := ®istry.ModelInfo{ID: "", Thinking: nil}
|
||||
|
||||
_, err := applier.Apply([]byte(`{}`), config, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatal("Apply() with empty modelID and nil Thinking should error")
|
||||
}
|
||||
thinkErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err)
|
||||
}
|
||||
if thinkErr.Model != "unknown" {
|
||||
t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiCLIApplyModeBudgetWithLevels tests that ModeBudget with Levels model passes through.
|
||||
// Apply layer doesn't convert - upper layer should handle Budget→Level conversion.
|
||||
func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildGeminiCLIModelInfo("gemini-cli-level")
|
||||
config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
result, err := applier.Apply(body, config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
// ModeBudget with Levels model: Apply returns body unchanged (conversion is upper layer's job)
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() ModeBudget with Levels should return original body, got: %s", result)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGeminiCLIApplyUnsupportedMode tests behavior with unsupported Mode types.
|
||||
func TestGeminiCLIApplyUnsupportedMode(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
body := []byte(`{"existing": "data"}`)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
config thinking.ThinkingConfig
|
||||
}{
|
||||
{"unknown mode with budget model", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}},
|
||||
{"unknown mode with level model", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := buildGeminiCLIModelInfo(tt.model)
|
||||
result, err := applier.Apply(body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
// Unsupported modes return original body unchanged
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestAntigravityUsesGeminiCLIFormat tests that antigravity provider uses gemini-cli format.
|
||||
// Antigravity is registered with the same applier as gemini-cli.
|
||||
func TestAntigravityUsesGeminiCLIFormat(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
config thinking.ThinkingConfig
|
||||
modelInfo *registry.ModelInfo
|
||||
wantField string
|
||||
}{
|
||||
{
|
||||
"claude model budget",
|
||||
thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384},
|
||||
®istry.ModelInfo{ID: "gemini-claude-sonnet-4-5-thinking", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000}},
|
||||
"request.generationConfig.thinkingConfig.thinkingBudget",
|
||||
},
|
||||
{
|
||||
"opus model budget",
|
||||
thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 32768},
|
||||
®istry.ModelInfo{ID: "gemini-claude-opus-4-5-thinking", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000}},
|
||||
"request.generationConfig.thinkingConfig.thinkingBudget",
|
||||
},
|
||||
{
|
||||
"model with levels",
|
||||
thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh},
|
||||
®istry.ModelInfo{ID: "some-model-with-levels", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000, Levels: []string{"low", "high"}}},
|
||||
"request.generationConfig.thinkingConfig.thinkingLevel",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
|
||||
if !gjson.GetBytes(got, tt.wantField).Exists() {
|
||||
t.Fatalf("expected field %s in output: %s", tt.wantField, string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func buildGeminiCLIModelInfo(modelID string) *registry.ModelInfo {
|
||||
support := ®istry.ThinkingSupport{}
|
||||
switch modelID {
|
||||
case "gemini-cli-budget":
|
||||
support.Min = 0
|
||||
support.Max = 32768
|
||||
support.ZeroAllowed = true
|
||||
support.DynamicAllowed = true
|
||||
case "gemini-cli-level":
|
||||
support.Min = 128
|
||||
support.Max = 32768
|
||||
support.ZeroAllowed = false
|
||||
support.DynamicAllowed = true
|
||||
support.Levels = []string{"minimal", "low", "medium", "high"}
|
||||
default:
|
||||
// Unknown model - return nil Thinking to trigger error path
|
||||
return ®istry.ModelInfo{ID: modelID, Thinking: nil}
|
||||
}
|
||||
return ®istry.ModelInfo{
|
||||
ID: modelID,
|
||||
Thinking: support,
|
||||
}
|
||||
}
|
||||
160
internal/thinking/provider/iflow/apply.go
Normal file
160
internal/thinking/provider/iflow/apply.go
Normal file
@@ -0,0 +1,160 @@
|
||||
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
|
||||
//
|
||||
// iFlow models use boolean toggle semantics:
|
||||
// - GLM models: chat_template_kwargs.enable_thinking (boolean)
|
||||
// - MiniMax models: reasoning_split (boolean)
|
||||
//
|
||||
// Level values are converted to boolean: none=false, all others=true
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-9
|
||||
package iflow
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for iFlow models.
|
||||
//
|
||||
// iFlow-specific behavior:
|
||||
// - GLM models: enable_thinking boolean + clear_thinking=false
|
||||
// - MiniMax models: reasoning_split boolean
|
||||
// - Level to boolean: none=false, others=true
|
||||
// - No quantized support (only on/off)
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new iFlow thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("iflow", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to iFlow request body.
|
||||
//
|
||||
// Expected output format (GLM):
|
||||
//
|
||||
// {
|
||||
// "chat_template_kwargs": {
|
||||
// "enable_thinking": true,
|
||||
// "clear_thinking": false
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Expected output format (MiniMax):
|
||||
//
|
||||
// {
|
||||
// "reasoning_split": true
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if modelInfo == nil {
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
modelID := modelInfo.ID
|
||||
if modelID == "" {
|
||||
modelID = "unknown"
|
||||
}
|
||||
return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
|
||||
}
|
||||
|
||||
if isGLMModel(modelInfo.ID) {
|
||||
return applyGLM(body, config), nil
|
||||
}
|
||||
|
||||
if isMiniMaxModel(modelInfo.ID) {
|
||||
return applyMiniMax(body, config), nil
|
||||
}
|
||||
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// configToBoolean converts ThinkingConfig to boolean for iFlow models.
|
||||
//
|
||||
// Conversion rules:
|
||||
// - ModeNone: false
|
||||
// - ModeAuto: true
|
||||
// - ModeBudget + Budget=0: false
|
||||
// - ModeBudget + Budget>0: true
|
||||
// - ModeLevel + Level="none": false
|
||||
// - ModeLevel + any other level: true
|
||||
// - Default (unknown mode): true
|
||||
func configToBoolean(config thinking.ThinkingConfig) bool {
|
||||
switch config.Mode {
|
||||
case thinking.ModeNone:
|
||||
return false
|
||||
case thinking.ModeAuto:
|
||||
return true
|
||||
case thinking.ModeBudget:
|
||||
return config.Budget > 0
|
||||
case thinking.ModeLevel:
|
||||
return config.Level != thinking.LevelNone
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// applyGLM applies thinking configuration for GLM models.
|
||||
//
|
||||
// Output format when enabled:
|
||||
//
|
||||
// {"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}}
|
||||
//
|
||||
// Output format when disabled:
|
||||
//
|
||||
// {"chat_template_kwargs": {"enable_thinking": false}}
|
||||
//
|
||||
// Note: clear_thinking is only set when thinking is enabled, to preserve
|
||||
// thinking output in the response.
|
||||
func applyGLM(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
enableThinking := configToBoolean(config)
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking)
|
||||
|
||||
// clear_thinking only needed when thinking is enabled
|
||||
if enableThinking {
|
||||
result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// applyMiniMax applies thinking configuration for MiniMax models.
|
||||
//
|
||||
// Output format:
|
||||
//
|
||||
// {"reasoning_split": true/false}
|
||||
func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte {
|
||||
reasoningSplit := configToBoolean(config)
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// hasModelPrefix reports whether modelID starts with prefix, ignoring case.
func hasModelPrefix(modelID, prefix string) bool {
	return strings.HasPrefix(strings.ToLower(modelID), prefix)
}

// isGLMModel determines if the model is a GLM series model.
// GLM models use chat_template_kwargs.enable_thinking format.
func isGLMModel(modelID string) bool {
	return hasModelPrefix(modelID, "glm")
}

// isMiniMaxModel determines if the model is a MiniMax series model.
// MiniMax models use reasoning_split format.
func isMiniMaxModel(modelID string) bool {
	return hasModelPrefix(modelID, "minimax")
}
|
||||
328
internal/thinking/provider/iflow/apply_test.go
Normal file
328
internal/thinking/provider/iflow/apply_test.go
Normal file
@@ -0,0 +1,328 @@
|
||||
// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax).
|
||||
package iflow
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestNewApplier(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
}{
|
||||
{"default"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
if applier == nil {
|
||||
t.Fatalf("expected non-nil applier")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplierImplementsInterface(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
applier thinking.ProviderApplier
|
||||
}{
|
||||
{"default", NewApplier()},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.applier == nil {
|
||||
t.Fatalf("expected thinking.ProviderApplier implementation")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyNilModelInfo(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body []byte
|
||||
}{
|
||||
{"nil body", nil},
|
||||
{"empty body", []byte{}},
|
||||
{"json body", []byte(`{"model":"glm-4.6"}`)},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := applier.Apply(tt.body, thinking.ThinkingConfig{}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
if !bytes.Equal(got, tt.body) {
|
||||
t.Fatalf("expected body unchanged, got %s", string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMissingThinkingSupport(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
modelID string
|
||||
wantModel string
|
||||
}{
|
||||
{"model id", "glm-4.6", "glm-4.6"},
|
||||
{"empty model id", "", "unknown"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := ®istry.ModelInfo{ID: tt.modelID}
|
||||
got, err := applier.Apply([]byte(`{"model":"`+tt.modelID+`"}`), thinking.ThinkingConfig{}, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
if got != nil {
|
||||
t.Fatalf("expected nil body on error, got %s", string(got))
|
||||
}
|
||||
thinkingErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("expected ThinkingError, got %T", err)
|
||||
}
|
||||
if thinkingErr.Code != thinking.ErrThinkingNotSupported {
|
||||
t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
|
||||
}
|
||||
if thinkingErr.Model != tt.wantModel {
|
||||
t.Fatalf("expected model %s, got %s", tt.wantModel, thinkingErr.Model)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigToBoolean(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
config thinking.ThinkingConfig
|
||||
want bool
|
||||
}{
|
||||
{"mode none", thinking.ThinkingConfig{Mode: thinking.ModeNone}, false},
|
||||
{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true},
|
||||
{"budget zero", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false},
|
||||
{"budget positive", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true},
|
||||
{"level none", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false},
|
||||
{"level minimal", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true},
|
||||
{"level low", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true},
|
||||
{"level medium", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true},
|
||||
{"level high", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true},
|
||||
{"level xhigh", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true},
|
||||
{"zero value config", thinking.ThinkingConfig{}, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := configToBoolean(tt.config); got != tt.want {
|
||||
t.Fatalf("configToBoolean(%+v) = %v, want %v", tt.config, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyGLM(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
modelID string
|
||||
body []byte
|
||||
config thinking.ThinkingConfig
|
||||
wantEnable bool
|
||||
wantPreserve string
|
||||
}{
|
||||
{"mode none", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, ""},
|
||||
{"level none", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, ""},
|
||||
{"mode auto", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
|
||||
{"level minimal", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, ""},
|
||||
{"level low", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, ""},
|
||||
{"level medium", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, ""},
|
||||
{"level high", "GLM-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, ""},
|
||||
{"level xhigh", "glm-z1-preview", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, ""},
|
||||
{"budget zero", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, ""},
|
||||
{"budget 1000", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, ""},
|
||||
{"preserve fields", "glm-4.6", []byte(`{"model":"glm-4.6","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "glm-4.6"},
|
||||
{"empty body", "glm-4.6", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
|
||||
{"malformed json", "glm-4.6", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: tt.modelID,
|
||||
Thinking: ®istry.ThinkingSupport{},
|
||||
}
|
||||
got, err := applier.Apply(tt.body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if !gjson.ValidBytes(got) {
|
||||
t.Fatalf("expected valid JSON, got %s", string(got))
|
||||
}
|
||||
|
||||
enableResult := gjson.GetBytes(got, "chat_template_kwargs.enable_thinking")
|
||||
if !enableResult.Exists() {
|
||||
t.Fatalf("enable_thinking missing")
|
||||
}
|
||||
gotEnable := enableResult.Bool()
|
||||
if gotEnable != tt.wantEnable {
|
||||
t.Fatalf("enable_thinking = %v, want %v", gotEnable, tt.wantEnable)
|
||||
}
|
||||
|
||||
// clear_thinking only set when enable_thinking=true
|
||||
clearResult := gjson.GetBytes(got, "chat_template_kwargs.clear_thinking")
|
||||
if tt.wantEnable {
|
||||
if !clearResult.Exists() {
|
||||
t.Fatalf("clear_thinking missing when enable_thinking=true")
|
||||
}
|
||||
if clearResult.Bool() {
|
||||
t.Fatalf("clear_thinking = %v, want false", clearResult.Bool())
|
||||
}
|
||||
} else {
|
||||
if clearResult.Exists() {
|
||||
t.Fatalf("clear_thinking should not exist when enable_thinking=false")
|
||||
}
|
||||
}
|
||||
|
||||
if tt.wantPreserve != "" {
|
||||
gotModel := gjson.GetBytes(got, "model").String()
|
||||
if gotModel != tt.wantPreserve {
|
||||
t.Fatalf("model = %q, want %q", gotModel, tt.wantPreserve)
|
||||
}
|
||||
if !gjson.GetBytes(got, "extra.keep").Bool() {
|
||||
t.Fatalf("expected extra.keep preserved")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMiniMax(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
modelID string
|
||||
body []byte
|
||||
config thinking.ThinkingConfig
|
||||
wantSplit bool
|
||||
wantModel string
|
||||
wantKeep bool
|
||||
}{
|
||||
{"mode none", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, "", false},
|
||||
{"level none", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, "", false},
|
||||
{"mode auto", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
{"level high", "MINIMAX-M2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, "", false},
|
||||
{"level low", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, "", false},
|
||||
{"level minimal", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, "", false},
|
||||
{"level medium", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, "", false},
|
||||
{"level xhigh", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, "", false},
|
||||
{"budget zero", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, "", false},
|
||||
{"budget 1000", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, "", false},
|
||||
{"unknown level", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "unknown"}, true, "", false},
|
||||
{"preserve fields", "minimax-m2", []byte(`{"model":"minimax-m2","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "minimax-m2", true},
|
||||
{"empty body", "minimax-m2", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
{"malformed json", "minimax-m2", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
ID: tt.modelID,
|
||||
Thinking: ®istry.ThinkingSupport{},
|
||||
}
|
||||
got, err := applier.Apply(tt.body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if !gjson.ValidBytes(got) {
|
||||
t.Fatalf("expected valid JSON, got %s", string(got))
|
||||
}
|
||||
|
||||
splitResult := gjson.GetBytes(got, "reasoning_split")
|
||||
if !splitResult.Exists() {
|
||||
t.Fatalf("reasoning_split missing")
|
||||
}
|
||||
// Verify JSON type is boolean, not string
|
||||
if splitResult.Type != gjson.True && splitResult.Type != gjson.False {
|
||||
t.Fatalf("reasoning_split should be boolean, got type %v", splitResult.Type)
|
||||
}
|
||||
gotSplit := splitResult.Bool()
|
||||
if gotSplit != tt.wantSplit {
|
||||
t.Fatalf("reasoning_split = %v, want %v", gotSplit, tt.wantSplit)
|
||||
}
|
||||
|
||||
if tt.wantModel != "" {
|
||||
gotModel := gjson.GetBytes(got, "model").String()
|
||||
if gotModel != tt.wantModel {
|
||||
t.Fatalf("model = %q, want %q", gotModel, tt.wantModel)
|
||||
}
|
||||
if tt.wantKeep && !gjson.GetBytes(got, "extra.keep").Bool() {
|
||||
t.Fatalf("expected extra.keep preserved")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsGLMModel tests the GLM model detection.
|
||||
//
|
||||
// Depends on: Epic 9 Story 9-1
|
||||
func TestIsGLMModel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantGLM bool
|
||||
}{
|
||||
{"glm-4.6", "glm-4.6", true},
|
||||
{"glm-z1-preview", "glm-z1-preview", true},
|
||||
{"glm uppercase", "GLM-4.7", true},
|
||||
{"minimax-01", "minimax-01", false},
|
||||
{"gpt-5.2", "gpt-5.2", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := isGLMModel(tt.model); got != tt.wantGLM {
|
||||
t.Fatalf("isGLMModel(%q) = %v, want %v", tt.model, got, tt.wantGLM)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsMiniMaxModel tests the MiniMax model detection.
|
||||
//
|
||||
// Depends on: Epic 9 Story 9-1
|
||||
func TestIsMiniMaxModel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
wantMiniMax bool
|
||||
}{
|
||||
{"minimax-01", "minimax-01", true},
|
||||
{"minimax uppercase", "MINIMAX-M2", true},
|
||||
{"glm-4.6", "glm-4.6", false},
|
||||
{"gpt-5.2", "gpt-5.2", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := isMiniMaxModel(tt.model); got != tt.wantMiniMax {
|
||||
t.Fatalf("isMiniMaxModel(%q) = %v, want %v", tt.model, got, tt.wantMiniMax)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
135
internal/thinking/provider/openai/apply.go
Normal file
135
internal/thinking/provider/openai/apply.go
Normal file
@@ -0,0 +1,135 @@
|
||||
// Package openai implements thinking configuration for OpenAI/Codex models.
|
||||
//
|
||||
// OpenAI models use the reasoning_effort format with discrete levels
|
||||
// (low/medium/high). Some models support xhigh and none levels.
|
||||
// See: _bmad-output/planning-artifacts/architecture.md#Epic-8
|
||||
package openai
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
// Applier implements thinking.ProviderApplier for OpenAI models.
|
||||
//
|
||||
// OpenAI-specific behavior:
|
||||
// - Output format: reasoning_effort (string: low/medium/high/xhigh)
|
||||
// - Level-only mode: no numeric budget support
|
||||
// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2)
|
||||
type Applier struct{}
|
||||
|
||||
var _ thinking.ProviderApplier = (*Applier)(nil)
|
||||
|
||||
// NewApplier creates a new OpenAI thinking applier.
|
||||
func NewApplier() *Applier {
|
||||
return &Applier{}
|
||||
}
|
||||
|
||||
func init() {
|
||||
thinking.RegisterProvider("openai", NewApplier())
|
||||
}
|
||||
|
||||
// Apply applies thinking configuration to OpenAI request body.
|
||||
//
|
||||
// Expected output format:
|
||||
//
|
||||
// {
|
||||
// "reasoning_effort": "high"
|
||||
// }
|
||||
func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) {
|
||||
if modelInfo == nil {
|
||||
return body, nil
|
||||
}
|
||||
if modelInfo.Thinking == nil {
|
||||
if modelInfo.Type == "" {
|
||||
modelID := modelInfo.ID
|
||||
if modelID == "" {
|
||||
modelID = "unknown"
|
||||
}
|
||||
return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID)
|
||||
}
|
||||
return applyCompatibleOpenAI(body, config)
|
||||
}
|
||||
|
||||
// Only handle ModeLevel and ModeNone; other modes pass through unchanged.
|
||||
if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
if config.Mode == thinking.ModeLevel {
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level))
|
||||
return result, nil
|
||||
}
|
||||
|
||||
effort := ""
|
||||
support := modelInfo.Thinking
|
||||
if config.Budget == 0 {
|
||||
if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) {
|
||||
effort = string(thinking.LevelNone)
|
||||
}
|
||||
}
|
||||
if effort == "" && config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
if effort == "" && len(support.Levels) > 0 {
|
||||
effort = support.Levels[0]
|
||||
}
|
||||
if effort == "" {
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) {
|
||||
if len(body) == 0 || !gjson.ValidBytes(body) {
|
||||
body = []byte(`{}`)
|
||||
}
|
||||
|
||||
var effort string
|
||||
switch config.Mode {
|
||||
case thinking.ModeLevel:
|
||||
if config.Level == "" {
|
||||
return body, nil
|
||||
}
|
||||
effort = string(config.Level)
|
||||
case thinking.ModeNone:
|
||||
effort = string(thinking.LevelNone)
|
||||
if config.Level != "" {
|
||||
effort = string(config.Level)
|
||||
}
|
||||
case thinking.ModeAuto:
|
||||
// Auto mode for user-defined models: pass through as "auto"
|
||||
effort = string(thinking.LevelAuto)
|
||||
case thinking.ModeBudget:
|
||||
// Budget mode: convert budget to level using threshold mapping
|
||||
level, ok := thinking.ConvertBudgetToLevel(config.Budget)
|
||||
if !ok {
|
||||
return body, nil
|
||||
}
|
||||
effort = level
|
||||
default:
|
||||
return body, nil
|
||||
}
|
||||
|
||||
result, _ := sjson.SetBytes(body, "reasoning_effort", effort)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// hasLevel reports whether target appears in levels, comparing
// case-insensitively and ignoring surrounding whitespace on entries.
func hasLevel(levels []string, target string) bool {
	for i := range levels {
		if strings.EqualFold(strings.TrimSpace(levels[i]), target) {
			return true
		}
	}
	return false
}
|
||||
343
internal/thinking/provider/openai/apply_test.go
Normal file
343
internal/thinking/provider/openai/apply_test.go
Normal file
@@ -0,0 +1,343 @@
|
||||
// Package openai implements thinking configuration for OpenAI/Codex models.
|
||||
package openai
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func buildOpenAIModelInfo(modelID string) *registry.ModelInfo {
|
||||
info := registry.LookupStaticModelInfo(modelID)
|
||||
if info != nil {
|
||||
return info
|
||||
}
|
||||
// Fallback with complete ThinkingSupport matching real OpenAI model capabilities
|
||||
return ®istry.ModelInfo{
|
||||
ID: modelID,
|
||||
Thinking: ®istry.ThinkingSupport{
|
||||
Min: 1024,
|
||||
Max: 32768,
|
||||
ZeroAllowed: true,
|
||||
Levels: []string{"none", "low", "medium", "high", "xhigh"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewApplier(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
if applier == nil {
|
||||
t.Fatalf("expected non-nil applier")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplierImplementsInterface(t *testing.T) {
|
||||
_, ok := interface{}(NewApplier()).(thinking.ProviderApplier)
|
||||
if !ok {
|
||||
t.Fatalf("expected Applier to implement thinking.ProviderApplier")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyNilModelInfo(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
body := []byte(`{"model":"gpt-5.2"}`)
|
||||
got, err := applier.Apply(body, thinking.ThinkingConfig{}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error, got %v", err)
|
||||
}
|
||||
if string(got) != string(body) {
|
||||
t.Fatalf("expected body unchanged, got %s", string(got))
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMissingThinkingSupport(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := ®istry.ModelInfo{ID: "gpt-5.2"}
|
||||
got, err := applier.Apply([]byte(`{"model":"gpt-5.2"}`), thinking.ThinkingConfig{}, modelInfo)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
if got != nil {
|
||||
t.Fatalf("expected nil body on error, got %s", string(got))
|
||||
}
|
||||
thinkingErr, ok := err.(*thinking.ThinkingError)
|
||||
if !ok {
|
||||
t.Fatalf("expected ThinkingError, got %T", err)
|
||||
}
|
||||
if thinkingErr.Code != thinking.ErrThinkingNotSupported {
|
||||
t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code)
|
||||
}
|
||||
if thinkingErr.Model != "gpt-5.2" {
|
||||
t.Fatalf("expected model gpt-5.2, got %s", thinkingErr.Model)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyLevel tests Apply with ModeLevel (unit test, no ValidateConfig).
|
||||
func TestApplyLevel(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildOpenAIModelInfo("gpt-5.2")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
level thinking.ThinkingLevel
|
||||
want string
|
||||
}{
|
||||
{"high", thinking.LevelHigh, "high"},
|
||||
{"medium", thinking.LevelMedium, "medium"},
|
||||
{"low", thinking.LevelLow, "low"},
|
||||
{"xhigh", thinking.LevelXHigh, "xhigh"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := applier.Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: tt.level}, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyModeNone tests Apply with ModeNone (unit test).
|
||||
func TestApplyModeNone(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
config thinking.ThinkingConfig
|
||||
modelInfo *registry.ModelInfo
|
||||
want string
|
||||
}{
|
||||
{"zero allowed", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, ®istry.ModelInfo{ID: "gpt-5.2", Thinking: ®istry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low"}}}, "none"},
|
||||
{"clamped to level", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 128, Level: thinking.LevelLow}, ®istry.ModelInfo{ID: "gpt-5", Thinking: ®istry.ThinkingSupport{Levels: []string{"minimal", "low"}}}, "low"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPassthrough tests that unsupported modes pass through unchanged.
|
||||
func TestApplyPassthrough(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildOpenAIModelInfo("gpt-5.2")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
config thinking.ThinkingConfig
|
||||
}{
|
||||
{"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}},
|
||||
{"mode budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
body := []byte(`{"model":"gpt-5.2"}`)
|
||||
result, err := applier.Apply(body, tt.config, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if string(result) != string(body) {
|
||||
t.Fatalf("Apply() result = %s, want %s", string(result), string(body))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyInvalidBody tests Apply with invalid body input.
|
||||
func TestApplyInvalidBody(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildOpenAIModelInfo("gpt-5.2")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
body []byte
|
||||
}{
|
||||
{"nil body", nil},
|
||||
{"empty body", []byte{}},
|
||||
{"invalid json", []byte(`{"not json"`)},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := applier.Apply(tt.body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if !gjson.ValidBytes(result) {
|
||||
t.Fatalf("Apply() result is not valid JSON: %s", string(result))
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "high" {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, "high")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPreservesFields tests that existing body fields are preserved.
|
||||
func TestApplyPreservesFields(t *testing.T) {
|
||||
applier := NewApplier()
|
||||
modelInfo := buildOpenAIModelInfo("gpt-5.2")
|
||||
|
||||
body := []byte(`{"model":"gpt-5.2","messages":[]}`)
|
||||
result, err := applier.Apply(body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if got := gjson.GetBytes(result, "model").String(); got != "gpt-5.2" {
|
||||
t.Fatalf("model = %q, want %q", got, "gpt-5.2")
|
||||
}
|
||||
if !gjson.GetBytes(result, "messages").Exists() {
|
||||
t.Fatalf("messages missing from result: %s", string(result))
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "low" {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, "low")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasLevel tests the hasLevel helper function.
|
||||
func TestHasLevel(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
levels []string
|
||||
target string
|
||||
want bool
|
||||
}{
|
||||
{"exact match", []string{"low", "medium", "high"}, "medium", true},
|
||||
{"case insensitive", []string{"low", "medium", "high"}, "MEDIUM", true},
|
||||
{"with spaces", []string{"low", " medium ", "high"}, "medium", true},
|
||||
{"not found", []string{"low", "medium", "high"}, "xhigh", false},
|
||||
{"empty levels", []string{}, "medium", false},
|
||||
{"none level", []string{"none", "low", "medium"}, "none", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := hasLevel(tt.levels, tt.target); got != tt.want {
|
||||
t.Fatalf("hasLevel(%v, %q) = %v, want %v", tt.levels, tt.target, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// --- End-to-End Tests (ValidateConfig → Apply) ---
|
||||
|
||||
// TestE2EApply tests the full flow: ValidateConfig → Apply.
|
||||
func TestE2EApply(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
config thinking.ThinkingConfig
|
||||
want string
|
||||
}{
|
||||
{"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high"},
|
||||
{"level medium", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium"},
|
||||
{"level low", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "low"},
|
||||
{"level xhigh", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, "xhigh"},
|
||||
{"mode none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "none"},
|
||||
{"budget to level", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "medium"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := buildOpenAIModelInfo(tt.model)
|
||||
normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateConfig() error = %v", err)
|
||||
}
|
||||
|
||||
applier := NewApplier()
|
||||
result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestE2EApplyOutputFormat tests the full flow with exact JSON output verification.
|
||||
func TestE2EApplyOutputFormat(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
config thinking.ThinkingConfig
|
||||
wantJSON string
|
||||
}{
|
||||
{"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, `{"reasoning_effort":"high"}`},
|
||||
{"level none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, `{"reasoning_effort":"none"}`},
|
||||
{"budget converted", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, `{"reasoning_effort":"medium"}`},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := buildOpenAIModelInfo(tt.model)
|
||||
normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateConfig() error = %v", err)
|
||||
}
|
||||
|
||||
applier := NewApplier()
|
||||
result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if string(result) != tt.wantJSON {
|
||||
t.Fatalf("Apply() result = %s, want %s", string(result), tt.wantJSON)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestE2EApplyWithExistingBody tests the full flow with existing body fields.
|
||||
func TestE2EApplyWithExistingBody(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
config thinking.ThinkingConfig
|
||||
wantEffort string
|
||||
wantModel string
|
||||
}{
|
||||
{"empty body", `{}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high", ""},
|
||||
{"preserve fields", `{"model":"gpt-5.2","messages":[]}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium", "gpt-5.2"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
modelInfo := buildOpenAIModelInfo("gpt-5.2")
|
||||
normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateConfig() error = %v", err)
|
||||
}
|
||||
|
||||
applier := NewApplier()
|
||||
result, err := applier.Apply([]byte(tt.body), *normalized, modelInfo)
|
||||
if err != nil {
|
||||
t.Fatalf("Apply() error = %v", err)
|
||||
}
|
||||
if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.wantEffort {
|
||||
t.Fatalf("reasoning_effort = %q, want %q", got, tt.wantEffort)
|
||||
}
|
||||
if tt.wantModel != "" {
|
||||
if got := gjson.GetBytes(result, "model").String(); got != tt.wantModel {
|
||||
t.Fatalf("model = %q, want %q", got, tt.wantModel)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user