// sub2api/backend/internal/service/kimi_cli_gateway.go

package service

import (
	"bufio"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os/exec"
	"strings"
	"time"

	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
	"github.com/gin-gonic/gin"
	"github.com/tidwall/gjson"
)

// Settings for invoking the local kimi-cli binary.
//
//   - kimiCLICommand is the executable name looked up on PATH.
//   - kimiCLITimeout is presumably the upper bound for a single CLI run
//     (NOTE(review): confirm where it is enforced).
//   - kimiCLIStreamJSONFlag is the value passed to --output-format.
const (
	kimiCLICommand        = "kimi"
	kimiCLITimeout        = 10 * time.Minute
	kimiCLIStreamJSONFlag = "stream-json"
)

// KimiCLIGateway handles forwarding chat completions requests through the local kimi-cli.
type KimiCLIGateway struct {
	// cliPath is the resolved path to the kimi binary. It is empty when the
	// binary could not be found on PATH, in which case IsAvailable reports
	// false and ForwardChatCompletions refuses to run.
	cliPath string
}

// NewKimiCLIGateway builds a gateway bound to whatever kimi binary can be
// located at construction time. When none is found the gateway is still
// returned, but with an empty binary path.
func NewKimiCLIGateway() *KimiCLIGateway {
	gateway := new(KimiCLIGateway)
	gateway.cliPath = resolveKimiCLI()
	return gateway
}

// IsAvailable reports whether a kimi CLI binary was resolved for this gateway,
// i.e. whether the stored binary path is non-empty.
func (g *KimiCLIGateway) IsAvailable() bool {
	return len(g.cliPath) > 0
}

// ForwardChatCompletions forwards an OpenAI Chat Completions request through kimi-cli.
//
// The request's messages are converted to NDJSON and fed to the CLI on stdin.
// The CLI's stdout is parsed and written to c either as SSE (when the request
// body sets "stream" to true) or as a single JSON document. Anything the CLI
// prints on stderr is logged once the process has exited.
//
// The CLI run is bounded by kimiCLITimeout in addition to any deadline or
// cancellation already carried by ctx.
//
// An error is returned when the CLI binary is unavailable, the request holds no
// usable messages, the process cannot be started, or its output cannot be
// parsed. When no result was produced and the process exited abnormally, the
// exit error is reported.
func (g *KimiCLIGateway) ForwardChatCompletions(
	ctx context.Context,
	c *gin.Context,
	account *Account,
	body []byte,
) (*ForwardResult, error) {
	startTime := time.Now()

	if !g.IsAvailable() {
		return nil, errors.New("kimi CLI not found. Please install it: uv tool install --python 3.13 kimi-cli")
	}

	// 1. Parse request
	reqStream := gjson.GetBytes(body, "stream").Bool()
	originalModel := gjson.GetBytes(body, "model").String()
	mappedModel := account.GetMappedModel(originalModel)
	if mappedModel == "" {
		mappedModel = originalModel
	}

	// 2. Build NDJSON messages for stdin
	ndjsonInput, err := buildKimiNDJSONMessages(body)
	if err != nil {
		return nil, fmt.Errorf("build messages: %w", err)
	}

	// 3. Build CLI args
	// Note: kimi-cli does not support --model; it uses the default model.
	// The API only exposes K2.6, but the actual model is determined by the CLI.
	args := []string{
		"--print",
		"--output-format", kimiCLIStreamJSONFlag,
		"--input-format", "stream-json",
	}

	// 4. Run CLI (CLI manages its own OAuth auth via `kimi login`).
	// The derived context kills the process if it outlives kimiCLITimeout.
	runCtx, cancel := context.WithTimeout(ctx, kimiCLITimeout)
	defer cancel()

	cmd := exec.CommandContext(runCtx, g.cliPath, args...)
	cmd.Stdin = strings.NewReader(ndjsonInput)

	// Let os/exec copy stderr into a buffer: Wait then blocks until the copy is
	// complete, so the output is never truncated and never races with Wait the
	// way a hand-rolled reader on StderrPipe would.
	var stderrBuf strings.Builder
	cmd.Stderr = &stderrBuf

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("stdout pipe: %w", err)
	}

	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("start cli: %w", err)
	}

	// 5. Parse output and forward
	var result *ForwardResult
	if reqStream {
		result, err = g.handleStreamingResponse(stdout, c, originalModel, mappedModel, startTime)
	} else {
		result, err = g.handleNonStreamingResponse(stdout, c, originalModel, mappedModel, startTime)
	}

	// A handler may return without consuming all of stdout (for example on an
	// early validation failure). Drain what is left so the child cannot block
	// on a full pipe and stall Wait. The read error is irrelevant here.
	_, _ = io.Copy(io.Discard, stdout)

	// Wait for process to finish
	waitErr := cmd.Wait()
	if stderrBuf.Len() > 0 {
		logger.LegacyPrintf("service.kimi_cli", "stderr: %s", stderrBuf.String())
	}
	if waitErr != nil && result == nil {
		return nil, fmt.Errorf("cli exited: %w", waitErr)
	}

	return result, err
}

// handleStreamingResponse reads kimi-cli's NDJSON output to completion and
// replays the final message to the client as an OpenAI-style SSE stream.
//
// The output is read to EOF before anything is sent, so the stream is
// synthetic: one role delta, one content delta carrying the whole reply
// (thinking, if any, wrapped in <think> tags ahead of the text), a "stop"
// chunk, and the [DONE] sentinel.
//
// Headers and the 200 status are only written after the CLI output has been
// parsed successfully; on failure nothing has been sent yet, so the caller can
// still produce a proper error response. originalModel is not used.
//
// FirstTokenMs in the result is the time elapsed between startTime and the
// moment the first chunk is about to be written.
func (g *KimiCLIGateway) handleStreamingResponse(
	stdout io.Reader,
	c *gin.Context,
	originalModel, mappedModel string,
	startTime time.Time,
) (*ForwardResult, error) {
	if c == nil || c.Writer == nil {
		return nil, errors.New("gin context or writer is nil")
	}
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		return nil, errors.New("streaming not supported")
	}

	// Parse first: committing a 200 before knowing whether the CLI produced
	// anything would make every CLI failure look like an empty success.
	msg, err := parseKimiCLIMessage(stdout)
	if err != nil {
		return nil, fmt.Errorf("parse cli output: %w", err)
	}

	// Build full content (prepend thinking if present)
	fullContent := extractKimiTextContent(msg)
	if thinking := extractKimiThinkingContent(msg); thinking != "" {
		fullContent = fmt.Sprintf("<think>\n%s\n</think>\n\n%s", thinking, fullContent)
	}

	header := c.Writer.Header()
	header.Set("Content-Type", "text/event-stream; charset=utf-8")
	header.Set("Cache-Control", "no-cache")
	header.Set("Connection", "keep-alive")
	c.Writer.WriteHeader(http.StatusOK)

	chatID := generateChatCompletionID()
	created := time.Now().Unix()
	firstTokenMs := int(time.Since(startTime).Milliseconds())

	events := []string{
		// Role delta.
		buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{Role: "assistant"}, nil),
		// Content delta (sent all at once since the CLI gives the full response).
		buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{Content: fullContent}, nil),
		// Finish marker.
		buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{}, stringPtr("stop")),
		"[DONE]",
	}
	for _, event := range events {
		// Best effort: a write failure means the client went away, and there is
		// nobody left to report it to.
		_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", event)
		flusher.Flush()
	}

	return &ForwardResult{
		UpstreamModel: mappedModel,
		FirstTokenMs:  &firstTokenMs,
	}, nil
}

// handleNonStreamingResponse consumes kimi-cli's NDJSON output and answers the
// client with one complete OpenAI-style "chat.completion" JSON document.
//
// The reply content is the text of the parsed CLI message; when the message
// also carries thinking content, that is placed in front of the text inside
// <think> tags. If c or its writer is nil, nothing is written but a result is
// still returned. originalModel and startTime are not used.
func (g *KimiCLIGateway) handleNonStreamingResponse(
	stdout io.Reader,
	c *gin.Context,
	originalModel, mappedModel string,
	startTime time.Time,
) (*ForwardResult, error) {
	cliMsg, parseErr := parseKimiCLIMessage(stdout)
	if parseErr != nil {
		return nil, fmt.Errorf("parse cli output: %w", parseErr)
	}

	answer := extractKimiTextContent(cliMsg)
	if reasoning := extractKimiThinkingContent(cliMsg); reasoning != "" {
		answer = fmt.Sprintf("<think>\n%s\n</think>\n\n%s", reasoning, answer)
	}

	onlyChoice := kimiChoice{
		Index:        0,
		Message:      kimiMessage{Role: "assistant", Content: answer},
		FinishReason: stringPtr("stop"),
	}
	// The marshal error is dropped, as before: the payload is built from plain
	// strings and integers only.
	payload, _ := json.Marshal(kimiChatCompletionResponse{
		ID:      generateChatCompletionID(),
		Object:  "chat.completion",
		Created: time.Now().Unix(),
		Model:   mappedModel,
		Choices: []kimiChoice{onlyChoice},
	})

	forwarded := &ForwardResult{
		UpstreamModel: mappedModel,
	}
	if c == nil || c.Writer == nil {
		return forwarded, nil
	}

	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(http.StatusOK)
	_, _ = c.Writer.Write(payload)
	return forwarded, nil
}

// --- Helpers ---

// resolveKimiCLI looks the kimi executable up on PATH and returns the path
// that was found, or the empty string when the lookup fails.
func resolveKimiCLI() string {
	located, lookErr := exec.LookPath(kimiCLICommand)
	if lookErr != nil || located == "" {
		return ""
	}
	return located
}

// buildKimiNDJSONMessages converts an OpenAI Chat Completions request body into
// NDJSON lines suitable for kimi-cli --input-format stream-json.
//
// Each message becomes one {"role":...,"content":...} line terminated by a
// newline. When a message's content is an array of parts (e.g. vision
// messages), the "text" parts are joined with newlines and every other part is
// dropped. Messages whose role or resulting content is empty are skipped.
//
// An error is returned when "messages" is missing or not an array, when no
// message survives the filtering, or when a message cannot be encoded.
func buildKimiNDJSONMessages(body []byte) (string, error) {
	messagesResult := gjson.GetBytes(body, "messages")
	if !messagesResult.Exists() || !messagesResult.IsArray() {
		return "", errors.New("missing or invalid messages array")
	}

	type ndjsonMessage struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	var (
		out       strings.Builder
		encodeErr error
	)
	// Use a real JSON encoder: Go's %q verb emits Go string syntax (\x07, \a,
	// \U0001F600, ...), which is not valid JSON for many inputs. Encode also
	// appends the newline that separates NDJSON records.
	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false)

	messagesResult.ForEach(func(_, msg gjson.Result) bool {
		role := msg.Get("role").String()
		contentResult := msg.Get("content")

		var content string
		if contentResult.IsArray() {
			// Array content must be detected up front: String() on an array
			// yields its raw JSON, never "", so it cannot be used as the test.
			var texts []string
			contentResult.ForEach(func(_, item gjson.Result) bool {
				if item.Get("type").String() == "text" {
					texts = append(texts, item.Get("text").String())
				}
				return true
			})
			content = strings.Join(texts, "\n")
		} else {
			content = contentResult.String()
		}

		if role == "" || content == "" {
			return true // skip empty
		}

		if err := enc.Encode(ndjsonMessage{Role: role, Content: content}); err != nil {
			encodeErr = err
			return false
		}
		return true
	})

	if encodeErr != nil {
		return "", fmt.Errorf("encode message: %w", encodeErr)
	}
	if out.Len() == 0 {
		return "", errors.New("no valid messages found")
	}

	return out.String(), nil
}

// parseKimiCLIMessage reads all NDJSON lines from stdout and returns the last valid one.
// When multiple messages are piped via stdin, kimi-cli outputs one response per turn;
// we only need the last one (the response to the final user message).
//
// Blank lines, the "To resume this session" banner, and any line that is not a
// JSON object are ignored. Lines may be arbitrarily long, and the final line
// does not need a trailing newline.
//
// An error is returned when reading fails for a reason other than EOF, or when
// no JSON object was found in the output at all.
func parseKimiCLIMessage(stdout io.Reader) (map[string]interface{}, error) {
	// bufio.Reader rather than bufio.Scanner: Scanner aborts the whole parse
	// with ErrTooLong once a single line exceeds its token limit, and one long
	// model response is exactly one long line.
	reader := bufio.NewReader(stdout)

	var lastMsg map[string]interface{}
	for {
		raw, readErr := reader.ReadString('\n')

		// ReadString may return data together with an error (typically the
		// final unterminated line plus io.EOF), so handle the data first.
		line := strings.TrimSpace(raw)
		if line != "" && !strings.HasPrefix(line, "To resume this session") {
			var msg map[string]interface{}
			// Non-JSON lines are skipped. A literal "null" decodes to a nil
			// map and must not erase a previously seen message.
			if err := json.Unmarshal([]byte(line), &msg); err == nil && msg != nil {
				lastMsg = msg
			}
		}

		if readErr != nil {
			if errors.Is(readErr, io.EOF) {
				break
			}
			return nil, fmt.Errorf("scan cli output: %w", readErr)
		}
	}

	if lastMsg == nil {
		return nil, errors.New("no valid JSON output from kimi-cli")
	}
	return lastMsg, nil
}

// extractKimiTextContent returns the plain text carried by a kimi-cli message.
//
// The "content" field may be a string, which is returned as is, or a list of
// blocks, in which case the "text" value of every block whose "type" is "text"
// is collected and the pieces are joined with newlines. Anything else yields
// the empty string.
func extractKimiTextContent(msg map[string]interface{}) string {
	raw, present := msg["content"]
	if !present {
		return ""
	}

	switch value := raw.(type) {
	case string:
		return value
	case []interface{}:
		var parts []string
		for i := range value {
			entry, isObject := value[i].(map[string]interface{})
			if !isObject || entry["type"] != "text" {
				continue
			}
			if piece, isString := entry["text"].(string); isString {
				parts = append(parts, piece)
			}
		}
		return strings.Join(parts, "\n")
	default:
		return ""
	}
}

// extractKimiThinkingContent returns the reasoning text carried by a kimi-cli
// message.
//
// Only list-shaped "content" can hold thinking: the "think" value of every
// block whose "type" is "think" is collected and the pieces are joined with
// newlines. A missing, string, or otherwise shaped "content" yields "".
func extractKimiThinkingContent(msg map[string]interface{}) string {
	blocks, isList := msg["content"].([]interface{})
	if !isList {
		return ""
	}

	var (
		joined strings.Builder
		found  int
	)
	for _, raw := range blocks {
		block, isObject := raw.(map[string]interface{})
		if !isObject || block["type"] != "think" {
			continue
		}
		thought, isString := block["think"].(string)
		if !isString {
			continue
		}
		// Separate by count, not by buffer length, so empty pieces still get
		// their newline.
		if found > 0 {
			joined.WriteByte('\n')
		}
		joined.WriteString(thought)
		found++
	}
	return joined.String()
}

// --- OpenAI-compatible response structs ---

// kimiDelta is the "delta" object of a streaming chunk's choice. Both fields
// are left out of the JSON when empty, so a zero kimiDelta serializes as {}.
type kimiDelta struct {
	Role    string `json:"role,omitempty"`
	Content string `json:"content,omitempty"`
}

// kimiChoice is one element of the "choices" array. The same type serves both
// response shapes: complete responses fill Message, streaming chunks fill
// Delta. FinishReason is always present in the JSON and is null while unset.
type kimiChoice struct {
	Index        int         `json:"index"`
	Message      kimiMessage `json:"message,omitempty"`
	Delta        *kimiDelta  `json:"delta,omitempty"`
	FinishReason *string     `json:"finish_reason"`
}

// MarshalJSON serializes the choice, leaving "message" out when Message is the
// zero value. encoding/json's omitempty never omits a struct value, so without
// this every streaming chunk would carry a bogus, empty "message" object.
func (c kimiChoice) MarshalJSON() ([]byte, error) {
	// Same wire layout, but with Message as a pointer so omitempty can apply.
	type wire struct {
		Index        int          `json:"index"`
		Message      *kimiMessage `json:"message,omitempty"`
		Delta        *kimiDelta   `json:"delta,omitempty"`
		FinishReason *string      `json:"finish_reason"`
	}

	out := wire{
		Index:        c.Index,
		Delta:        c.Delta,
		FinishReason: c.FinishReason,
	}
	if c.Message != (kimiMessage{}) {
		msg := c.Message
		out.Message = &msg
	}
	return json.Marshal(out)
}

// kimiMessage is the "message" object of a complete (non-streaming) choice.
// Both fields are always serialized, even when empty.
type kimiMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// kimiChatCompletionResponse is the top-level JSON envelope sent to the client.
// It is used for complete responses (Object "chat.completion") as well as for
// individual streaming chunks (Object "chat.completion.chunk").
type kimiChatCompletionResponse struct {
	ID      string       `json:"id"`
	Object  string       `json:"object"`
	Created int64        `json:"created"`
	Model   string       `json:"model"`
	Choices []kimiChoice `json:"choices"`
}

// buildSSEChunk renders one "chat.completion.chunk" envelope as a JSON string.
// The envelope holds a single choice built from index, delta and finishReason;
// a nil finishReason is serialized as null.
func buildSSEChunk(id string, created int64, model string, index int, delta *kimiDelta, finishReason *string) string {
	only := kimiChoice{
		Index:        index,
		Delta:        delta,
		FinishReason: finishReason,
	}

	var envelope kimiChatCompletionResponse
	envelope.ID = id
	envelope.Object = "chat.completion.chunk"
	envelope.Created = created
	envelope.Model = model
	envelope.Choices = []kimiChoice{only}

	// The marshal error is dropped, as before: the envelope only contains
	// strings, integers and pointers to them.
	encoded, _ := json.Marshal(envelope)
	return string(encoded)
}

// generateChatCompletionID returns an identifier of the form
// "chatcmpl-<nanoseconds>", derived from the current wall-clock time.
func generateChatCompletionID() string {
	nanos := time.Now().UnixNano()
	return "chatcmpl-" + fmt.Sprint(nanos)
}

// stringPtr returns a pointer to a fresh copy of s, for filling optional
// string fields from a literal.
func stringPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}