Files
sub2api/backend/internal/service/kimi_cli_gateway.go
openclaw e746e82c39
Some checks failed
CI / test (push) Has been cancelled
CI / frontend (push) Has been cancelled
CI / golangci-lint (push) Has been cancelled
Security Scan / backend-security (push) Has been cancelled
Security Scan / frontend-security (push) Has been cancelled
feat(kimi): add Kimi CLI forward mode support
- Add AccountTypeCLI domain constant
- Add KimiCLIGateway to forward requests through local kimi-cli binary
- Route CLI accounts in ForwardKimiChatCompletions to cli gateway
- Handle CLI type in GetAccessToken (no token needed)
- Fix Gin oneof binding to accept 'cli' type (Create/Update Account)
- Fix validateDataAccount to accept bedrock and cli types
- Remove unsupported --model arg from kimi-cli invocation
- Frontend: CLI account creation UI with model mapping, pool mode
- Frontend: CLI edit modal support
- Frontend: UseKeyModal shows OpenAI examples for kimi platform
- Add i18n strings for CLI account type

[缅因猫/Codex🐾]
2026-04-24 01:54:59 +08:00

443 lines
11 KiB
Go

package service
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os/exec"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/gin-gonic/gin"
"github.com/tidwall/gjson"
)
// Settings used when invoking the locally installed kimi CLI.
const (
// kimiCLICommand is the executable name looked up on PATH.
kimiCLICommand = "kimi"
// kimiCLITimeout is a 10 minute duration; presumably the upper bound for a
// single CLI run — confirm against the call sites.
kimiCLITimeout = 10 * time.Minute
// kimiCLIStreamJSONFlag is the format name passed to the CLI's --output-format flag.
kimiCLIStreamJSONFlag = "stream-json"
)
// KimiCLIGateway handles forwarding chat completions requests through the local kimi-cli.
type KimiCLIGateway struct {
// cliPath is the resolved path to the kimi binary. It is empty when the
// binary could not be resolved, in which case IsAvailable reports false.
cliPath string
}
// NewKimiCLIGateway builds a gateway whose binary path is resolved once, at
// construction time. The returned gateway is always non-nil; use IsAvailable
// to find out whether the kimi binary was actually located.
func NewKimiCLIGateway() *KimiCLIGateway {
	gateway := new(KimiCLIGateway)
	gateway.cliPath = resolveKimiCLI()
	return gateway
}
// IsAvailable reports whether a kimi CLI binary path was resolved for this gateway.
func (g *KimiCLIGateway) IsAvailable() bool {
	return len(g.cliPath) != 0
}
// ForwardChatCompletions forwards an OpenAI Chat Completions request through kimi-cli.
//
// The request's messages are converted to NDJSON and piped to the CLI's stdin.
// The CLI's output is then written to c either as an SSE stream (when the body
// sets "stream" to true) or as a single JSON response.
//
// Each run is bounded by kimiCLITimeout in addition to ctx. An error is
// returned when the CLI binary is unavailable, the messages cannot be
// converted, the process cannot be started, or no usable output is produced.
func (g *KimiCLIGateway) ForwardChatCompletions(
	ctx context.Context,
	c *gin.Context,
	account *Account,
	body []byte,
) (*ForwardResult, error) {
	startTime := time.Now()
	if !g.IsAvailable() {
		return nil, errors.New("kimi CLI not found. Please install it: uv tool install --python 3.13 kimi-cli")
	}

	// 1. Parse request
	reqStream := gjson.GetBytes(body, "stream").Bool()
	originalModel := gjson.GetBytes(body, "model").String()
	mappedModel := account.GetMappedModel(originalModel)
	if mappedModel == "" {
		mappedModel = originalModel
	}

	// 2. Build NDJSON messages for stdin
	ndjsonInput, err := buildKimiNDJSONMessages(body)
	if err != nil {
		return nil, fmt.Errorf("build messages: %w", err)
	}

	// 3. Build CLI args
	// Note: kimi-cli does not support --model; it uses the default model.
	// The API only exposes K2.6, but the actual model is determined by the CLI.
	args := []string{
		"--print",
		"--output-format", kimiCLIStreamJSONFlag,
		"--input-format", kimiCLIStreamJSONFlag,
	}

	// 4. Run CLI (CLI manages its own OAuth auth via `kimi login`).
	// The timeout keeps a hung CLI from pinning the request forever when the
	// caller's context carries no deadline of its own.
	runCtx, cancel := context.WithTimeout(ctx, kimiCLITimeout)
	defer cancel()

	cmd := exec.CommandContext(runCtx, g.cliPath, args...)
	cmd.Stdin = strings.NewReader(ndjsonInput)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("stdout pipe: %w", err)
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("stderr pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("start cli: %w", err)
	}

	// Drain stderr in background for debugging. stderrDone lets us wait for the
	// read to finish before cmd.Wait, which closes the pipes.
	stderrDone := make(chan struct{})
	go func() {
		defer close(stderrDone)
		// Best effort: stderr is only used for diagnostics, so a read error is ignored.
		slurp, _ := io.ReadAll(stderr)
		if len(slurp) > 0 {
			logger.LegacyPrintf("service.kimi_cli", "stderr: %s", string(slurp))
		}
	}()

	// 5. Parse output and forward
	var result *ForwardResult
	if reqStream {
		result, err = g.handleStreamingResponse(stdout, c, originalModel, mappedModel, startTime)
	} else {
		result, err = g.handleNonStreamingResponse(stdout, c, originalModel, mappedModel, startTime)
	}

	// If the handler stopped reading early, discard the remaining output so the
	// CLI cannot block on a full stdout pipe and stall cmd.Wait.
	_, _ = io.Copy(io.Discard, stdout)
	// All pipe reads must complete before Wait is called.
	<-stderrDone

	// Wait for process to finish
	waitErr := cmd.Wait()
	if waitErr != nil && result == nil {
		if err != nil {
			// Keep the exit error as the wrapped cause but do not lose the
			// handler's explanation of what went wrong with the output.
			return nil, fmt.Errorf("cli exited: %w (%v)", waitErr, err)
		}
		return nil, fmt.Errorf("cli exited: %w", waitErr)
	}
	return result, err
}
// handleStreamingResponse reads the kimi-cli output from stdout and replays the
// final message to the client as an OpenAI-style SSE stream: a role chunk, one
// content chunk carrying the whole answer, a finish chunk and the [DONE] marker.
//
// Thinking blocks, when present, are prepended to the answer inside
// <think>...</think>. The returned ForwardResult carries mappedModel and the
// time from startTime until the CLI output was available.
//
// It returns an error when c (or its writer) is nil, when the writer cannot
// flush, or when the CLI produced no parsable message.
func (g *KimiCLIGateway) handleStreamingResponse(
	stdout io.Reader,
	c *gin.Context,
	originalModel, mappedModel string,
	startTime time.Time,
) (*ForwardResult, error) {
	if c == nil || c.Writer == nil {
		return nil, errors.New("gin context or writer is nil")
	}
	flusher, ok := c.Writer.(http.Flusher)
	if !ok {
		return nil, errors.New("streaming not supported")
	}

	// Parse before touching the response so that a CLI failure is returned to
	// the caller without event-stream headers or a 200 status already set.
	msg, err := parseKimiCLIMessage(stdout)
	if err != nil {
		return nil, fmt.Errorf("parse cli output: %w", err)
	}
	// The CLI delivers the whole answer at once, so the first token is
	// available as soon as parsing finishes.
	firstTokenMs := int(time.Since(startTime).Milliseconds())

	// Build full content (prepend thinking if present)
	fullContent := extractKimiTextContent(msg)
	if thinking := extractKimiThinkingContent(msg); thinking != "" {
		fullContent = fmt.Sprintf("<think>\n%s\n</think>\n\n%s", thinking, fullContent)
	}

	header := c.Writer.Header()
	header.Set("Content-Type", "text/event-stream; charset=utf-8")
	header.Set("Cache-Control", "no-cache")
	header.Set("Connection", "keep-alive")
	c.Writer.WriteHeader(http.StatusOK)

	chatID := generateChatCompletionID()
	created := time.Now().Unix()

	// emit writes one SSE data frame and flushes it. Write errors (typically a
	// disconnected client) are deliberately ignored: nothing useful can be
	// sent to the client at that point.
	emit := func(payload string) {
		_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", payload)
		flusher.Flush()
	}

	emit(buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{Role: "assistant"}, nil))
	emit(buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{Content: fullContent}, nil))
	emit(buildSSEChunk(chatID, created, mappedModel, 0, &kimiDelta{}, stringPtr("stop")))
	emit("[DONE]")

	return &ForwardResult{
		UpstreamModel: mappedModel,
		FirstTokenMs:  &firstTokenMs,
	}, nil
}
// handleNonStreamingResponse turns the kimi-cli output read from stdout into a
// single OpenAI-style chat.completion JSON document and writes it to c with
// status 200. When c or its writer is nil nothing is written, but the result
// is still returned.
//
// Thinking blocks, when present, are placed in front of the answer inside
// <think>...</think>. An error is returned only when the CLI output cannot be
// parsed.
func (g *KimiCLIGateway) handleNonStreamingResponse(
	stdout io.Reader,
	c *gin.Context,
	originalModel, mappedModel string,
	startTime time.Time,
) (*ForwardResult, error) {
	cliMsg, parseErr := parseKimiCLIMessage(stdout)
	if parseErr != nil {
		return nil, fmt.Errorf("parse cli output: %w", parseErr)
	}

	answer := extractKimiTextContent(cliMsg)
	if reasoning := extractKimiThinkingContent(cliMsg); reasoning != "" {
		answer = fmt.Sprintf("<think>\n%s\n</think>\n\n%s", reasoning, answer)
	}

	onlyChoice := kimiChoice{
		Index:        0,
		Message:      kimiMessage{Role: "assistant", Content: answer},
		FinishReason: stringPtr("stop"),
	}
	completionID := generateChatCompletionID()
	createdAt := time.Now().Unix()
	payload, _ := json.Marshal(kimiChatCompletionResponse{
		ID:      completionID,
		Object:  "chat.completion",
		Created: createdAt,
		Model:   mappedModel,
		Choices: []kimiChoice{onlyChoice},
	})

	outcome := &ForwardResult{UpstreamModel: mappedModel}
	if c == nil || c.Writer == nil {
		return outcome, nil
	}
	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(http.StatusOK)
	_, _ = c.Writer.Write(payload)
	return outcome, nil
}
// --- Helpers ---
// resolveKimiCLI looks up the kimi executable on PATH and returns its location,
// or the empty string when the lookup fails or yields no path.
func resolveKimiCLI() string {
	located, lookupErr := exec.LookPath(kimiCLICommand)
	if lookupErr != nil || located == "" {
		return ""
	}
	return located
}
// buildKimiNDJSONMessages converts an OpenAI Chat Completions request body into
// NDJSON lines suitable for kimi-cli --input-format stream-json.
//
// Each message becomes one {"role": ..., "content": ...} line. Multi-part
// (array) content is reduced to its "text" parts joined with newlines; messages
// with an empty role or empty resulting content are skipped.
//
// It returns an error when "messages" is missing or not an array, or when no
// message survives the filtering.
func buildKimiNDJSONMessages(body []byte) (string, error) {
	messagesResult := gjson.GetBytes(body, "messages")
	if !messagesResult.Exists() || !messagesResult.IsArray() {
		return "", errors.New("missing or invalid messages array")
	}

	// ndjsonMessage fixes the key order of every emitted line.
	type ndjsonMessage struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}

	var (
		out       strings.Builder
		encodeErr error
	)
	messagesResult.ForEach(func(_, msg gjson.Result) bool {
		role := msg.Get("role").String()
		contentField := msg.Get("content")

		var content string
		if contentField.IsArray() {
			// Array content must be checked first: String() on an array yields
			// its raw JSON text, never "", so testing for an empty string would
			// forward the raw JSON as the prompt.
			var texts []string
			contentField.ForEach(func(_, item gjson.Result) bool {
				if item.Get("type").String() == "text" {
					texts = append(texts, item.Get("text").String())
				}
				return true
			})
			content = strings.Join(texts, "\n")
		} else {
			content = contentField.String()
		}

		if role == "" || content == "" {
			return true // skip empty
		}

		// Encode with encoding/json rather than %q: Go string quoting emits
		// escapes such as \x1b, \a, \v and \U0001xxxx that are not valid JSON.
		line, err := json.Marshal(ndjsonMessage{Role: role, Content: content})
		if err != nil {
			encodeErr = err
			return false
		}
		out.Write(line)
		out.WriteByte('\n')
		return true
	})
	if encodeErr != nil {
		return "", fmt.Errorf("encode message: %w", encodeErr)
	}
	if out.Len() == 0 {
		return "", errors.New("no valid messages found")
	}
	return out.String(), nil
}
// parseKimiCLIMessage reads all NDJSON lines from stdout and returns the last
// line that decodes to a JSON object.
//
// When multiple messages are piped via stdin, kimi-cli outputs one response per
// turn; only the last one (the response to the final user message) is needed.
// Blank lines, the "To resume this session" footer, non-JSON lines and JSON
// values that are not objects (including a bare null) are ignored.
//
// It returns an error when reading fails, when a single line exceeds 16 MiB, or
// when no JSON object was found.
func parseKimiCLIMessage(stdout io.Reader) (map[string]interface{}, error) {
	// A whole model answer arrives on one line, so the per-line cap is generous
	// while still bounding memory if the CLI misbehaves.
	const maxLineBytes = 16 * 1024 * 1024

	scanner := bufio.NewScanner(stdout)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	var lastMsg map[string]interface{}
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		// Skip the "To resume this session" line
		if strings.HasPrefix(line, "To resume this session") {
			continue
		}
		var msg map[string]interface{}
		if err := json.Unmarshal([]byte(line), &msg); err != nil {
			continue // skip non-JSON lines
		}
		if msg == nil {
			// A literal "null" decodes without error but leaves the map nil;
			// it must not overwrite a message that was already captured.
			continue
		}
		lastMsg = msg
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scan cli output: %w", err)
	}
	if lastMsg == nil {
		return nil, errors.New("no valid JSON output from kimi-cli")
	}
	return lastMsg, nil
}
// extractKimiTextContent returns the visible answer text of a kimi-cli message.
//
// A string "content" is returned unchanged. For an array "content", the "text"
// fields of all blocks whose "type" is "text" are joined with newlines. Any
// other shape, or a missing "content" key, yields the empty string.
func extractKimiTextContent(msg map[string]interface{}) string {
	switch content := msg["content"].(type) {
	case string:
		return content
	case []interface{}:
		var parts []string
		for _, raw := range content {
			block, isObject := raw.(map[string]interface{})
			if !isObject || block["type"] != "text" {
				continue
			}
			if text, isString := block["text"].(string); isString {
				parts = append(parts, text)
			}
		}
		return strings.Join(parts, "\n")
	default:
		return ""
	}
}
// extractKimiThinkingContent returns the reasoning text of a kimi-cli message.
//
// Only an array "content" can carry reasoning: the "think" fields of all blocks
// whose "type" is "think" are joined with newlines. Every other shape yields
// the empty string.
func extractKimiThinkingContent(msg map[string]interface{}) string {
	blocks, isArray := msg["content"].([]interface{})
	if !isArray {
		return ""
	}
	var collected []string
	for i := range blocks {
		entry, isObject := blocks[i].(map[string]interface{})
		if !isObject || entry["type"] != "think" {
			continue
		}
		thought, isString := entry["think"].(string)
		if !isString {
			continue
		}
		collected = append(collected, thought)
	}
	return strings.Join(collected, "\n")
}
// --- OpenAI-compatible response structs ---

// kimiDelta is the incremental payload of one streaming chunk. Empty fields
// are omitted, so a zero delta encodes as {}.
type kimiDelta struct {
	Role    string `json:"role,omitempty"`
	Content string `json:"content,omitempty"`
}

// kimiChoice is one entry of the "choices" array of a complete (non-streaming)
// response.
type kimiChoice struct {
	Index int `json:"index"`
	// Message is a struct value, which encoding/json never treats as empty, so
	// it is always emitted; the tag therefore carries no omitempty option.
	Message kimiMessage `json:"message"`
	Delta   *kimiDelta  `json:"delta,omitempty"`
	// FinishReason encodes as null while nil.
	FinishReason *string `json:"finish_reason"`
}

// kimiMessage is a complete assistant message.
type kimiMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// kimiChatCompletionResponse is the top-level OpenAI-style response envelope.
type kimiChatCompletionResponse struct {
	ID      string       `json:"id"`
	Object  string       `json:"object"`
	Created int64        `json:"created"`
	Model   string       `json:"model"`
	Choices []kimiChoice `json:"choices"`
}

// buildSSEChunk encodes one "chat.completion.chunk" object carrying a single
// choice with the given index, delta and finish reason, and returns it as a
// JSON string ready to be placed after "data: " in an SSE frame.
//
// A nil delta is omitted; a nil finishReason is encoded as null.
func buildSSEChunk(id string, created int64, model string, index int, delta *kimiDelta, finishReason *string) string {
	// Streaming chunks use their own choice shape. Reusing kimiChoice would
	// add an empty "message" object to every chunk, because its Message field
	// is a struct value and is always encoded.
	type chunkChoice struct {
		Index        int        `json:"index"`
		Delta        *kimiDelta `json:"delta,omitempty"`
		FinishReason *string    `json:"finish_reason"`
	}
	type chunkEnvelope struct {
		ID      string        `json:"id"`
		Object  string        `json:"object"`
		Created int64         `json:"created"`
		Model   string        `json:"model"`
		Choices []chunkChoice `json:"choices"`
	}

	chunk := chunkEnvelope{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []chunkChoice{
			{
				Index:        index,
				Delta:        delta,
				FinishReason: finishReason,
			},
		},
	}
	// Marshal cannot fail here: the value holds only strings, integers and
	// pointers to such fields.
	b, _ := json.Marshal(chunk)
	return string(b)
}
// generateChatCompletionID returns a completion identifier made of the
// "chatcmpl-" prefix followed by the current Unix time in nanoseconds.
func generateChatCompletionID() string {
	const prefix = "chatcmpl-"
	nanos := time.Now().UnixNano()
	return fmt.Sprintf("%s%d", prefix, nanos)
}
// stringPtr returns a pointer to a fresh copy of s.
func stringPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}