feat: optimize ping interval to 30s and SSH sync to 10min with timeout protection

- Change ping interval from 60s to 30s (configurable via PING_INTERVAL)
- Change SSH info sync from every ping to every 10 minutes (via ssh_synced_at)
- Add SSH command timeout (8s) to prevent hanging on unresponsive hosts
- Add concurrency limit (5) for SSH sync operations
- Change frontend UI refresh interval from 10s to 30s
- Fix: remove hardcoded 30s step, use configured interval directly
- Fix: ensure the ping loop continues even if an individual machine fails
This commit is contained in:
shirainbown
2026-06-19 18:08:06 +08:00
parent 21a2d2dff3
commit 74bab47a5b
4 changed files with 60 additions and 12 deletions

View File

@@ -29,10 +29,10 @@ services:
- ENCRYPT_KEY=lan-manager-default-key-change-in-production
# 功能配置
- PING_INTERVAL=60
- PING_INTERVAL=30
- SSH_TIMEOUT=10
- LOG_RETENTION_DAYS=0
- UI_REFRESH_INTERVAL=10000
- UI_REFRESH_INTERVAL=30000
# 日志级别
- LOG_LEVEL=info

View File

@@ -38,7 +38,7 @@ func Load() *Config {
AdminPass: getEnv("ADMIN_PASS", "admin"),
SessionSecret: getEnv("SESSION_SECRET", "lan-manager-secret-change-in-production"),
LogRetentionDays: getEnvInt("LOG_RETENTION_DAYS", 0),
UIRefreshInterval: getEnvInt("UI_REFRESH_INTERVAL", 10000),
UIRefreshInterval: getEnvInt("UI_REFRESH_INTERVAL", 30000),
EncryptKey: getEnv("ENCRYPT_KEY", ""),
}

View File

@@ -2,6 +2,7 @@ package services
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"lan-manager/server/db"
@@ -165,12 +166,21 @@ func saveSSHResult(mid int64, mip string, mport int, result *models.SSHInfoResul
}
}
func handlePingResult(m *machinePing, res PingResult) {
// shouldSyncSSH reports whether a machine is due for another SSH info sync.
//
// lastSync is the time of the previous sync, or nil when the machine has
// never been synced. A sync is due when lastSync is nil, or when strictly
// more than 10 minutes have elapsed since *lastSync.
func shouldSyncSSH(lastSync *time.Time) bool {
	const resyncAfter = 10 * time.Minute

	// The nil check comes first so it short-circuits the dereference below.
	return lastSync == nil || time.Since(*lastSync) > resyncAfter
}
func handlePingResult(m *machinePing, res PingResult, lastSSHSync *time.Time) {
online := res.Online
reason := res.Reason
var sshResult *models.SSHInfoResult
// 如果网络层检测失败,尝试通过 SSH 获取系统信息来判定在线
// If network ping fails, try SSH as a fallback to determine online status
if !online && m.sshUsername != "" && m.sshPassword != "" {
plainPass, decryptErr := utils.Decrypt(m.sshPassword)
if decryptErr == nil {
@@ -234,11 +244,15 @@ func handlePingResult(m *machinePing, res PingResult) {
fmt.Printf("[Ping] %s -> online=%v\n", m.ip, online)
}
if online && m.sshUsername != "" && m.sshPassword != "" {
// SSH sync: only if machine is online, has SSH credentials, and hasn't been synced in the last 10 minutes
if online && m.sshUsername != "" && m.sshPassword != "" && shouldSyncSSH(lastSSHSync) {
if sshResult != nil {
saveSSHResult(m.id, m.ip, m.sshPort, sshResult)
} else {
// Use semaphore to limit concurrent SSH sync operations
sshSyncSem <- struct{}{}
go func(mid int64, mip string, mport int, muser, mpass string) {
defer func() { <-sshSyncSem }()
plainPass, decryptErr := utils.Decrypt(mpass)
if decryptErr != nil {
fmt.Printf("[SSH] decrypt failed for %s:%d: %v\n", mip, mport, decryptErr)
@@ -255,14 +269,20 @@ func handlePingResult(m *machinePing, res PingResult) {
}
}
// sshSyncSem is a semaphore that caps the number of background SSH sync
// goroutines running at once at 5, to prevent resource exhaustion.
// handlePingResult acquires a slot by sending on the channel before it spawns
// the sync goroutine, so that send blocks the caller while all 5 slots are in
// use; the goroutine gives the slot back with a deferred receive.
var sshSyncSem = make(chan struct{}, 5)
func StartPingService(interval int) {
const step = 30 * time.Second
if interval <= 0 {
interval = 30
}
step := time.Duration(interval) * time.Second
go func() {
time.Sleep(2 * time.Second)
for {
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online FROM machines ORDER BY id ASC`)
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online, ssh_synced_at FROM machines ORDER BY id ASC`)
if err != nil {
fmt.Printf("[Ping] query error: %v\n", err)
time.Sleep(step)
@@ -270,12 +290,18 @@ func StartPingService(interval int) {
}
list := []machinePing{}
lastSyncMap := make(map[int64]*time.Time)
for rows.Next() {
var m machinePing
var isOnline int
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline); err == nil {
var sshSyncedAt sql.NullTime
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline, &sshSyncedAt); err == nil {
m.wasOnline = isOnline == 1
list = append(list, m)
if sshSyncedAt.Valid {
t := sshSyncedAt.Time
lastSyncMap[m.id] = &t
}
}
}
rows.Close()
@@ -289,7 +315,7 @@ func StartPingService(interval int) {
for i := range list {
res := PingHostRepeated(list[i].ip, list[i].sshPort, 3)
handlePingResult(&list[i], res)
handlePingResult(&list[i], res, lastSyncMap[list[i].id])
time.Sleep(step)
}
}

View File

@@ -3,6 +3,7 @@ package services
import (
"bufio"
"bytes"
"context"
"fmt"
"lan-manager/server/config"
"lan-manager/server/models"
@@ -13,6 +14,9 @@ import (
"golang.org/x/crypto/ssh"
)
// sshCommandTimeout is the longest runSSHCommand waits for a single remote
// command to finish. When it elapses, the session is sent SIGKILL and closed,
// and a "timed out" error is returned instead of the command output.
const sshCommandTimeout = 8 * time.Second
func GetSSHInfo(ip string, port int, user, pass string) (*models.SSHInfoResult, error) {
if port <= 0 {
port = 22
@@ -85,10 +89,28 @@ func runSSHCommand(client *ssh.Client, cmd string) (string, error) {
return "", err
}
defer session.Close()
var b bytes.Buffer
session.Stdout = &b
err = session.Run(cmd)
// Use context to enforce command timeout and prevent hanging
ctx, cancel := context.WithTimeout(context.Background(), sshCommandTimeout)
defer cancel()
done := make(chan error, 1)
go func() {
done <- session.Run(cmd)
}()
select {
case err := <-done:
return b.String(), err
case <-ctx.Done():
// Force close session to unblock Run()
session.Signal(ssh.SIGKILL)
session.Close()
return "", fmt.Errorf("ssh command timed out after %v: %s", sshCommandTimeout, cmd)
}
}
func getCPUInfo(client *ssh.Client) (usage string, cores string, err error) {