feat: optimize ping interval to 30s and SSH sync to 10min with timeout protection
- Change ping interval from 60s to 30s (configurable via PING_INTERVAL)
- Change SSH info sync from every ping to every 10 minutes (via ssh_synced_at)
- Add SSH command timeout (8s) to prevent hanging on unresponsive hosts
- Add concurrency limit (5) for SSH sync operations
- Change frontend UI refresh interval from 10s to 30s
- Fix: remove hardcoded 30s step, use configured interval directly
- Fix: ensure ping loop continues even if individual machine fails
This commit is contained in:
@@ -29,10 +29,10 @@ services:
|
||||
- ENCRYPT_KEY=lan-manager-default-key-change-in-production
|
||||
|
||||
# 功能配置
|
||||
- PING_INTERVAL=60
|
||||
- PING_INTERVAL=30
|
||||
- SSH_TIMEOUT=10
|
||||
- LOG_RETENTION_DAYS=0
|
||||
- UI_REFRESH_INTERVAL=10000
|
||||
- UI_REFRESH_INTERVAL=30000
|
||||
|
||||
# 日志级别
|
||||
- LOG_LEVEL=info
|
||||
|
||||
@@ -38,7 +38,7 @@ func Load() *Config {
|
||||
AdminPass: getEnv("ADMIN_PASS", "admin"),
|
||||
SessionSecret: getEnv("SESSION_SECRET", "lan-manager-secret-change-in-production"),
|
||||
LogRetentionDays: getEnvInt("LOG_RETENTION_DAYS", 0),
|
||||
UIRefreshInterval: getEnvInt("UI_REFRESH_INTERVAL", 10000),
|
||||
UIRefreshInterval: getEnvInt("UI_REFRESH_INTERVAL", 30000),
|
||||
EncryptKey: getEnv("ENCRYPT_KEY", ""),
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ package services
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"lan-manager/server/db"
|
||||
@@ -165,12 +166,21 @@ func saveSSHResult(mid int64, mip string, mport int, result *models.SSHInfoResul
|
||||
}
|
||||
}
|
||||
|
||||
func handlePingResult(m *machinePing, res PingResult) {
|
||||
// shouldSyncSSH reports whether a machine's SSH information is due for a
// refresh.
//
// lastSync is the previously recorded sync time, or nil when no sync time has
// been recorded. The function returns true when:
//   - lastSync is nil (never synced),
//   - more than syncInterval has elapsed since *lastSync, or
//   - *lastSync lies in the future relative to the local clock.
//
// A future timestamp can only come from a clock adjustment or a stored value
// interpreted in the wrong time zone. Treating it as stale avoids silently
// suppressing syncs until the local clock catches up with the bad value.
func shouldSyncSSH(lastSync *time.Time) bool {
	// Minimum time between two SSH info syncs for the same machine.
	const syncInterval = 10 * time.Minute

	if lastSync == nil {
		return true
	}

	elapsed := time.Since(*lastSync)
	if elapsed < 0 {
		// Recorded time is ahead of now; it cannot be trusted as "recent".
		return true
	}
	return elapsed > syncInterval
}
|
||||
|
||||
func handlePingResult(m *machinePing, res PingResult, lastSSHSync *time.Time) {
|
||||
online := res.Online
|
||||
reason := res.Reason
|
||||
var sshResult *models.SSHInfoResult
|
||||
|
||||
// 如果网络层检测失败,尝试通过 SSH 获取系统信息来判定在线
|
||||
// If network ping fails, try SSH as a fallback to determine online status
|
||||
if !online && m.sshUsername != "" && m.sshPassword != "" {
|
||||
plainPass, decryptErr := utils.Decrypt(m.sshPassword)
|
||||
if decryptErr == nil {
|
||||
@@ -234,11 +244,15 @@ func handlePingResult(m *machinePing, res PingResult) {
|
||||
fmt.Printf("[Ping] %s -> online=%v\n", m.ip, online)
|
||||
}
|
||||
|
||||
if online && m.sshUsername != "" && m.sshPassword != "" {
|
||||
// SSH sync: only if machine is online, has SSH credentials, and hasn't been synced in the last 10 minutes
|
||||
if online && m.sshUsername != "" && m.sshPassword != "" && shouldSyncSSH(lastSSHSync) {
|
||||
if sshResult != nil {
|
||||
saveSSHResult(m.id, m.ip, m.sshPort, sshResult)
|
||||
} else {
|
||||
// Use semaphore to limit concurrent SSH sync operations
|
||||
sshSyncSem <- struct{}{}
|
||||
go func(mid int64, mip string, mport int, muser, mpass string) {
|
||||
defer func() { <-sshSyncSem }()
|
||||
plainPass, decryptErr := utils.Decrypt(mpass)
|
||||
if decryptErr != nil {
|
||||
fmt.Printf("[SSH] decrypt failed for %s:%d: %v\n", mip, mport, decryptErr)
|
||||
@@ -255,14 +269,20 @@ func handlePingResult(m *machinePing, res PingResult) {
|
||||
}
|
||||
}
|
||||
|
||||
// sshSyncSem is a buffered channel used as a counting semaphore that caps the
// number of SSH sync goroutines running at once at 5, to prevent resource
// exhaustion. A slot is acquired by sending an empty struct before the sync
// goroutine is started and released by receiving from the channel in that
// goroutine's deferred function. Because the send happens before the goroutine
// is spawned, the caller blocks while all 5 slots are in use.
|
||||
var sshSyncSem = make(chan struct{}, 5)
|
||||
|
||||
func StartPingService(interval int) {
|
||||
const step = 30 * time.Second
|
||||
if interval <= 0 {
|
||||
interval = 30
|
||||
}
|
||||
step := time.Duration(interval) * time.Second
|
||||
|
||||
go func() {
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
for {
|
||||
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online FROM machines ORDER BY id ASC`)
|
||||
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online, ssh_synced_at FROM machines ORDER BY id ASC`)
|
||||
if err != nil {
|
||||
fmt.Printf("[Ping] query error: %v\n", err)
|
||||
time.Sleep(step)
|
||||
@@ -270,12 +290,18 @@ func StartPingService(interval int) {
|
||||
}
|
||||
|
||||
list := []machinePing{}
|
||||
lastSyncMap := make(map[int64]*time.Time)
|
||||
for rows.Next() {
|
||||
var m machinePing
|
||||
var isOnline int
|
||||
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline); err == nil {
|
||||
var sshSyncedAt sql.NullTime
|
||||
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline, &sshSyncedAt); err == nil {
|
||||
m.wasOnline = isOnline == 1
|
||||
list = append(list, m)
|
||||
if sshSyncedAt.Valid {
|
||||
t := sshSyncedAt.Time
|
||||
lastSyncMap[m.id] = &t
|
||||
}
|
||||
}
|
||||
}
|
||||
rows.Close()
|
||||
@@ -289,7 +315,7 @@ func StartPingService(interval int) {
|
||||
|
||||
for i := range list {
|
||||
res := PingHostRepeated(list[i].ip, list[i].sshPort, 3)
|
||||
handlePingResult(&list[i], res)
|
||||
handlePingResult(&list[i], res, lastSyncMap[list[i].id])
|
||||
time.Sleep(step)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package services
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"lan-manager/server/config"
|
||||
"lan-manager/server/models"
|
||||
@@ -13,6 +14,9 @@ import (
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
// sshCommandTimeout is the maximum time a single SSH command execution may
// take. runSSHCommand uses it as the deadline of the context guarding
// session.Run; when it expires the session is signalled and closed, and a
// "timed out" error is returned so an unresponsive host cannot hang the caller.
|
||||
const sshCommandTimeout = 8 * time.Second
|
||||
|
||||
func GetSSHInfo(ip string, port int, user, pass string) (*models.SSHInfoResult, error) {
|
||||
if port <= 0 {
|
||||
port = 22
|
||||
@@ -85,10 +89,28 @@ func runSSHCommand(client *ssh.Client, cmd string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
defer session.Close()
|
||||
|
||||
var b bytes.Buffer
|
||||
session.Stdout = &b
|
||||
err = session.Run(cmd)
|
||||
|
||||
// Use context to enforce command timeout and prevent hanging
|
||||
ctx, cancel := context.WithTimeout(context.Background(), sshCommandTimeout)
|
||||
defer cancel()
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- session.Run(cmd)
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
return b.String(), err
|
||||
case <-ctx.Done():
|
||||
// Force close session to unblock Run()
|
||||
session.Signal(ssh.SIGKILL)
|
||||
session.Close()
|
||||
return "", fmt.Errorf("ssh command timed out after %v: %s", sshCommandTimeout, cmd)
|
||||
}
|
||||
}
|
||||
|
||||
func getCPUInfo(client *ssh.Client) (usage string, cores string, err error) {
|
||||
|
||||
Reference in New Issue
Block a user