feat: independent SSH sync service with concurrency limit

- Split SSH sync from ping loop into independent service
- Ping service: serial polling, 30s interval, only updates online status
- SSH sync service: runs every 10 minutes for all online machines
- Global semaphore limits concurrent SSH to 2 (prevents resource exhaustion)
- SSH command timeout 8s prevents hanging on unresponsive hosts
- Offline machines are skipped for SSH sync
- Ping fallback: if ping fails but SSH succeeds, mark as online (SSH sync handles info)
This commit is contained in:
shirainbown
2026-06-19 18:21:36 +08:00
parent 74bab47a5b
commit 1c0fce6a17
2 changed files with 83 additions and 51 deletions

View File

@@ -166,6 +166,9 @@ func main() {
// Start ping service
services.StartPingService(cfg.PingInterval)
// Start SSH sync service (independent 10-minute timer per machine)
services.StartSSHSyncService()
// Start log cleanup
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

View File

@@ -2,7 +2,6 @@ package services
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"lan-manager/server/db"
@@ -13,6 +12,7 @@ import (
"os/exec"
"runtime"
"strings"
"sync"
"time"
"golang.org/x/net/icmp"
@@ -166,30 +166,20 @@ func saveSSHResult(mid int64, mip string, mport int, result *models.SSHInfoResul
}
}
// shouldSyncSSH checks if SSH sync is needed based on last sync time.
// Returns true if never synced or last sync was more than 10 minutes ago.
func shouldSyncSSH(lastSync *time.Time) bool {
if lastSync == nil {
return true
}
return time.Since(*lastSync) > 10*time.Minute
}
func handlePingResult(m *machinePing, res PingResult, lastSSHSync *time.Time) {
func handlePingResult(m *machinePing, res PingResult) {
online := res.Online
reason := res.Reason
var sshResult *models.SSHInfoResult
// If network ping fails, try SSH as a fallback to determine online status
if !online && m.sshUsername != "" && m.sshPassword != "" {
plainPass, decryptErr := utils.Decrypt(m.sshPassword)
if decryptErr == nil {
result, err := GetSSHInfo(m.ip, m.sshPort, m.sshUsername, plainPass)
_, err := GetSSHInfo(m.ip, m.sshPort, m.sshUsername, plainPass)
if err == nil {
online = true
reason = ""
sshResult = result
fmt.Printf("[Ping] %s -> network unreachable, but SSH info fetched successfully, treating as online\n", m.ip)
// Note: SSH sync service will handle periodic info updates
}
}
}
@@ -230,9 +220,6 @@ func handlePingResult(m *machinePing, res PingResult, lastSSHSync *time.Time) {
_, _ = db.DB.Exec(`UPDATE machines SET total_offline_seconds=total_offline_seconds+? WHERE id=?`, duration, m.id)
}
m.wasOnline = true
if sshResult != nil {
saveSSHResult(m.id, m.ip, m.sshPort, sshResult)
}
return
}
@@ -243,35 +230,12 @@ func handlePingResult(m *machinePing, res PingResult, lastSSHSync *time.Time) {
} else {
fmt.Printf("[Ping] %s -> online=%v\n", m.ip, online)
}
// SSH sync: only if machine is online, has SSH credentials, and hasn't been synced in the last 10 minutes
if online && m.sshUsername != "" && m.sshPassword != "" && shouldSyncSSH(lastSSHSync) {
if sshResult != nil {
saveSSHResult(m.id, m.ip, m.sshPort, sshResult)
} else {
// Use semaphore to limit concurrent SSH sync operations
sshSyncSem <- struct{}{}
go func(mid int64, mip string, mport int, muser, mpass string) {
defer func() { <-sshSyncSem }()
plainPass, decryptErr := utils.Decrypt(mpass)
if decryptErr != nil {
fmt.Printf("[SSH] decrypt failed for %s:%d: %v\n", mip, mport, decryptErr)
return
}
result, err := GetSSHInfo(mip, mport, muser, plainPass)
if err != nil {
fmt.Printf("[SSH] auto-fetch failed for %s:%d: %v\n", mip, mport, err)
return
}
saveSSHResult(mid, mip, mport, result)
}(m.id, m.ip, m.sshPort, m.sshUsername, m.sshPassword)
}
}
}
// sshSyncSem limits concurrent SSH sync operations to prevent resource exhaustion
var sshSyncSem = make(chan struct{}, 5)
var sshSyncSem = make(chan struct{}, 2)
// StartPingService starts the ping loop with configurable interval
func StartPingService(interval int) {
if interval <= 0 {
interval = 30
@@ -282,7 +246,7 @@ func StartPingService(interval int) {
time.Sleep(2 * time.Second)
for {
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online, ssh_synced_at FROM machines ORDER BY id ASC`)
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password, is_online FROM machines ORDER BY id ASC`)
if err != nil {
fmt.Printf("[Ping] query error: %v\n", err)
time.Sleep(step)
@@ -290,18 +254,12 @@ func StartPingService(interval int) {
}
list := []machinePing{}
lastSyncMap := make(map[int64]*time.Time)
for rows.Next() {
var m machinePing
var isOnline int
var sshSyncedAt sql.NullTime
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline, &sshSyncedAt); err == nil {
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.sshUsername, &m.sshPassword, &isOnline); err == nil {
m.wasOnline = isOnline == 1
list = append(list, m)
if sshSyncedAt.Valid {
t := sshSyncedAt.Time
lastSyncMap[m.id] = &t
}
}
}
rows.Close()
@@ -315,13 +273,84 @@ func StartPingService(interval int) {
for i := range list {
res := PingHostRepeated(list[i].ip, list[i].sshPort, 3)
handlePingResult(&list[i], res, lastSyncMap[list[i].id])
handlePingResult(&list[i], res)
time.Sleep(step)
}
}
}()
}
// sshSyncMachine holds SSH sync state for a single machine
type sshSyncMachine struct {
id int64
ip string
sshPort int
username string
password string
}
// StartSSHSyncService starts an independent SSH sync service.
// Each machine has its own 10-minute timer, but global concurrency is limited.
func StartSSHSyncService() {
go func() {
time.Sleep(5 * time.Second)
for {
rows, err := db.DB.Query(`SELECT id, ip, ssh_port, ssh_username, ssh_password FROM machines WHERE is_online=1 AND ssh_username != '' AND ssh_password != '' ORDER BY id ASC`)
if err != nil {
fmt.Printf("[SSH] query error: %v\n", err)
time.Sleep(30 * time.Second)
continue
}
list := []sshSyncMachine{}
for rows.Next() {
var m sshSyncMachine
if err := rows.Scan(&m.id, &m.ip, &m.sshPort, &m.username, &m.password); err == nil {
list = append(list, m)
}
}
rows.Close()
if len(list) == 0 {
time.Sleep(30 * time.Second)
continue
}
fmt.Printf("[SSH] start sync round, %d online machines\n", len(list))
var wg sync.WaitGroup
for _, m := range list {
wg.Add(1)
go func(machine sshSyncMachine) {
defer wg.Done()
// Acquire semaphore to limit concurrent SSH connections
sshSyncSem <- struct{}{}
defer func() { <-sshSyncSem }()
plainPass, decryptErr := utils.Decrypt(machine.password)
if decryptErr != nil {
fmt.Printf("[SSH] decrypt failed for %s:%d: %v\n", machine.ip, machine.sshPort, decryptErr)
return
}
result, err := GetSSHInfo(machine.ip, machine.sshPort, machine.username, plainPass)
if err != nil {
fmt.Printf("[SSH] sync failed for %s:%d: %v\n", machine.ip, machine.sshPort, err)
return
}
saveSSHResult(machine.id, machine.ip, machine.sshPort, result)
}(m)
}
wg.Wait()
fmt.Printf("[SSH] sync round completed, next in 10 minutes\n")
time.Sleep(10 * time.Minute)
}
}()
}
func CleanupLogs(ctx context.Context, retentionDays int) {
if retentionDays <= 0 {
return