package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"syscall"
"time"
)
// Config holds the monitor's settings: alert thresholds, timing, and the
// connection details of the AI chat endpoint used for anomaly analysis.
type Config struct {
// CPUThreshold is the CPU usage percentage above which Alerter.check reports an anomaly.
CPUThreshold float64
// MemThreshold is the memory usage percentage above which Alerter.check reports an anomaly.
MemThreshold float64
// DiskThreshold is the disk usage percentage above which Alerter.check reports an anomaly.
DiskThreshold float64
// Interval is the sampling period in seconds (runMonitor multiplies it by time.Second).
Interval int
// AlertCooldown is the minimum number of seconds between two alerts that share a key.
AlertCooldown int
// AIBaseURL is the full URL that analyzeWithAI sends its POST request to.
AIBaseURL string
// AIAPIKey is sent as the Bearer token in the Authorization header.
AIAPIKey string
// AIModel is placed in the "model" field of the chat request.
AIModel string
}
// loadConfig returns a freshly allocated Config populated with the built-in
// defaults. Every value is a compile-time literal; nothing is read from the
// environment or from disk.
func loadConfig() *Config {
	cfg := new(Config)

	// Alert thresholds, in percent.
	cfg.CPUThreshold = 5.0
	cfg.MemThreshold = 25.0
	cfg.DiskThreshold = 90.0

	// Timing, in seconds.
	cfg.Interval = 30
	cfg.AlertCooldown = 300

	// AI endpoint settings.
	cfg.AIBaseURL = "https://api.example.com/v1/chat/completions"
	cfg.AIAPIKey = "xxxxxxxxxxx"
	cfg.AIModel = "/maas/deepseek-ai/DeepSeek-V3.2"

	return cfg
}
// Metrics is one snapshot of host resource usage produced by collectMetrics.
type Metrics struct {
// Timestamp is taken with time.Now() at the start of collectMetrics.
Timestamp time.Time
// CPUPercent is the non-idle share of CPU time measured by collectCPU between two /proc/stat reads.
CPUPercent float64
// MemoryPercent is used/total*100, where used = MemTotal - MemAvailable from /proc/meminfo.
MemoryPercent float64
// MemoryUsedGB and MemoryTotalGB are the /proc/meminfo values divided by 1024 twice.
MemoryUsedGB float64
MemoryTotalGB float64
// DiskPercent is used/total*100 for the filesystem mounted at "/".
DiskPercent float64
// DiskUsedGB and DiskTotalGB are byte counts divided by 1024 three times.
DiskUsedGB float64
DiskTotalGB float64
// NetBytesSent and NetBytesRecv are the transmit/receive byte counters from
// /proc/net/dev summed over every interface except "lo".
NetBytesSent uint64
NetBytesRecv uint64
}
// String renders the snapshot as a single human-readable line. Percentages
// and GB figures are shown with one decimal; the network counters are shown
// in whole megabytes (bytes divided by 1024*1024, truncated).
func (m *Metrics) String() string {
	const bytesPerMB = 1024 * 1024

	sentMB := m.NetBytesSent / bytesPerMB
	recvMB := m.NetBytesRecv / bytesPerMB

	return fmt.Sprintf(
		"CPU: %.1f%% | Memory: %.1f%% (%.1fGB/%.1fGB) | Disk: %.1f%% (%.1fGB/%.1fGB) | Net: sent=%dMB recv=%dMB",
		m.CPUPercent,
		m.MemoryPercent, m.MemoryUsedGB, m.MemoryTotalGB,
		m.DiskPercent, m.DiskUsedGB, m.DiskTotalGB,
		sentMB, recvMB,
	)
}
// collectMetrics builds one Metrics snapshot by running the CPU, memory,
// disk and network collectors in that order. The timestamp is taken before
// any collector runs. The first collector that fails aborts the run: the
// returned snapshot is nil and the error is wrapped with the collector's
// label ("cpu", "memory", "disk" or "network").
func collectMetrics() (*Metrics, error) {
	snapshot := &Metrics{Timestamp: time.Now()}

	stages := []struct {
		label   string
		collect func(*Metrics) error
	}{
		{"cpu", collectCPU},
		{"memory", collectMemory},
		{"disk", collectDisk},
		{"network", collectNetwork},
	}

	for _, stage := range stages {
		if err := stage.collect(snapshot); err != nil {
			return nil, fmt.Errorf("%s: %w", stage.label, err)
		}
	}
	return snapshot, nil
}
// cpuStat is one snapshot of the counters on the aggregate "cpu" line of
// /proc/stat. readCPUStat fills the fields from columns 1 through 7 of that
// line, in the order they are declared here.
type cpuStat struct {
user, nice, system, idle, iowait, irq, softirq uint64
}
// readCPUStat opens /proc/stat and returns the counters from its aggregate
// "cpu" line. It returns an error if the file cannot be opened or read, if
// the line is missing or too short, or if any counter is not a valid
// unsigned integer.
func readCPUStat() (*cpuStat, error) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return parseCPUStat(f)
}

// parseCPUStat scans r for the first line starting with "cpu " and decodes
// its first seven numeric columns into a cpuStat.
func parseCPUStat(r io.Reader) (*cpuStat, error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		// The trailing space excludes the per-core lines ("cpu0", "cpu1", ...).
		if !strings.HasPrefix(line, "cpu ") {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 8 {
			return nil, fmt.Errorf("unexpected /proc/stat format")
		}

		// A counter that fails to parse must not silently become 0: that
		// would corrupt the deltas computed from two snapshots.
		var vals [7]uint64
		for i := range vals {
			v, err := strconv.ParseUint(fields[i+1], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("parse /proc/stat field %d: %w", i+1, err)
			}
			vals[i] = v
		}
		return &cpuStat{
			user:    vals[0],
			nice:    vals[1],
			system:  vals[2],
			idle:    vals[3],
			iowait:  vals[4],
			irq:     vals[5],
			softirq: vals[6],
		}, nil
	}
	// Distinguish a read failure from a well-formed file without a cpu line.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("read /proc/stat: %w", err)
	}
	return nil, fmt.Errorf("cpu line not found in /proc/stat")
}
// collectCPU measures CPU utilisation by taking two /proc/stat snapshots
// 500ms apart and stores the busy percentage in m.CPUPercent. It blocks for
// the duration of the sampling window and returns the first read error.
func collectCPU(m *Metrics) error {
	s1, err := readCPUStat()
	if err != nil {
		return err
	}
	time.Sleep(500 * time.Millisecond)
	s2, err := readCPUStat()
	if err != nil {
		return err
	}
	m.CPUPercent = cpuUsagePercent(s1, s2)
	return nil
}

// cpuUsagePercent returns the non-idle share of CPU time between two
// snapshots, as a value in [0, 100]. Idle time is idle+iowait.
func cpuUsagePercent(prev, cur *cpuStat) float64 {
	prevIdle := prev.idle + prev.iowait
	curIdle := cur.idle + cur.iowait
	prevTotal := prev.user + prev.nice + prev.system + prev.idle + prev.iowait + prev.irq + prev.softirq
	curTotal := cur.user + cur.nice + cur.system + cur.idle + cur.iowait + cur.irq + cur.softirq

	// The counters are unsigned: subtracting a larger earlier value would
	// wrap around to a huge number, so every delta is guarded. No elapsed
	// time (or a total that went backwards) is reported as 0% busy.
	if curTotal <= prevTotal {
		return 0
	}
	totalDiff := float64(curTotal - prevTotal)

	// The idle sum can shrink between reads (iowait is not guaranteed to be
	// monotonic); treat that as no idle time in the window.
	var idleDiff float64
	if curIdle > prevIdle {
		idleDiff = float64(curIdle - prevIdle)
	}

	pct := (1.0 - idleDiff/totalDiff) * 100.0
	if pct < 0 {
		return 0
	}
	if pct > 100 {
		return 100
	}
	return pct
}
// collectMemory reads /proc/meminfo and fills m.MemoryTotalGB,
// m.MemoryUsedGB and m.MemoryPercent, where used = total - available.
// It returns an error if the file cannot be opened or read, if MemTotal is
// missing or zero, or if a required value is not a valid unsigned integer.
func collectMemory(m *Metrics) error {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return err
	}
	defer f.Close()

	total, available, err := parseMemInfo(f)
	if err != nil {
		return err
	}
	// parseMemInfo guarantees 0 < total and available <= total, so this
	// subtraction cannot wrap and the division below is safe.
	used := total - available
	m.MemoryTotalGB = float64(total) / 1024 / 1024
	m.MemoryUsedGB = float64(used) / 1024 / 1024
	m.MemoryPercent = float64(used) / float64(total) * 100.0
	return nil
}

// parseMemInfo extracts the total and available memory figures from
// /proc/meminfo-formatted text. When MemAvailable is absent it falls back to
// MemFree + Buffers + Cached instead of treating all memory as used. The
// returned available value never exceeds total.
func parseMemInfo(r io.Reader) (uint64, uint64, error) {
	vals := make(map[string]uint64, 5)
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 {
			continue
		}
		key := strings.TrimSuffix(fields[0], ":")
		switch key {
		case "MemTotal", "MemAvailable", "MemFree", "Buffers", "Cached":
		default:
			// Only the keys used below need to parse cleanly.
			continue
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parse %s in /proc/meminfo: %w", key, err)
		}
		vals[key] = v
	}
	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("read /proc/meminfo: %w", err)
	}

	total := vals["MemTotal"]
	if total == 0 {
		return 0, 0, fmt.Errorf("MemTotal not found")
	}
	available, ok := vals["MemAvailable"]
	if !ok {
		available = vals["MemFree"] + vals["Buffers"] + vals["Cached"]
	}
	if available > total {
		available = total
	}
	return total, available, nil
}
// collectDisk queries the filesystem mounted at "/" with statfs and fills
// m.DiskTotalGB, m.DiskUsedGB and m.DiskPercent. Used space is total minus
// free blocks (Bfree). DiskPercent is left untouched when the filesystem
// reports zero total blocks. The statfs error, if any, is returned as is.
func collectDisk(m *Metrics) error {
	const bytesPerGB = 1 << 30

	var fs syscall.Statfs_t
	err := syscall.Statfs("/", &fs)
	if err != nil {
		return err
	}

	blockSize := uint64(fs.Bsize)
	totalBytes := fs.Blocks * blockSize
	freeBytes := fs.Bfree * blockSize
	usedBytes := totalBytes - freeBytes

	m.DiskTotalGB = float64(totalBytes) / bytesPerGB
	m.DiskUsedGB = float64(usedBytes) / bytesPerGB
	if totalBytes == 0 {
		return nil
	}
	m.DiskPercent = float64(usedBytes) / float64(totalBytes) * 100.0
	return nil
}
// collectNetwork reads /proc/net/dev and stores in m the receive and
// transmit byte counters summed over every interface except "lo". It
// returns an error if the file cannot be opened or read, or if a byte
// counter is not a valid unsigned integer; m is left unmodified on error.
func collectNetwork(m *Metrics) error {
	f, err := os.Open("/proc/net/dev")
	if err != nil {
		return err
	}
	defer f.Close()

	sent, recv, err := sumNetDev(f)
	if err != nil {
		return err
	}
	m.NetBytesSent = sent
	m.NetBytesRecv = recv
	return nil
}

// sumNetDev parses /proc/net/dev-formatted text and returns the total
// transmitted and received bytes (in that order) across all non-loopback
// interfaces.
func sumNetDev(r io.Reader) (uint64, uint64, error) {
	var totalSent, totalRecv uint64
	scanner := bufio.NewScanner(r)

	// The first two lines are column headers.
	scanner.Scan()
	scanner.Scan()

	for scanner.Scan() {
		line := scanner.Text()
		// Split on the colon rather than on whitespace: the first counter
		// may follow the interface name without a separating space.
		colonIdx := strings.Index(line, ":")
		if colonIdx < 0 {
			continue
		}
		iface := strings.TrimSpace(line[:colonIdx])
		if iface == "lo" {
			continue
		}
		fields := strings.Fields(line[colonIdx+1:])
		if len(fields) < 9 {
			continue
		}
		// Column 0 is received bytes, column 8 is transmitted bytes.
		recv, err := strconv.ParseUint(fields[0], 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parse received bytes for %s: %w", iface, err)
		}
		sent, err := strconv.ParseUint(fields[8], 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parse sent bytes for %s: %w", iface, err)
		}
		totalRecv += recv
		totalSent += sent
	}
	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("read /proc/net/dev: %w", err)
	}
	return totalSent, totalRecv, nil
}
// chatMessage is a single message of the chat exchange. analyzeWithAI sends
// messages with the roles "system" and "user" and reads Content from the
// message in the reply.
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
// chatRequest is the JSON body that analyzeWithAI posts to the AI endpoint.
type chatRequest struct {
// Model is filled from Config.AIModel.
Model string `json:"model"`
// Messages is the conversation to complete, in order.
Messages []chatMessage `json:"messages"`
}
// chatChoice is one element of the "choices" array in the endpoint's reply.
// Only its "message" member is decoded.
type chatChoice struct {
Message chatMessage `json:"message"`
}
// chatResponse is the part of the endpoint's JSON reply that analyzeWithAI
// decodes. analyzeWithAI returns the content of the first choice.
type chatResponse struct {
Choices []chatChoice `json:"choices"`
}
// analyzeWithAI sends the metrics snapshot and the detected anomalies to the
// chat endpoint configured in cfg and returns the text of the first choice
// in the reply.
//
// The request is a JSON POST to cfg.AIBaseURL authenticated with
// cfg.AIAPIKey as a Bearer token, with a 30 second overall timeout. An error
// is returned when the request cannot be built or sent, when the status is
// not 200 (the response body is included in the error), when the reply is
// larger than 1 MiB, when it is not valid JSON, or when it has no choices.
func analyzeWithAI(cfg *Config, m *Metrics, anomalies []string) (string, error) {
	// Upper bound on how much of the reply is buffered in memory.
	const maxResponseBytes = 1 << 20

	reqBody := chatRequest{
		Model: cfg.AIModel,
		Messages: []chatMessage{
			{Role: "system", Content: "You are a server monitoring expert. Analyze the provided metrics and anomalies, then give a brief summary and 2-3 actionable recommendations."},
			{Role: "user", Content: buildAnalysisPrompt(m, anomalies)},
		},
	}
	data, err := json.Marshal(reqBody)
	if err != nil {
		return "", err
	}

	client := &http.Client{Timeout: 30 * time.Second}
	req, err := http.NewRequest(http.MethodPost, cfg.AIBaseURL, bytes.NewReader(data))
	if err != nil {
		return "", err
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+cfg.AIAPIKey)

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("API request failed: %w", err)
	}
	defer resp.Body.Close()

	// Read one byte past the limit so an oversized reply can be told apart
	// from one that is exactly at the limit.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxResponseBytes+1))
	if err != nil {
		return "", err
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("API error %d: %s", resp.StatusCode, string(body))
	}
	if len(body) > maxResponseBytes {
		return "", fmt.Errorf("API response exceeds %d bytes", maxResponseBytes)
	}

	var chatResp chatResponse
	if err := json.Unmarshal(body, &chatResp); err != nil {
		return "", fmt.Errorf("parse response: %w", err)
	}
	if len(chatResp.Choices) == 0 {
		return "", fmt.Errorf("empty response from AI")
	}
	return chatResp.Choices[0].Message.Content, nil
}

// buildAnalysisPrompt renders the user message: the metrics summary, then
// the anomalies as a bullet list with one item per line, then the request.
func buildAnalysisPrompt(m *Metrics, anomalies []string) string {
	var b strings.Builder
	// The header ends with a newline so the first bullet starts on its own
	// line instead of being appended to "Detected anomalies:".
	fmt.Fprintf(&b, `Server metrics at %s:
- CPU Usage: %.1f%%
- Memory Usage: %.1f%% (%.1f GB / %.1f GB)
- Disk Usage: %.1f%% (%.1f GB / %.1f GB)
- Network: Sent %d MB, Received %d MB
Detected anomalies:
`, m.Timestamp.Format("2006-01-02 15:04:05"), m.CPUPercent, m.MemoryPercent, m.MemoryUsedGB, m.MemoryTotalGB, m.DiskPercent, m.DiskUsedGB, m.DiskTotalGB, m.NetBytesSent/1024/1024, m.NetBytesRecv/1024/1024)
	for _, a := range anomalies {
		b.WriteString("- ")
		b.WriteString(a)
		b.WriteString("\n")
	}
	b.WriteString("Please analyze these anomalies and provide recommendations.")
	return b.String()
}
// Alerter compares metrics against the configured thresholds and prints an
// alert, with an AI analysis, when any of them is exceeded.
type Alerter struct {
// cfg supplies the thresholds, the cooldown and the AI endpoint settings.
cfg *Config
// lastAlert records, per alert key, when an alert was last emitted; check
// consults it to enforce the cooldown.
lastAlert map[string]time.Time
}
// newAlerter returns an Alerter bound to cfg with an empty, ready-to-use
// alert history.
func newAlerter(cfg *Config) *Alerter {
	a := new(Alerter)
	a.cfg = cfg
	a.lastAlert = map[string]time.Time{}
	return a
}
// check compares m against the configured CPU, memory and disk thresholds.
// If any is exceeded and no alert for the same set of metrics was emitted
// within the cooldown, it prints the anomalies, asks the AI endpoint for an
// analysis and prints the result (or the failure). It returns silently when
// nothing is exceeded or the alert is still cooling down.
func (a *Alerter) check(m *Metrics) {
	// kinds names which metrics are over threshold; anomalies holds the
	// human-readable descriptions, which include the current values.
	var anomalies, kinds []string
	if m.CPUPercent > a.cfg.CPUThreshold {
		kinds = append(kinds, "cpu")
		anomalies = append(anomalies, fmt.Sprintf("CPU %.1f%% > threshold %.1f%%", m.CPUPercent, a.cfg.CPUThreshold))
	}
	if m.MemoryPercent > a.cfg.MemThreshold {
		kinds = append(kinds, "memory")
		anomalies = append(anomalies, fmt.Sprintf("Memory %.1f%% > threshold %.1f%%", m.MemoryPercent, a.cfg.MemThreshold))
	}
	if m.DiskPercent > a.cfg.DiskThreshold {
		kinds = append(kinds, "disk")
		anomalies = append(anomalies, fmt.Sprintf("Disk %.1f%% > threshold %.1f%%", m.DiskPercent, a.cfg.DiskThreshold))
	}
	if len(anomalies) == 0 {
		return
	}

	// The cooldown key must be stable while the same condition persists, so
	// it is built from the metric names only. Keying on the formatted text
	// would embed the fluctuating readings, producing a new key (and a new
	// alert plus AI request) on almost every sample.
	key := strings.Join(kinds, "|")
	now := time.Now()
	cooldown := time.Duration(a.cfg.AlertCooldown) * time.Second
	if last, ok := a.lastAlert[key]; ok && now.Sub(last) < cooldown {
		return
	}
	a.lastAlert[key] = now

	fmt.Println("\n==================================================")
	fmt.Printf("[ALERT] %s\n", now.Format("2006-01-02 15:04:05"))
	fmt.Println("Anomalies detected:")
	for _, anomaly := range anomalies {
		fmt.Printf(" ! %s\n", anomaly)
	}
	fmt.Println("\nCalling AI for analysis...")
	analysis, err := analyzeWithAI(a.cfg, m, anomalies)
	if err != nil {
		// The alert itself has already been printed; a failed analysis is
		// reported but does not abort.
		fmt.Printf("AI analysis failed: %v\n", err)
	} else {
		fmt.Println("\n--- AI Analysis ---")
		fmt.Println(analysis)
	}
	fmt.Println("==================================================")
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// runMonitor is the monitoring loop. It prints a start-up banner, takes one
// sample immediately, and then takes another every cfg.Interval seconds.
// Each sample is printed and passed to the alerter; a failed collection is
// printed as an error and the loop continues. The function does not return.
func runMonitor(cfg *Config) {
	alerter := newAlerter(cfg)
	fmt.Printf("Server Monitor started (interval: %ds | CPU>%.0f%% Mem>%.0f%% Disk>%.0f%%)\n", cfg.Interval, cfg.CPUThreshold, cfg.MemThreshold, cfg.DiskThreshold)

	sampleOnce := func() {
		snapshot, err := collectMetrics()
		if err == nil {
			fmt.Printf("[%s] %s\n", snapshot.Timestamp.Format("15:04:05"), snapshot.String())
			alerter.check(snapshot)
			return
		}
		fmt.Printf("[ERROR] %v\n", err)
	}

	// First sample right away; the ticker is only created afterwards, so the
	// first period is measured from the end of this sample.
	sampleOnce()

	period := time.Duration(cfg.Interval) * time.Second
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		<-ticker.C
		sampleOnce()
	}
}
func main() {
cfg := loadConfig()
runMonitor(cfg)
}