Skip to main content

Мониторинг системы

Комплексное руководство по мониторингу платформы маркетплейсов.

Обзор мониторинга

Ключевые метрики

  • Производительность: время отклика, пропускная способность
  • Доступность: uptime, health checks
  • Ресурсы: CPU, память, диск, сеть
  • Бизнес-метрики: количество заказов, конверсия, доходы
  • Безопасность: попытки атак, аномальная активность

Prometheus + Grafana

Установка Prometheus

# docker-compose.monitoring.yml
# Monitoring stack: Prometheus (metrics storage), Grafana (dashboards)
# and node-exporter (host-level metrics).
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001 -> Grafana's port 3000 inside the container.
      - "3001:3000"
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the environment; "admin" is only a fallback.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      # Point the exporter at the host mounts above; without these flags it
      # reads the container's own /proc and /sys.
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'

volumes:
  prometheus_data:
  grafana_data:

Конфигурация Prometheus

# prometheus.yml
# Scrape configuration: Prometheus itself, host metrics, the application,
# and the PostgreSQL / Redis exporters.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'marketplace-app'
    # metrics_path is a job-level setting, not part of the target list.
    metrics_path: '/metrics'
    static_configs:
      - targets: ['app:3000']

  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']

  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']

Метрики приложения

Express.js метрики

const prometheus = require('prom-client');

// Metric definitions: request latency histogram, request counter and a
// gauge for open connections.
const { Histogram, Counter, Gauge } = prometheus;

const httpRequestDurationConfig = {
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'status_code'],
  buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10],
};
const httpRequestDuration = new Histogram(httpRequestDurationConfig);

const httpRequestsTotalConfig = {
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: ['method', 'route', 'status_code'],
};
const httpRequestsTotal = new Counter(httpRequestsTotalConfig);

const activeConnectionsConfig = {
  name: 'active_connections',
  help: 'Number of active connections',
};
const activeConnections = new Gauge(activeConnectionsConfig);

/**
 * Express middleware that records one latency observation and one request
 * count per request once the response has finished.
 *
 * The `route` label is the matched route pattern when Express resolved one,
 * otherwise the raw request path.
 * NOTE(review): the raw-path fallback can create many distinct label values
 * for unmatched URLs — confirm this is acceptable.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Passes control to the next handler.
 */
const metricsMiddleware = (req, res, next) => {
  const startedAt = Date.now();

  const recordRequest = () => {
    const elapsedSeconds = (Date.now() - startedAt) / 1000;
    const routeLabel = req.route ? req.route.path : req.path;
    const labelValues = [req.method, routeLabel, res.statusCode];

    httpRequestDuration.labels(...labelValues).observe(elapsedSeconds);
    httpRequestsTotal.labels(...labelValues).inc();
  };

  res.on('finish', recordRequest);
  next();
};

// Install the request-metrics middleware on the application.
app.use(metricsMiddleware);

/**
 * GET /metrics — scrape endpoint for Prometheus.
 *
 * `register.metrics()` returns a Promise in current prom-client releases, so
 * the result is awaited before being written. Awaiting is harmless for older
 * releases that return a string.
 */
app.get('/metrics', async (req, res) => {
  try {
    const body = await prometheus.register.metrics();
    res.set('Content-Type', prometheus.register.contentType);
    res.end(body);
  } catch (error) {
    // Report a collection failure instead of leaving the scrape hanging.
    res.status(500).end(error.message);
  }
});

Бизнес-метрики

// Order-related business metrics: order count, order value distribution and
// a gauge for the number of active marketplaces.
const ordersTotalConfig = {
  name: 'orders_total',
  help: 'Total number of orders',
  labelNames: ['marketplace_id', 'status'],
};
const ordersTotal = new prometheus.Counter(ordersTotalConfig);

const orderValueConfig = {
  name: 'order_value_amount',
  help: 'Order value distribution',
  labelNames: ['marketplace_id', 'currency'],
  buckets: [10, 50, 100, 500, 1000, 5000, 10000],
};
const orderValue = new prometheus.Histogram(orderValueConfig);

const activeMarketplacesConfig = {
  name: 'active_marketplaces_total',
  help: 'Number of active marketplaces',
};
const activeMarketplaces = new prometheus.Gauge(activeMarketplacesConfig);

/**
 * POST /api/orders — creates an order and updates the order metrics.
 *
 * On success: increments `orders_total{status="created"}`, records the order
 * amount in `order_value_amount`, and responds with the order.
 * On failure: increments `orders_total{status="failed"}` and responds 500
 * with the error message.
 */
app.post('/api/orders', async (req, res) => {
  try {
    const order = await createOrder(req.body);

    ordersTotal.labels(order.marketplaceId, 'created').inc();
    orderValue.labels(order.marketplaceId, order.currency).observe(order.amount);

    res.json(order);
  } catch (error) {
    // The body may be absent or lack marketplaceId (e.g. a malformed request);
    // fall back to a fixed label so the error path itself cannot throw.
    const marketplaceId = req.body?.marketplaceId ?? 'unknown';
    ordersTotal.labels(marketplaceId, 'failed').inc();
    res.status(500).json({ error: error.message });
  }
});

Dashboards Grafana

Dashboard производительности

{
  "dashboard": {
    "title": "Marketplace Platform Performance",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(http_requests_total[5m])",
            "legendFormat": "{{method}} {{route}}"
          }
        ]
      },
      {
        "title": "Response Time",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "95th percentile"
          },
          {
            "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket[5m]))",
            "legendFormat": "50th percentile"
          }
        ]
      },
      {
        "title": "Error Rate",
        "type": "singlestat",
        "targets": [
          {
            "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100"
          }
        ]
      }
    ]
  }
}

Dashboard бизнес-метрик

{
  "dashboard": {
    "title": "Business Metrics",
    "panels": [
      {
        "title": "Orders per Hour",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (marketplace_id) (increase(orders_total[1h]))",
            "legendFormat": "{{marketplace_id}}"
          }
        ]
      },
      {
        "title": "Revenue",
        "type": "graph",
        "targets": [
          {
            "expr": "sum by (marketplace_id, currency) (increase(order_value_amount_sum[1h]))",
            "legendFormat": "{{marketplace_id}} {{currency}}"
          }
        ]
      },
      {
        "title": "Active Marketplaces",
        "type": "singlestat",
        "targets": [
          {
            "expr": "active_marketplaces_total"
          }
        ]
      }
    ]
  }
}

Алерты

Правила алертинга

# alerts.yml
# Alerting rules for the marketplace platform.
groups:
  - name: marketplace-alerts
    rules:
      - alert: HighErrorRate
        # Aggregate both sides before dividing: an unaggregated ratio matches
        # each 5xx series against itself and always evaluates to 1.
        expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) > 0.05
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }}"

      - alert: HighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time"
          description: "95th percentile response time is {{ $value }}s"

      - alert: DatabaseConnectionsHigh
        expr: pg_stat_activity_count > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High database connections"
          description: "Database has {{ $value }} active connections"

      - alert: LowDiskSpace
        # Fires when less than 10% of a filesystem is available.
        expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
          description: "Disk usage is above 90%"

Alertmanager конфигурация

# alertmanager.yml
# Routes every alert to one receiver that notifies by e-mail and Slack.
global:
  smtp_smarthost: 'smtp.gmail.com:587'
  smtp_from: 'alerts@example.com'
  smtp_auth_username: 'alerts@example.com'
  # Placeholder — replace with a real secret and keep it out of version control.
  smtp_auth_password: 'password'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    email_configs:
      - to: 'admin@example.com'
        # email_config has no `subject`/`body` fields: the subject is set via
        # `headers` and the plain-text body via `text`.
        headers:
          Subject: 'Alert: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Description: {{ .Annotations.description }}
          {{ end }}

    slack_configs:
      - api_url: 'https://hooks.slack.com/services/...'
        channel: '#alerts'
        title: 'Alert: {{ .GroupLabels.alertname }}'
        text: |
          {{ range .Alerts }}
          {{ .Annotations.summary }}
          {{ .Annotations.description }}
          {{ end }}

Application Performance Monitoring (APM)

New Relic интеграция

// Установка: npm install newrelic
require('newrelic');

const newrelic = require('newrelic');

/**
 * POST /api/orders with New Relic instrumentation.
 *
 * On success records two custom metrics (order count and order value) and
 * attaches the marketplace id and order value as custom attributes.
 * On failure reports the error to New Relic and responds 500 with the
 * error message.
 */
app.post('/api/orders', async (req, res) => {
  try {
    const order = await createOrder(req.body);

    // Custom metrics
    newrelic.recordMetric('Custom/Orders/Created', 1);
    newrelic.recordMetric('Custom/Orders/Value', order.amount);

    // Custom attributes
    newrelic.addCustomAttribute('marketplace_id', order.marketplaceId);
    newrelic.addCustomAttribute('order_value', order.amount);

    res.json(order);
  } catch (error) {
    newrelic.noticeError(error);
    res.status(500).json({ error: error.message });
  }
});

Логирование и анализ

ELK Stack

# elk-stack.yml
# Single-node Elasticsearch with Logstash and Kibana for log collection and search.
version: '3.8'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.15.0
    environment:
      - discovery.type=single-node
      # Fix the JVM heap at 512 MB.
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data

  logstash:
    image: docker.elastic.co/logstash/logstash:7.15.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
    ports:
      - "5044:5044"
    depends_on:
      - elasticsearch

  kibana:
    image: docker.elastic.co/kibana/kibana:7.15.0
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_HOSTS: http://elasticsearch:9200
    depends_on:
      - elasticsearch

volumes:
  elasticsearch_data:

Structured logging

const winston = require('winston');
const { ElasticsearchTransport } = require('winston-elasticsearch');

// JSON log format with a timestamp; Error objects keep their stack trace.
const logFormat = winston.format.combine(
  winston.format.timestamp(),
  winston.format.errors({ stack: true }),
  winston.format.json()
);

// Logs go to the console and to the `marketplace-logs` Elasticsearch index.
const logTransports = [
  new winston.transports.Console(),
  new ElasticsearchTransport({
    clientOpts: { node: 'http://elasticsearch:9200' },
    index: 'marketplace-logs',
  }),
];

const logger = winston.createLogger({
  level: 'info',
  format: logFormat,
  transports: logTransports,
});

// Usage: log a structured event carrying the order's identifying fields.
{
  const { id: orderId, marketplaceId, amount, userId } = order;
  logger.info('Order created', { orderId, marketplaceId, amount, userId });
}

Health Checks

Комплексные health checks

/**
 * Health probes for the service's dependencies.
 *
 * `database` and `redis` resolve to `{ status, latency }` when healthy
 * (latency in milliseconds) or `{ status, error }` when unhealthy.
 * `external_apis` resolves to an array with one entry per external service.
 * None of the probes reject.
 */
const healthChecks = {
  /** Runs `SELECT 1` against the database and times the round trip. */
  async database() {
    // Must be captured before the query; the latency below depends on it.
    const start = Date.now();
    try {
      await db.query('SELECT 1');
      return { status: 'healthy', latency: Date.now() - start };
    } catch (error) {
      return { status: 'unhealthy', error: error.message };
    }
  },

  /** Pings Redis and times the round trip. */
  async redis() {
    const start = Date.now();
    try {
      await redis.ping();
      return { status: 'healthy', latency: Date.now() - start };
    } catch (error) {
      return { status: 'unhealthy', error: error.message };
    }
  },

  /**
   * Probes the payment gateway, e-mail service and storage service.
   *
   * @returns {Promise<Array<{service: string, status: string, error: (string|undefined)}>>}
   *   One entry per service, in the order payment, email, storage.
   */
  async external_apis() {
    const probes = [
      ['payment', checkPaymentGateway],
      ['email', checkEmailService],
      ['storage', checkStorageService],
    ];

    // The async wrapper turns a synchronous throw from a probe into a
    // rejection, so one broken probe cannot fail the whole check.
    const results = await Promise.allSettled(
      probes.map(async ([, probe]) => probe())
    );

    return results.map((result, index) => ({
      service: probes[index][0],
      status: result.status === 'fulfilled' ? 'healthy' : 'unhealthy',
      error: result.reason?.message,
    }));
  },
};

/**
 * GET /health — runs every probe in `healthChecks` one after another and
 * reports the combined result.
 *
 * Responds 200 with status "ok" when all probes are healthy, otherwise 503
 * with status "degraded". A probe that throws is reported as unhealthy.
 */
app.get('/health', async (req, res) => {
  const report = {
    status: 'ok',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    checks: {}
  };

  // Convert a throwing or rejecting probe into an unhealthy result.
  const runProbe = async (probe) => {
    try {
      return await probe();
    } catch (error) {
      return { status: 'unhealthy', error: error.message };
    }
  };

  for (const [probeName, probe] of Object.entries(healthChecks)) {
    report.checks[probeName] = await runProbe(probe);
  }

  // A result is either a single object or an array of per-service objects.
  const isUnhealthy = (outcome) =>
    outcome.status === 'unhealthy' ||
    (Array.isArray(outcome) && outcome.some((item) => item.status === 'unhealthy'));

  const failedCount = Object.values(report.checks).filter(isUnhealthy).length;

  if (failedCount > 0) {
    report.status = 'degraded';
    res.status(503);
  }

  res.json(report);
});

Мониторинг безопасности

Обнаружение аномалий

// Counter of failed login attempts, labelled by client IP and user agent.
const failedLoginsCounter = new prometheus.Counter({
  name: 'failed_login_attempts_total',
  help: 'Total failed login attempts',
  labelNames: ['ip', 'user_agent'],
});

// Counter of suspicious events, labelled by event type, client IP and user id.
const suspiciousActivityCounter = new prometheus.Counter({
  name: 'suspicious_activity_total',
  help: 'Suspicious activity detected',
  labelNames: ['type', 'ip', 'user_id'],
});

// Security-related counters grouped under one object.
const securityMetrics = {
  failedLogins: failedLoginsCounter,
  suspiciousActivity: suspiciousActivityCounter,
};

/**
 * Express middleware that counts suspicious requests without blocking them.
 *
 * Increments `securityMetrics.suspiciousActivity` when the rate limiter
 * reports the client IP as exceeded, and when the URL or a query value
 * contains an SQL keyword. Always passes control to `next`.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response (unused).
 * @param {Function} next - Passes control to the next handler.
 */
const securityMiddleware = (req, res, next) => {
  const clientIp = req.ip;
  const clientAgent = req.get('User-Agent'); // not used by the checks below

  // Count one suspicious event of the given type for this client.
  const flag = (activityType) => {
    securityMetrics.suspiciousActivity
      .labels(activityType, clientIp, req.user?.id || 'anonymous')
      .inc();
  };

  if (rateLimiter.isExceeded(clientIp)) {
    flag('rate_limit_exceeded');
  }

  // Case-insensitive whole-word match on common SQL keywords.
  const sqlKeyword = /(\b(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER)\b)/i;
  const containsSql = (candidate) => sqlKeyword.test(candidate);

  // Query values are inspected only when the URL itself does not match.
  const looksLikeInjection =
    containsSql(req.url) || Object.values(req.query).some((value) => containsSql(value));

  if (looksLikeInjection) {
    flag('sql_injection_attempt');
  }

  next();
};