RSJselet/monitoring.py
2025-07-10 13:42:54 +08:00

74 lines
2.5 KiB
Python

"""
系统监控模块 - 提供性能指标和健康检查
"""
from prometheus_client import Counter, Gauge, Histogram, Summary
import time
# Metric definitions.
# The help strings below are emitted in the metrics output and are kept verbatim.

# Query volume and failures; failures are partitioned by the ``error_type`` label.
QUERY_COUNTER = Counter(
    'student_grade_queries_total',
    '成绩查询总次数',
)
QUERY_ERRORS = Counter(
    'student_grade_query_errors_total',
    '成绩查询错误次数',
    labelnames=['error_type'],
)

# Point-in-time load readings.
WAITING_ROOM_SIZE = Gauge(
    'waiting_room_current_size',
    '等待室当前大小',
)
CONCURRENT_QUERIES_GAUGE = Gauge(
    'concurrent_queries_current',
    '当前并发查询数',
)

# Cache effectiveness.
CACHE_HIT_COUNTER = Counter(
    'cache_hits_total',
    '缓存命中次数',
)
CACHE_MISS_COUNTER = Counter(
    'cache_misses_total',
    '缓存未命中次数',
)

# Latency distributions; bucket upper bounds are in seconds.
QUERY_LATENCY = Histogram(
    'query_latency_seconds',
    '查询延迟',
    buckets=(0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10.0),
)
DB_CONNECTION_LATENCY = Summary(
    'db_connection_latency_seconds',
    '数据库连接延迟',
)

# Connection-pool metrics.
DB_POOL_SIZE = Gauge(
    'db_pool_size',
    '数据库连接池大小',
)
DB_POOL_FREE = Gauge(
    'db_pool_free',
    '数据库连接池空闲连接数',
)
REDIS_POOL_SIZE = Gauge(
    'redis_pool_size',
    'Redis连接池大小',
)
class Timer:
    """Context manager that reports how long its ``with`` body took.

    On exit the elapsed time, in seconds, is passed to ``metric.observe``.
    The observation is made whether or not the body raised, and the
    exception (if any) is not suppressed.

    Args:
        metric: Any object exposing an ``observe(seconds)`` method.

    Attributes:
        metric: The object given to the constructor.
        start: Clock reading captured on entry. It is a ``perf_counter``
            value, meaningful only as a difference, not a wall-clock
            timestamp.
    """

    def __init__(self, metric):
        self.metric = metric

    def __enter__(self):
        # Use the monotonic high-resolution clock: time.time() follows the
        # system wall clock, so an NTP step or manual adjustment during the
        # timed section could yield a negative or badly wrong duration.
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Implicitly returns None, so exceptions from the body propagate.
        self.metric.observe(time.perf_counter() - self.start)
def record_query_attempt():
    """Count one query attempt by incrementing ``QUERY_COUNTER`` by 1."""
    QUERY_COUNTER.inc(1)
def record_query_error(error_type):
    """Count one query error under the given category.

    Args:
        error_type: Value for the ``error_type`` label of ``QUERY_ERRORS``;
            each distinct value gets its own counter series.
    """
    labelled_counter = QUERY_ERRORS.labels(error_type=error_type)
    labelled_counter.inc(1)
def update_waiting_room_size(size):
    """Publish the current waiting-room size.

    Args:
        size: New reading for the ``WAITING_ROOM_SIZE`` gauge; it replaces
            the previous value.
    """
    WAITING_ROOM_SIZE.set(size)
def update_concurrent_queries(count):
    """Publish the number of queries currently in flight.

    Args:
        count: New reading for the ``CONCURRENT_QUERIES_GAUGE`` gauge; it
            replaces the previous value.
    """
    CONCURRENT_QUERIES_GAUGE.set(count)
def record_cache_hit():
    """Count one cache hit by incrementing ``CACHE_HIT_COUNTER`` by 1."""
    CACHE_HIT_COUNTER.inc(1)
def record_cache_miss():
    """Count one cache miss by incrementing ``CACHE_MISS_COUNTER`` by 1."""
    CACHE_MISS_COUNTER.inc(1)
def query_timer():
    """Create a timer that reports to the query-latency histogram.

    Returns:
        A new ``Timer`` bound to ``QUERY_LATENCY``, intended for use in a
        ``with`` statement around the code being timed.
    """
    timer = Timer(QUERY_LATENCY)
    return timer
def connection_timer():
    """Create a timer that reports to the database-connection latency summary.

    Returns:
        A new ``Timer`` bound to ``DB_CONNECTION_LATENCY``, intended for use
        in a ``with`` statement around the code being timed.
    """
    timer = Timer(DB_CONNECTION_LATENCY)
    return timer
def update_pool_metrics(db_pool, redis_client):
    """Refresh the connection-pool gauges from the live pool objects.

    Args:
        db_pool: Database pool exposing ``maxsize`` and ``freesize``.
            A falsy value (e.g. ``None``) leaves the DB gauges untouched.
        redis_client: Redis client. The Redis gauge is updated only when the
            client has a ``connection_pool`` attribute, whose
            ``max_connections`` is then published.
    """
    if db_pool:
        # Each attribute is read immediately before its gauge is set.
        db_gauges = (
            (DB_POOL_SIZE, 'maxsize'),
            (DB_POOL_FREE, 'freesize'),
        )
        for gauge, attr_name in db_gauges:
            gauge.set(getattr(db_pool, attr_name))

    if not hasattr(redis_client, 'connection_pool'):
        return
    redis_pool = redis_client.connection_pool
    REDIS_POOL_SIZE.set(redis_pool.max_connections)