""" 系统监控模块 - 提供性能指标和健康检查 """ from prometheus_client import Counter, Gauge, Histogram, Summary import time # 定义指标 QUERY_COUNTER = Counter('student_grade_queries_total', '成绩查询总次数') QUERY_ERRORS = Counter('student_grade_query_errors_total', '成绩查询错误次数', ['error_type']) WAITING_ROOM_SIZE = Gauge('waiting_room_current_size', '等待室当前大小') CONCURRENT_QUERIES_GAUGE = Gauge('concurrent_queries_current', '当前并发查询数') CACHE_HIT_COUNTER = Counter('cache_hits_total', '缓存命中次数') CACHE_MISS_COUNTER = Counter('cache_misses_total', '缓存未命中次数') QUERY_LATENCY = Histogram('query_latency_seconds', '查询延迟', buckets=[0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 10.0]) DB_CONNECTION_LATENCY = Summary('db_connection_latency_seconds', '数据库连接延迟') # 连接池指标 DB_POOL_SIZE = Gauge('db_pool_size', '数据库连接池大小') DB_POOL_FREE = Gauge('db_pool_free', '数据库连接池空闲连接数') REDIS_POOL_SIZE = Gauge('redis_pool_size', 'Redis连接池大小') class Timer: """计时器上下文管理器""" def __init__(self, metric): self.metric = metric def __enter__(self): self.start = time.time() return self def __exit__(self, exc_type, exc_val, exc_tb): self.metric.observe(time.time() - self.start) def record_query_attempt(): """记录查询尝试""" QUERY_COUNTER.inc() def record_query_error(error_type): """记录查询错误""" QUERY_ERRORS.labels(error_type=error_type).inc() def update_waiting_room_size(size): """更新等待室大小""" WAITING_ROOM_SIZE.set(size) def update_concurrent_queries(count): """更新当前并发查询数""" CONCURRENT_QUERIES_GAUGE.set(count) def record_cache_hit(): """记录缓存命中""" CACHE_HIT_COUNTER.inc() def record_cache_miss(): """记录缓存未命中""" CACHE_MISS_COUNTER.inc() def query_timer(): """查询计时器""" return Timer(QUERY_LATENCY) def connection_timer(): """连接计时器""" return Timer(DB_CONNECTION_LATENCY) def update_pool_metrics(db_pool, redis_client): """更新连接池指标""" if db_pool: DB_POOL_SIZE.set(db_pool.maxsize) DB_POOL_FREE.set(db_pool.freesize) if hasattr(redis_client, 'connection_pool'): REDIS_POOL_SIZE.set(redis_client.connection_pool.max_connections)