（三十一）— latency延迟分析处理

最后更新于：2022-04-01 20:21:21

每当提到延时统计的时候，一定想到的一个名词就是“性能测试”，没错，在Redis的redis_benchmark文件中，的确用到了延迟文件中的相关信息。Redis官方对此文件的解释如下: ~~~ /* The latency monitor allows to easily observe the sources of latency * in a Redis instance using the LATENCY command. Different latency * sources are monitored, like disk I/O, execution of commands, fork * system call, and so forth. * * 延时监听器可以通过LATENCY命令方便地观察Redis实例中延时的来源，被监控的来源包括磁盘I/O操作、 * 命令的执行、fork系统调用（创建子进程）等。 * ---------------------------------------------------------------------------- ~~~ 在Redis中的延时操作中，整个过程原理非常简单，它是针对每种事件维护了一个统计列表，每个列表中包括了采集的一系列样本，每个样本包括此样本的创建时间和此样本的延时时间。event到SampleSeriesList是一个字典的映射关系。下面看看，里面关键的采集点，名叫latencySample采集点的结构定义: ~~~ /* Representation of a latency sample: the sampling time and the latency * observed in milliseconds. */ /* 延时样本 */ struct latencySample { //延时Sample创建的时间 int32_t time; /* We don't use time_t to force 4 bytes usage everywhere. */ //延时的具体时间，单位为毫秒 uint32_t latency; /* Latency in milliseconds. */ }; ~~~ 字典中维护的可不是一个Sample结点，而是一个结点列表结构体: ~~~ /* The latency time series for a given event. */ /* 针对某个事件采集的一系列延时sample */ struct latencyTimeSeries { //下一个延时Sample的下标 int idx; /* Index of the next sample to store. */ //最大的延时 uint32_t max; /* Max latency observed for this event. */ //最近的延时记录 struct latencySample samples[LATENCY_TS_LEN]; /* Latest history. */ }; ~~~ 在Redis代码的设计中，因为延时是用来测试和结果分析的，所以，作者还设计了用于后面分析报告中会用到的数据统计结构体； ~~~ /* Latency statistics structure. */ /* 延时sample的数据统计结果结构体 */ struct latencyStats { //绝对最高的延时时间 uint32_t all_time_high; /* Absolute max observed since latest reset. */ //平均Sample延时时间 uint32_t avg; /* Average of current samples. */ //Sample的最小延时时间 uint32_t min; /* Min of current samples. */ //Sample的最大延时时间 uint32_t max; /* Max of current samples. */ //平均绝对偏差（相对于平均延时） uint32_t mad; /* Mean absolute deviation. */ //samples的总数 uint32_t samples; /* Number of non-zero samples. */ //最早的延时记录点的创建时间 time_t period; /* Number of seconds since first event and now. 
*/ }; ~~~ 意思都非常的直接，那么一个简单的Sample如何进行事件的检测呢？ ~~~ /* Start monitoring an event. We just set the current time. */ /* 对某个事件设置监听，就是设置一下当前的时间 */ #define latencyStartMonitor(var) if (server.latency_monitor_threshold) { \ var = mstime(); \ } else { \ var = 0; \ } /* End monitoring an event, compute the difference with the current time * to check the amount of time elapsed. */ /* 结束监听，算出过了多少时间 */ #define latencyEndMonitor(var) if (server.latency_monitor_threshold) { \ var = mstime() - var; \ } ~~~ 很简单，记录开始时间，记录结束时间，中间的差值就是延时时间了，如果超出给定的时间范围，就加入到延时列表中: ~~~ /* Add the sample only if the elapsed time is >= to the configured threshold. */ /* 如果延时时间超出server.latency_monitor_threshold，则将Sample加入延时列表中 */ #define latencyAddSampleIfNeeded(event,var) \ if (server.latency_monitor_threshold && \ (var) >= server.latency_monitor_threshold) \ latencyAddSample((event),(var)); ~~~ 我们重点关注一下，latencyAddSample，就是把采样结点加入到记录中，步骤如下: 1.根据传入的event事件，在server.latency_events找到key为event事件的val,即一个latencyTimeSeries 2.在这个latencyTimeSeries的struct latencySample samples[LATENCY_TS_LEN]中添加一个新的Sample 实现代码如下: ~~~ /* Add the specified sample to the specified time series "event". * This function is usually called via latencyAddSampleIfNeeded(), that * is a macro that only adds the sample if the latency is higher than * server.latency_monitor_threshold. */ /* 添加Sample到指定的Event对象的Sample列表中 */ void latencyAddSample(char *event, mstime_t latency) { //找出Event对应的延时Sample记录结构体 struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event); time_t now = time(NULL); int prev; /* Create the time series if it does not exist. */ if (ts == NULL) { ts = zmalloc(sizeof(*ts)); ts->idx = 0; ts->max = 0; memset(ts->samples,0,sizeof(ts->samples)); //如果ts为空，重新添加，一个Event，对应一个latencyTimeSeries dictAdd(server.latency_events,zstrdup(event),ts); } /* If the previous sample is in the same second, we update our old sample * if this latency is > of the old one, or just return. 
*/ prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN; if (ts->samples[prev].time == now) { if (latency > ts->samples[prev].latency) ts->samples[prev].latency = latency; return; } //为Sample赋值 ts->samples[ts->idx].time = time(NULL); ts->samples[ts->idx].latency = latency; if (latency > ts->max) ts->max = latency; ts->idx++; if (ts->idx == LATENCY_TS_LEN) ts->idx = 0; } ~~~ 结点都出来之后，当然会进行结构的分析统计了，这时就用到了latencyStats结构体； ~~~ /* Analyze the samples avaialble for a given event and return a structure * populate with different metrics, average, MAD, min, max, and so forth. * Check latency.h definition of struct latenctStat for more info. * If the specified event has no elements the structure is populate with * zero values. */ /* 分析某个时间Event的延时结果，结果信息存入latencyStats结构体中 */ void analyzeLatencyForEvent(char *event, struct latencyStats *ls) { struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event); int j; uint64_t sum; //初始化延时统计结果结构体的变量 ls->all_time_high = ts ? ts->max : 0; ls->avg = 0; ls->min = 0; ls->max = 0; ls->mad = 0; ls->samples = 0; ls->period = 0; if (!ts) return; /* First pass, populate everything but the MAD. */ sum = 0; for (j = 0; j < LATENCY_TS_LEN; j++) { if (ts->samples[j].time == 0) continue; ls->samples++; if (ls->samples == 1) { ls->min = ls->max = ts->samples[j].latency; } else { //找出延时最大和最小的延时时间 if (ls->min > ts->samples[j].latency) ls->min = ts->samples[j].latency; if (ls->max < ts->samples[j].latency) ls->max = ts->samples[j].latency; } sum += ts->samples[j].latency; /* Track the oldest event time in ls->period. */ if (ls->period == 0 || ts->samples[j].time < ls->period) //最早的延时记录点的创建时间 ls->period = ts->samples[j].time; } /* So far avg is actually the sum of the latencies, and period is * the oldest event time. We need to make the first an average and * the second a range of seconds. 
*/ if (ls->samples) { ls->avg = sum / ls->samples; ls->period = time(NULL) - ls->period; if (ls->period == 0) ls->period = 1; } /* Second pass, compute MAD. */ //计算平均相对误差，与平均延时相比 sum = 0; for (j = 0; j < LATENCY_TS_LEN; j++) { int64_t delta; if (ts->samples[j].time == 0) continue; delta = (int64_t)ls->avg - ts->samples[j].latency; if (delta < 0) delta = -delta; sum += delta; } if (ls->samples) ls->mad = sum / ls->samples; } ~~~ 当然还可以利用这些采集的点，画一个微线图，更加形象的展示出来: ~~~ #define LATENCY_GRAPH_COLS 80 /* 利用延时的Sample点，画出对应的微线图 */ sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) { int j; struct sequence *seq = createSparklineSequence(); sds graph = sdsempty(); uint32_t min = 0, max = 0; for (j = 0; j < LATENCY_TS_LEN; j++) { int i = (ts->idx + j) % LATENCY_TS_LEN; int elapsed; char *label; char buf[64]; if (ts->samples[i].time == 0) continue; /* Update min and max. */ if (seq->length == 0) { min = max = ts->samples[i].latency; } else { if (ts->samples[i].latency > max) max = ts->samples[i].latency; if (ts->samples[i].latency < min) min = ts->samples[i].latency; } /* Use as label the number of seconds / minutes / hours / days * ago the event happened. 
*/ elapsed = time(NULL) - ts->samples[i].time; if (elapsed < 60) snprintf(buf,sizeof(buf),"%ds",elapsed); else if (elapsed < 3600) snprintf(buf,sizeof(buf),"%dm",elapsed/60); else if (elapsed < 3600*24) snprintf(buf,sizeof(buf),"%dh",elapsed/3600); else snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24)); label = zstrdup(buf); sparklineSequenceAddSample(seq,ts->samples[i].latency,label); } graph = sdscatprintf(graph, "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event, (unsigned long) max, (unsigned long) min, (unsigned long) ts->max); for (j = 0; j < LATENCY_GRAPH_COLS; j++) graph = sdscatlen(graph,"-",1); graph = sdscatlen(graph,"\n",1); //调用sparkline函数画微线图 graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL); freeSparklineSequence(seq); //返回微线图字符串 return graph; } ~~~ 在Redis中还封装了一些命令供外部调用，这里就不分析了，就是对上述方法的复合调用: ~~~ /* ---------------------------- Latency API --------------------------------- */ void latencyMonitorInit(void) /* 延时监听初始化操作，创建Event字典对象 */ void latencyAddSample(char *event, mstime_t latency) /* 添加Sample到指定的Event对象的Sample列表中 */ int latencyResetEvent(char *event_to_reset) /* 重置Event事件的延迟，删除字典中的event的记录 */ void analyzeLatencyForEvent(char *event, struct latencyStats *ls) /* 分析某个事件Event的延时结果，结果信息存入latencyStats结构体中 */ sds createLatencyReport(void) /* 根据延时Sample的结果，创建阅读性比较好的分析报告 */ void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts) void latencyCommandReplyWithLatestEvents(redisClient *c) sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) void latencyCommand(redisClient *c) ~~~ Redis的延时类文件的分析也结束了，分析了这么长时间的Redis代码，感觉每一块的代码都会有它的亮点存在，分析了30多期下来，还是学到了很多网上所学不到的知识，网上更多的是Redis主流思想的学习，像一些比较细小的点，也只有自己品味，自己才能够真正的体会。