Java 业务指标监控

2020-04-21  本文已影响0人  天草二十六_简村人

1、总体设计


image.png

业务方使用Slf4jReporter生成指标数据到日志文件里,后面就交由服务端来接收和处理并展示了。

2、引入jar包

<!-- Dropwizard Metrics core; MetricConfig imports com.codahale.metrics.* (presumably supplied by this artifact). -->
<!-- NOTE(review): no <version> is given here; presumably it is managed by a parent POM / BOM. Confirm. -->
<dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-core</artifactId>
        </dependency>

        <!-- Metrics annotations. NOTE(review): nothing in the code shown in this article uses them;
             confirm the dependency is needed. The version is also omitted here. -->
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-annotation</artifactId>
        </dependency>

        <!-- InfluxDB reporter; MetricConfig imports metrics_influxdb.* (presumably supplied by this artifact). -->
        <dependency>
            <groupId>com.github.davidb</groupId>
            <artifactId>metrics-influxdb</artifactId>
            <version>0.9.3</version>
        </dependency>

3、logback-spring.xml增加日志输出配置

<!-- Dedicated rolling file appender for metric report lines. -->
<appender name="metricsFILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <!--
            Roll over both daily and when the active file exceeds the size limit.
            TimeBasedRollingPolicy is its own triggering policy and must not be paired with a
            separate SizeBasedTriggeringPolicy; SizeAndTimeBasedRollingPolicy is the supported
            way to combine the two. The %i index token is mandatory with this policy.
        -->
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <FileNamePattern>${LOG_HOME}/metrics_%d{yyyyMMdd}.%i.log</FileNamePattern>
            <!-- Maximum size of a single log file -->
            <MaxFileSize>${FILE_SIZE}MB</MaxFileSize>
            <!-- Number of days of log files to keep -->
            <MaxHistory>7</MaxHistory>
        </rollingPolicy>
        <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
            <pattern>%d{HH:mm:ss} %-5p %c:%L - %m%n</pattern>
        </encoder>
    </appender>

    <!--
        Routes the logger named "metrics" (the category shown in the sample output, e.g. "metrics:373")
        to the dedicated metricsFILE appender. additivity="false" keeps these events from also being
        passed on to appenders attached to ancestor loggers.
    -->
    <logger name="metrics" level="INFO" additivity="false">
        <appender-ref ref="metricsFILE" />
    </logger>

4、指标配置类MetricConfig

import com.codahale.metrics.*;
import metrics_influxdb.HttpInfluxdbProtocol;
import metrics_influxdb.InfluxdbReporter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.concurrent.TimeUnit;

@Configuration
public class MetricConfig {

    @Bean
    public MetricRegistry registry() {
        return new MetricRegistry();
    }

    @Bean
    public Slf4jReporter slf4jReporter(MetricRegistry registry) {
        Slf4jReporter reporter = Slf4jReporter.forRegistry(registry)
                .convertRatesTo(TimeUnit.SECONDS)
                .convertDurationsTo(TimeUnit.MILLISECONDS)
                .build();

        reporter.start(10, TimeUnit.SECONDS);
        return reporter;
    }

    /**
     * TPS 计算器
     *
     * @param registry
     * @return
     */
    @Bean
    public Meter requestMeter(MetricRegistry registry) {
        return registry.meter("request");
    }

    /**
     * 直方图
     *
     * @param registry
     * @return
     */
    @Bean
    public Histogram responseSizes(MetricRegistry registry) {
        return registry.histogram("response-sizes");
    }

    /**
     * 计数器
     *
     * @param registry
     * @return
     */
    @Bean
    public Counter pendingJobs(MetricRegistry registry) {
        return registry.counter("requestCount");
    }

    /**
     * 计时器
     *
     * @param registry
     * @return
     */
    @Bean
    public Timer responses(MetricRegistry registry) {
        return registry.timer("executeTime");
    }


    @Bean(name = "influxdbReporter")
    public ScheduledReporter influxdbReporter(MetricRegistry registry) {
        ScheduledReporter reporter = InfluxdbReporter.forRegistry(registry)
                .protocol(new HttpInfluxdbProtocol("http",
                        "localhost",
                        8086, "", "", "metrics"))
                .convertRatesTo(TimeUnit.SECONDS)
                .convertDurationsTo(TimeUnit.MILLISECONDS)
                .filter(MetricFilter.ALL)
                .skipIdleMetrics(false)
                .build();

        reporter.start(10, TimeUnit.SECONDS);

        return reporter;
    }
}

5、使用示例:

   // Meter registered under the metric name "request" (bean MetricConfig#requestMeter).
   @Resource
    private Meter requestMeter;

    // Histogram registered under the metric name "response-sizes" (bean MetricConfig#responseSizes).
    @Resource
    private Histogram responseSizes;

    // Counter registered under the metric name "requestCount" (bean MetricConfig#pendingJobs).
    @Resource
    private Counter pendingJobs;

    // Timer registered under the metric name "executeTime" (bean MetricConfig#responses).
    @Resource
    private Timer responses;

    /**
     * Demo endpoint that exercises all four injected metrics on each call:
     * marks the request meter, increments the counter, records a sample in the
     * histogram and times the production of the response body.
     *
     * @param request  the incoming HTTP request (not read by this method)
     * @param response the outgoing HTTP response (not written by this method)
     * @return the literal body {@code "success"}
     * @throws ServletException declared for the handler signature; not thrown by this body
     * @throws IOException      declared for the handler signature; not thrown by this body
     */
    @RequestMapping("/test/sign")
    @ResponseBody
    public String getSign(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        requestMeter.mark();

        pendingJobs.inc();

        // A random value in [0, 10) stands in for a real response size. ThreadLocalRandom avoids
        // allocating and seeding a new Random on every request.
        responseSizes.update(java.util.concurrent.ThreadLocalRandom.current().nextInt(10));

        // Timer.Context is closeable; closing it stops the timer and records the elapsed time.
        try (Timer.Context ignored = responses.time()) {
            return "success";
        }
    }

效果见下图:


image.png

每隔10秒打印出指标数据的日志。

19:11:37 INFO  metrics:373 - type=COUNTER, name=requestCount, count=0
19:11:37 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=0, min=0, max=0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0
19:11:37 INFO  metrics:373 - type=METER, name=request, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second
19:11:37 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:11:47 INFO  metrics:373 - type=COUNTER, name=requestCount, count=1
19:11:47 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=1, min=5, max=5, mean=5.0, stddev=0.0, median=5.0, p75=5.0, p95=5.0, p98=5.0, p99=5.0, p999=5.0
19:11:47 INFO  metrics:373 - type=METER, name=request, count=1, mean_rate=0.04772524178633343, m1=0.014712537947741825, m5=0.0032510706679223173, m15=0.0011018917421948848, rate_unit=events/second
19:11:47 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:11:57 INFO  metrics:373 - type=COUNTER, name=requestCount, count=1
19:11:57 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=1, min=5, max=5, mean=5.0, stddev=0.0, median=5.0, p75=5.0, p95=5.0, p98=5.0, p99=5.0, p999=5.0
19:11:57 INFO  metrics:373 - type=METER, name=request, count=1, mean_rate=0.032306681295300385, m1=0.012453894499523116, m5=0.003144487893819254, m15=0.0010897162674033895, rate_unit=events/second
19:11:57 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:07 INFO  metrics:373 - type=COUNTER, name=requestCount, count=2
19:12:07 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=2, min=5, max=7, mean=6.156210465768618, stddev=0.987723792557591, median=7.0, p75=7.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:07 INFO  metrics:373 - type=METER, name=request, count=2, mean_rate=0.04883621909301433, m1=0.025254532045303885, m5=0.0062924699865950515, m15=0.002179567069063387, rate_unit=events/second
19:12:07 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:17 INFO  metrics:373 - type=COUNTER, name=requestCount, count=2
19:12:17 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=2, min=5, max=7, mean=6.156210465768618, stddev=0.987723792557591, median=7.0, p75=7.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:17 INFO  metrics:373 - type=METER, name=request, count=2, mean_rate=0.03925160662498755, m1=0.02137749984701412, m5=0.006086178282834525, m15=0.0021554837014424494, rate_unit=events/second
19:12:17 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:27 INFO  metrics:373 - type=COUNTER, name=requestCount, count=2
19:12:27 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=2, min=5, max=7, mean=6.156210465768618, stddev=0.987723792557591, median=7.0, p75=7.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:27 INFO  metrics:373 - type=METER, name=request, count=2, mean_rate=0.032810010401197694, m1=0.018095662944349354, m5=0.00588664962556148, m15=0.0021316664456582147, rate_unit=events/second
19:12:27 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:37 INFO  metrics:373 - type=COUNTER, name=requestCount, count=2
19:12:37 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=2, min=5, max=7, mean=6.156210465768618, stddev=0.987723792557591, median=7.0, p75=7.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:37 INFO  metrics:373 - type=METER, name=request, count=2, mean_rate=0.028187596113694386, m1=0.01531764798217201, m5=0.005693662295739435, m15=0.0021081123612784834, rate_unit=events/second
19:12:37 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:47 INFO  metrics:373 - type=COUNTER, name=requestCount, count=2
19:12:47 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=2, min=5, max=7, mean=6.156210465768618, stddev=0.987723792557591, median=7.0, p75=7.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:47 INFO  metrics:373 - type=METER, name=request, count=2, mean_rate=0.024705614814530406, m1=0.012966109085216199, m5=0.005507001843146522, m15=0.0020848185403616864, rate_unit=events/second
19:12:47 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:12:57 INFO  metrics:373 - type=COUNTER, name=requestCount, count=4
19:12:57 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=4, min=0, max=7, mean=2.0488679402655547, stddev=2.6066574932478366, median=1.0, p75=5.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:12:57 INFO  metrics:373 - type=METER, name=request, count=4, mean_rate=0.04397865428643308, m1=0.01097557438357367, m5=0.005326460848075398, m15=0.0020617821070978764, rate_unit=events/second
19:12:57 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds
19:13:07 INFO  metrics:373 - type=COUNTER, name=requestCount, count=4
19:13:07 INFO  metrics:373 - type=HISTOGRAM, name=response-sizes, count=4, min=0, max=7, mean=2.0488679402655547, stddev=2.6066574932478366, median=1.0, p75=5.0, p95=7.0, p98=7.0, p99=7.0, p999=7.0
19:13:07 INFO  metrics:373 - type=METER, name=request, count=4, mean_rate=0.03962227676951814, m1=0.03871569903135633, m5=0.011653980026690198, m15=0.0042427837018434575, rate_unit=events/second
19:13:07 INFO  metrics:373 - type=TIMER, name=executeTime, count=0, min=0.0, max=0.0, mean=0.0, stddev=0.0, median=0.0, p75=0.0, p95=0.0, p98=0.0, p99=0.0, p999=0.0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second, duration_unit=milliseconds

后续会再补充其余几个技术栈的内容。

上一篇 下一篇

猜你喜欢

热点阅读