ES集群监控总结-指标说明
一、 集群监控
集群监控主要包括两个方面的内容,分别是集群健康情况和集群的运行状态。
1、集群健康状态 GET _cluster/health?pretty
返回结果示例如下:
{
"cluster_name" : "elasticsearch",
"status" : "green",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 11,
"active_shards" : 22,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 0,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 0,
"number_of_in_flight_fetch" : 0,
"task_max_waiting_in_queue_millis" : 0,
"active_shards_percent_as_number" : 100.0
}
关键指标说明:
status:集群状态,分为green、yellow和red。green表示所有主分片和副本分片均已分配;yellow表示所有主分片已分配,但至少有一个副本分片未分配;red表示至少有一个主分片未分配。
number_of_nodes/number_of_data_nodes:集群的节点数和数据节点数。
active_primary_shards:集群中所有活跃的主分片数。
active_shards:集群中所有活跃的分片数。
relocating_shards:集群中正在从一个节点迁往其他节点的分片数量,通常为0,当有节点加入或者退出时该值会增加。
initializing_shards:正在初始化的分片。
unassigned_shards:未分配的分片数,通常为0,当有某个节点的副本分片丢失该值就会增加。
number_of_pending_tasks:等待主节点处理的任务数量(例如创建索引、分配shards等任务),如果该指标数值一直未减小,代表集群存在不稳定因素。
active_shards_percent_as_number:集群分片健康度,活跃分片数占总分片数比例。
number_of_pending_tasks:pending task只能由主节点来进行处理,这些任务包括创建索引并将shards分配给节点。
2、集群状态信息 GET _cluster/stats?pretty
{
"_nodes" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"cluster_name" : "elasticsearch",
"cluster_uuid" : "1sDfT-5dTD-M3u2ciLpgTw",
"timestamp" : 1597126502693,
"status" : "green",
"indices" : {
"count" : 11,
"shards" : {
"total" : 22,
"primaries" : 11,
"replication" : 1.0,
"index" : {
"shards" : {
"min" : 2,
"max" : 2,
"avg" : 2.0
},
"primaries" : {
"min" : 1,
"max" : 1,
"avg" : 1.0
},
"replication" : {
"min" : 1.0,
"max" : 1.0,
"avg" : 1.0
}
}
},
"docs" : {
"count" : 497528,
"deleted" : 9
},
"store" : {
"size_in_bytes" : 256478188
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 126,
"hit_count" : 0,
"miss_count" : 126,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 75,
"memory_in_bytes" : 527588,
"terms_memory_in_bytes" : 356640,
"stored_fields_memory_in_bytes" : 39560,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 38592,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 92796,
"index_writer_memory_in_bytes" : 10861380,
"version_map_memory_in_bytes" : 5593,
"fixed_bit_set_memory_in_bytes" : 1248,
"max_unsafe_auto_id_timestamp" : 1596331158556,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "binary",
"count" : 3,
"index_count" : 1
},
{
"name" : "boolean",
"count" : 31,
"index_count" : 6
},
{
"name" : "date",
"count" : 65,
"index_count" : 11
},
{
"name" : "flattened",
"count" : 1,
"index_count" : 1
},
{
"name" : "float",
"count" : 6,
"index_count" : 2
},
{
"name" : "geo_shape",
"count" : 1,
"index_count" : 1
},
{
"name" : "integer",
"count" : 27,
"index_count" : 2
},
{
"name" : "keyword",
"count" : 384,
"index_count" : 11
},
{
"name" : "long",
"count" : 118,
"index_count" : 8
},
{
"name" : "nested",
"count" : 10,
"index_count" : 4
},
{
"name" : "object",
"count" : 238,
"index_count" : 8
},
{
"name" : "text",
"count" : 176,
"index_count" : 10
}
]
},
"analysis" : {
"char_filter_types" : [ ],
"tokenizer_types" : [ ],
"filter_types" : [ ],
"analyzer_types" : [ ],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [ ],
"built_in_filters" : [ ],
"built_in_analyzers" : [ ]
}
},
"nodes" : {
"count" : {
"total" : 3,
"coordinating_only" : 0,
"data" : 3,
"ingest" : 3,
"master" : 3,
"ml" : 3,
"remote_cluster_client" : 3,
"transform" : 3,
"voting_only" : 0
},
"versions" : [
"7.8.0"
],
"os" : {
"available_processors" : 12,
"allocated_processors" : 12,
"names" : [
{
"name" : "Linux",
"count" : 3
}
],
"pretty_names" : [
{
"pretty_name" : "CentOS Linux 7 (Core)",
"count" : 3
}
],
"mem" : {
"total_in_bytes" : 49973366784,
"free_in_bytes" : 17576009728,
"used_in_bytes" : 32397357056,
"free_percent" : 35,
"used_percent" : 65
}
},
"process" : {
"cpu" : {
"percent" : 0
},
"open_file_descriptors" : {
"min" : 378,
"max" : 396,
"avg" : 387
}
},
"jvm" : {
"max_uptime_in_millis" : 3382064258,
"versions" : [
{
"version" : "13.0.2",
"vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
"vm_version" : "13.0.2+8",
"vm_vendor" : "Oracle Corporation",
"bundled_jdk" : true,
"using_bundled_jdk" : false,
"count" : 3
}
],
"mem" : {
"heap_used_in_bytes" : 3488524616,
"heap_max_in_bytes" : 25665208320
},
"threads" : 187
},
"fs" : {
"total_in_bytes" : 160982630400,
"free_in_bytes" : 132680425472,
"available_in_bytes" : 132680425472
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"security4" : 3
},
"http_types" : {
"security4" : 3
}
},
"discovery_types" : {
"zen" : 3
},
"packaging_types" : [
{
"flavor" : "default",
"type" : "tar",
"count" : 3
}
],
"ingest" : {
"number_of_pipelines" : 2,
"processor_stats" : {
"gsub" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
}
}
}
}
}
关键指标说明:
indices.count:索引总数。
indices.shards.total:分片总数。
indices.shards.primaries:主分片数量。
indices.docs.count:文档总数。
indices.store.size_in_bytes:数据总存储容量。
indices.segments.count:段总数。
nodes.count.total:总节点数。
nodes.count.data:数据节点数。
nodes.process.cpu.percent:节点CPU使用率。
nodes.fs.total_in_bytes:文件系统总容量。
nodes.fs.free_in_bytes:文件系统剩余总容量。
3、节点监控 GET _nodes/stats?pretty(注:下方示例输出为 _cluster/stats 的返回结果;后面“关键指标说明”中的节点级指标需通过 _nodes/stats 获取)
{
"_nodes" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"cluster_name" : "elasticsearch",
"cluster_uuid" : "1sDfT-5dTD-M3u2ciLpgTw",
"timestamp" : 1597126634825,
"status" : "green",
"indices" : {
"count" : 11,
"shards" : {
"total" : 22,
"primaries" : 11,
"replication" : 1.0,
"index" : {
"shards" : {
"min" : 2,
"max" : 2,
"avg" : 2.0
},
"primaries" : {
"min" : 1,
"max" : 1,
"avg" : 1.0
},
"replication" : {
"min" : 1.0,
"max" : 1.0,
"avg" : 1.0
}
}
},
"docs" : {
"count" : 497533,
"deleted" : 10
},
"store" : {
"size_in_bytes" : 256526834
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 126,
"hit_count" : 0,
"miss_count" : 126,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 69,
"memory_in_bytes" : 520076,
"terms_memory_in_bytes" : 352416,
"stored_fields_memory_in_bytes" : 36632,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 38592,
"points_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 92436,
"index_writer_memory_in_bytes" : 10876816,
"version_map_memory_in_bytes" : 7259,
"fixed_bit_set_memory_in_bytes" : 960,
"max_unsafe_auto_id_timestamp" : 1596331158556,
"file_sizes" : { }
},
"mappings" : {
"field_types" : [
{
"name" : "binary",
"count" : 3,
"index_count" : 1
},
{
"name" : "boolean",
"count" : 31,
"index_count" : 6
},
{
"name" : "date",
"count" : 65,
"index_count" : 11
},
{
"name" : "flattened",
"count" : 1,
"index_count" : 1
},
{
"name" : "float",
"count" : 6,
"index_count" : 2
},
{
"name" : "geo_shape",
"count" : 1,
"index_count" : 1
},
{
"name" : "integer",
"count" : 27,
"index_count" : 2
},
{
"name" : "keyword",
"count" : 384,
"index_count" : 11
},
{
"name" : "long",
"count" : 118,
"index_count" : 8
},
{
"name" : "nested",
"count" : 10,
"index_count" : 4
},
{
"name" : "object",
"count" : 238,
"index_count" : 8
},
{
"name" : "text",
"count" : 176,
"index_count" : 10
}
]
},
"analysis" : {
"char_filter_types" : [ ],
"tokenizer_types" : [ ],
"filter_types" : [ ],
"analyzer_types" : [ ],
"built_in_char_filters" : [ ],
"built_in_tokenizers" : [ ],
"built_in_filters" : [ ],
"built_in_analyzers" : [ ]
}
},
"nodes" : {
"count" : {
"total" : 3,
"coordinating_only" : 0,
"data" : 3,
"ingest" : 3,
"master" : 3,
"ml" : 3,
"remote_cluster_client" : 3,
"transform" : 3,
"voting_only" : 0
},
"versions" : [
"7.8.0"
],
"os" : {
"available_processors" : 12,
"allocated_processors" : 12,
"names" : [
{
"name" : "Linux",
"count" : 3
}
],
"pretty_names" : [
{
"pretty_name" : "CentOS Linux 7 (Core)",
"count" : 3
}
],
"mem" : {
"total_in_bytes" : 49973366784,
"free_in_bytes" : 17574735872,
"used_in_bytes" : 32398630912,
"free_percent" : 35,
"used_percent" : 65
}
},
"process" : {
"cpu" : {
"percent" : 0
},
"open_file_descriptors" : {
"min" : 378,
"max" : 396,
"avg" : 387
}
},
"jvm" : {
"max_uptime_in_millis" : 3382196420,
"versions" : [
{
"version" : "13.0.2",
"vm_name" : "Java HotSpot(TM) 64-Bit Server VM",
"vm_version" : "13.0.2+8",
"vm_vendor" : "Oracle Corporation",
"bundled_jdk" : true,
"using_bundled_jdk" : false,
"count" : 3
}
],
"mem" : {
"heap_used_in_bytes" : 3655595384,
"heap_max_in_bytes" : 25665208320
},
"threads" : 187
},
"fs" : {
"total_in_bytes" : 160982630400,
"free_in_bytes" : 132679999488,
"available_in_bytes" : 132679999488
},
"plugins" : [ ],
"network_types" : {
"transport_types" : {
"security4" : 3
},
"http_types" : {
"security4" : 3
}
},
"discovery_types" : {
"zen" : 3
},
"packaging_types" : [
{
"flavor" : "default",
"type" : "tar",
"count" : 3
}
],
"ingest" : {
"number_of_pipelines" : 2,
"processor_stats" : {
"gsub" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
},
"script" : {
"count" : 0,
"failed" : 0,
"current" : 0,
"time_in_millis" : 0
}
}
}
}
}
关键指标说明:
name:节点名。
roles:节点角色。
indices.docs.count:索引文档数。
indices.segments.count:段总数。
jvm.mem.heap_used_percent:JVM堆内存使用百分比。
thread_pool.{bulk, index, get, search}.{active, queue, rejected}:线程池的一些信息,包括bulk、index、get和search线程池,主要指标有active(活跃)线程数、queue(队列)中等待的任务数和rejected(被拒绝)的任务数。注意:在7.x版本中,bulk线程池已更名为write,index线程池已被移除。
4、GET _cat/indices 查看索引信息
green open .kibana-event-log-7.8.0-000001 93qZ6-tUT1GJa9w-beZtqQ 1 1 5 0 51.5kb 25.7kb
green open .kibana-event-log-7.8.0-000002 aVeWX45vRqK5bi5ANvz3EA 1 1 0 0 416b 208b
green open .apm-custom-link qMdSK3MAQxa4Ip37SPkaaw 1 1 0 0 416b 208b
5、 GET _cat/nodes?pretty 查看节点信息
XX.XX.XX.XX 16 64 0 0.06 0.04 0.05 dilmrt * node-1
XX.XX.XX.XX 11 64 0 0.00 0.01 0.05 dilmrt - node-2
XX.XX.XX.XX 9 67 0 0.00 0.01 0.05 dilmrt - node-3