ES工作总结
2019-12-21 本文已影响0人
后来丶_a24d
常用命令
1. 查看所有索引 http://localhost:9200/_cat/indices?v
2. 查看索引模板 http://localhost:9200/_template/active-logs?pretty
3. 查看索引配置具体信息 http://localhost:9200/active-logs-2019.12.09?pretty
4. 查看索引文档具体信息 http://localhost:9200/active-logs-2019.12.10/log/_search?pretty
5. 多索引信息查看 http://localhost:9200/active-logs-2019.12.09,active-logs-2019.12.10/log/_search?pretty
6. 别名查看 http://localhost:9200/*/_alias/active-logs?pretty
7. 查看集群的节点 /_cat/nodes?v
8. 查看mapping /test/_mapping/_doc
9. 查看setting /test/_settings
10. 将别名指向索引put /test/_alias/daily-test
in查询
- 在使用filter context(must_not同样不参与算分)时,ElasticSearch 不需要做相关性的计算,并且Filter的搜索结果可以被缓存
- 域名/索引名称/_search?pretty
{
"query": {
"bool": {
"must": [
{"match": { "某个字段": 3658229 }}
],
"should": [
{
"match": {
"某个字段": "4"
}
},
{
"match": {
"某个字段": "6"
}
}
],
"minimum_should_match":1
}
}
}
# 带filter的
{
"query": {
"bool": {
"must": {"match": {}},
"filter": {"range": {}}
}
}
}
创建索引
- PUT 域名/索引名称?pretty (请求体由下面的settings和mappings组成)
settings
- 具体含义,在代码里面有解释
{
"settings": {
"index": {
# 分片数量
"number_of_shards": "5",
# 相似度相关配置
"similarity": {
"zeroTf_similarity": {
# 使用BM25,k1、b为调节因子
# k1控制词频对TF score的影响(词频饱和度), 避免TF过大造成的影响
# b控制L对TF score评分的影响,L表示文档长度与平均文档长度的比值, L越小TF score越大,越短的文章,相关性确定的越快
"type": "BM25",
"b": "0",
"k1": "0"
}
},
# 分词相关配置
"analysis": {
# 过滤器
"filter": {
# 匹配过滤器
"_pattern": {
"type": "pattern_capture",
"preserve_original": "true",
"patterns": [
"([0-9])",
"([a-z])"
]
},
# 标点过滤器
"punctuation_filter": {
"pattern": "[\\pP\\s]",
"type": "pattern_replace",
"replacement": ""
},
# 长度过滤器
"lengthFilter": {
"type": "length",
"min": "1"
}
},
# 分词器
"analyzer": {
# search分词器,粗粒度的拆分,搜索时用
"ik_search_filter": {
"filter": [
"asciifolding",
"kstem",
"punctuation_filter",
"lengthFilter"
],
"tokenizer": "ik_smart"
},
# pinyin分词器,细粒度划分,索引时用,索引时最大化的将文章内容分词
"ik_pinyin": {
"filter": [
"asciifolding",
"kstem",
"punctuation_filter",
"lengthFilter"
],
"tokenizer": "ik_max_word"
}
}
},
# 副本数量
"number_of_replicas": "1"
}
}
}
mappings
- 具体含义,在代码里面有解释
"mappings": {
# 遇到陌生字段,就报错
"dynamic": "strict",
"properties": {
"createdAt": {
"type": "long"
},
"description": {
# 不分词
"type": "keyword"
},
"descriptions": {
"type": "text",
# 使用settings配置的相关性分析
"similarity": "zeroTf_similarity",
# 索引时细粒度索引
"analyzer": "ik_pinyin",
# 搜索时粗粒度分词
"search_analyzer": "ik_search_filter"
},
"title": {
# 同上
"type": "text",
"similarity": "zeroTf_similarity",
"analyzer": "ik_pinyin",
"search_analyzer": "ik_search_filter"
}
}
}
term查询
- 精确查询,搜索前不会再对搜索词进行分词,所以我们的搜索词必须是文档分词集合中的一个
{
"query":{
"term":{
"title":"北京奥运"
}
}
}
match查询
- match搜索会先对搜索词进行分词,对于最基本的match搜索来说,只要搜索词的分词集合中的一个或多个存在于文档中即可。例如,当我们搜索中国杭州,搜索词会先分词为中国和杭州,只要文档中包含中国和杭州任意一个词,都会被搜索到
{
"query": {
"match": {
"content": "中国杭州"
}
}
}
按天建索引
- 当前已有索引,只是要将它按天分,其实用reindex可以迁移数据,实现现有索引重命名,并把当前索引作为别名使用(需删除之前索引), 但是这种方式在数据量大时不合适,所以放弃。
- 创建索引模板 查看索引模板 /_template/test-template?pretty
{
"test-template" : {
"index_patterns" : [
"test-*"
],
"settings" : {
"index" : {
"number_of_shards" : "5",
"number_of_replicas" : "1"
}
},
"mappings" : {
"_doc" : {
"properties" : {
"client_appid" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
},
"isSuccess" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
},
"requestJson" : {
"type" : "text",
"fields" : {
"keyword" : {
"ignore_above" : 256,
"type" : "keyword"
}
}
}
}
}
}
}
- 将别名指向索引put /test/_alias/test-aliases, 查看别名 /*/_alias/test-aliases
- 按天rollover建索引
// Currently a single index is created per day; daily-test is the alias of test.
// NOTE(review): the first RolloverRequest argument is expected to be the alias being
// rolled over - confirm that "test" is the alias here and not the concrete index name.
String today = new DateTime().toString("yyyy.MM.dd");
RolloverRequest rolloverRequest = new RolloverRequest("test", "test-" + today);
// Roll over only once the current index is at least one day old.
rolloverRequest.addMaxIndexAgeCondition(TimeValue.parseTimeValue("1d", "max_age"));
RestHighLevelClient client = EsClient.getClient();
RolloverResponse rolloverResponse = client.indices().rollover(rolloverRequest, RequestOptions.DEFAULT);
使用RestHighLevelClient提供读的接口
- 创建es连接
/**
 * Builds and exposes the single {@link RestHighLevelClient} shared by the application.
 *
 * <p>The client is created once in the static initializer. It targets one HTTP endpoint
 * (chosen by environment), authenticates with basic credentials, caps the connection
 * pool and uses a fixed keep-alive duration.
 */
public class EsClient {
    // NOTE(review): PROD_HOST and TEST_HOST are not referenced anywhere in this class.
    private static final String PROD_HOST = "集群名称";
    private static final String TEST_HOST = "****";
    private static final String SCHEMA = "http";
    // NOTE(review): credentials are hard-coded; consider loading them from configuration.
    private static final String USERNAME = "username";
    private static final String AUTHORIZATION_CODE = "pwd";

    /** Endpoint used when the environment is not PRO. */
    private static final String DEFAULT_HOST = "***";
    private static final int DEFAULT_PORT = 8080;

    /** Endpoint used when the environment is PRO. */
    private static final String PRO_HOST = "***";
    private static final int PRO_PORT = 80;

    private static final int MAX_CONN_TOTAL = 500;
    private static final int MAX_CONN_PER_ROUTE = 100;
    private static final long KEEP_ALIVE_MILLIS = 60000L;

    private static final RestHighLevelClient restClient;

    static {
        // The environment is read from the configuration center.
        boolean pro = Foundation.server().getEnv().isPRO();
        HttpHost httpHost = new HttpHost(
                pro ? PRO_HOST : DEFAULT_HOST,
                pro ? PRO_PORT : DEFAULT_PORT,
                SCHEMA);
        restClient = new RestHighLevelClient(
                RestClient.builder(httpHost).setHttpClientConfigCallback(EsClient::customizeHttpClient));
    }

    /**
     * Returns the shared client built during class initialization.
     *
     * @return the shared {@link RestHighLevelClient}
     */
    public static RestHighLevelClient getClient() {
        return restClient;
    }

    /**
     * Applies basic-auth credentials, connection-pool limits and the keep-alive duration
     * to the underlying async HTTP client builder.
     */
    private static HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
        CredentialsProvider provider = new BasicCredentialsProvider();
        // The credentials are offered to any host, port and realm.
        AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM);
        provider.setCredentials(scope, new UsernamePasswordCredentials(USERNAME, AUTHORIZATION_CODE));
        return httpClientBuilder
                .setDefaultCredentialsProvider(provider)
                .setMaxConnTotal(MAX_CONN_TOTAL)
                .setMaxConnPerRoute(MAX_CONN_PER_ROUTE)
                .setKeepAliveStrategy((response, context) -> KEEP_ALIVE_MILLIS);
    }
}
- 提供读接口, 查询条件按照直连客户端的dsl方式查
/**
 * Executes a search whose body is the raw query DSL supplied by the caller and returns
 * the response as a JSON object.
 *
 * @param adapterRequestDto carries the comma-separated index names, an optional type
 *                          ("_doc" is used when it is empty) and a body map whose
 *                          "request_body" entry holds the DSL
 * @return the search response wrapped in {@link ESSearchResponse} and converted to JSON
 * @throws IOException declared by the DSL parser and by the search call
 */
public JSONObject adapterNewEsRequest(AdapterRequestDto adapterRequestDto) throws IOException {
    RestHighLevelClient client = EsClient.getClient();

    // Target indices and type of the request.
    SearchRequest searchRequest = new SearchRequest(adapterRequestDto.getIndex().split(","));
    searchRequest.types(StringUtils.isEmpty(adapterRequestDto.getType()) ? "_doc" : adapterRequestDto.getType());

    // Turn the caller's DSL into a search source.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    String dslStr = JSONObject.toJSONString(adapterRequestDto.getBody().get("request_body"));
    SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList());
    NamedXContentRegistry registry = new NamedXContentRegistry(searchModule.getNamedXContents());
    // The parser is closeable; release it as soon as the DSL has been read.
    try (XContentParser parser = XContentFactory.xContent(XContentType.JSON)
            .createParser(registry, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, dslStr)) {
        sourceBuilder.parseXContent(parser);
    }
    sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
    searchRequest.source(sourceBuilder);

    // Query the ES server and wrap its response.
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    ESSearchResponse esSearchResponse = new ESSearchResponse(searchResponse);
    return JSONObject.parseObject(new Gson().toJson(esSearchResponse));
}
/**
 * Plain data view of a {@link SearchResponse}: shard counters, hits, elapsed time,
 * cluster info and the timed-out flag.
 *
 * <p>Field names are kept exactly as they are because the object is converted to JSON
 * by field name.
 */
public class ESSearchResponse {
    private Shards shards;
    private ESSearchHits hits;
    private long took;
    private Clusters clusters;
    private boolean timeOut;

    /**
     * Copies the relevant parts of the given response.
     *
     * @param searchResponse the response to copy; when {@code null} every field keeps
     *                       its default value
     */
    public ESSearchResponse(SearchResponse searchResponse) {
        if (Objects.isNull(searchResponse)) {
            return;
        }
        took = searchResponse.getTook().getMillis();
        timeOut = searchResponse.isTimedOut();
        clusters = searchResponse.getClusters();
        // Hits.
        hits = new ESSearchHits(searchResponse.getHits());
        // Shard counters.
        shards = new Shards(searchResponse);
    }

    /** Shard counters of a search response. Static: it needs no outer instance. */
    static class Shards {
        int total;
        int successful;
        int failed;
        int skipped;

        public Shards(SearchResponse searchResponse) {
            total = searchResponse.getTotalShards();
            successful = searchResponse.getSuccessfulShards();
            failed = searchResponse.getFailedShards();
            skipped = searchResponse.getSkippedShards();
        }

        public int getTotal() {
            return total;
        }

        public void setTotal(int total) {
            this.total = total;
        }

        public int getSuccessful() {
            return successful;
        }

        public void setSuccessful(int successful) {
            this.successful = successful;
        }

        public int getFailed() {
            return failed;
        }

        public void setFailed(int failed) {
            this.failed = failed;
        }

        public int getSkipped() {
            return skipped;
        }

        public void setSkipped(int skipped) {
            this.skipped = skipped;
        }
    }

    /** The list of hits plus the total hit count. Static: it needs no outer instance. */
    static class ESSearchHits {
        private List<ESSearchHit> hits = new ArrayList<>();
        public long totalHits;

        ESSearchHits(SearchHits searchHits) {
            // Convert every hit in its original order.
            for (SearchHit item : searchHits.getHits()) {
                hits.add(new ESSearchHit(item));
            }
            totalHits = searchHits.getTotalHits();
        }

        public List<ESSearchHit> getHits() {
            return hits;
        }

        public void setHits(List<ESSearchHit> hits) {
            this.hits = hits;
        }

        public long getTotalHits() {
            return totalHits;
        }

        public void setTotalHits(long totalHits) {
            this.totalHits = totalHits;
        }
    }

    /** A single hit: id, type and source map. Static: it needs no outer instance. */
    static class ESSearchHit {
        private String id;
        private String type;
        /**
         * Deliberately not camelCase, for compatibility with the front end.
         */
        private Map<String, Object> _source;

        ESSearchHit(SearchHit searchHit) {
            _source = searchHit.getSourceAsMap();
            id = searchHit.getId();
            type = searchHit.getType();
        }

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getType() {
            return type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public Map<String, Object> get_source() {
            return _source;
        }

        public void set_source(Map<String, Object> _source) {
            this._source = _source;
        }
    }

    public Shards getShards() {
        return shards;
    }

    public void setShards(Shards shards) {
        this.shards = shards;
    }

    public ESSearchHits getHits() {
        return hits;
    }

    public void setHits(ESSearchHits hits) {
        this.hits = hits;
    }

    public long getTook() {
        return took;
    }

    public void setTook(long took) {
        this.took = took;
    }

    public Clusters getClusters() {
        return clusters;
    }

    public void setClusters(Clusters clusters) {
        this.clusters = clusters;
    }

    public boolean isTimeOut() {
        return timeOut;
    }

    public void setTimeOut(boolean timeOut) {
        this.timeOut = timeOut;
    }
}
- 上线之后遇到 I/O reactor 已停止(stopped)的报错, 大致意思就是RHLC客户端的 I/O reactor 被stop了。查看线上日志时需要根据时间查看,并且除了观察与当前问题相关的错误日志,还要看下其他错误日志。看了日志发现es客户端执行前端传的dsl查询语句时正则解析爆栈了。RHLC初始化时会初始化CloseableHttpAsyncClientBase,并初始化用作 I/O reactor 的线程,在后续启动。排查问题可根据报错信息排查。
[2019-12-21T14:02:32,839][DEBUG][o.e.a.s.TransportSearchAction] [node-1] All shards failed for phase: [query]
org.elasticsearch.index.query.QueryShardException: failed to create query: {
"regexp" : {
"test" : {
"value" : "t{1,9500}",
"flags_value" : 65535,
"max_determinized_states" : 10000,
"boost" : 1.0
}
}
}
at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:324) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:307) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.parseSource(SearchService.java:769) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.createContext(SearchService.java:620) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.createAndPutContext(SearchService.java:595) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:386) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.access$100(SearchService.java:125) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:358) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:354) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$4.doRun(SearchService.java:1085) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:41) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:751) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.7.0.jar:6.7.0]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_151]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_151]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_151]
Caused by: java.lang.IllegalArgumentException: input automaton is too large: 1001
//省略1000行重复的
at org.apache.lucene.util.automaton.Operations.isFinite(Operations.java:1037) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.apache.lucene.util.automaton.CompiledAutomaton.<init>(CompiledAutomaton.java:217) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.apache.lucene.search.AutomatonQuery.<init>(AutomatonQuery.java:104) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.apache.lucene.search.AutomatonQuery.<init>(AutomatonQuery.java:81) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.apache.lucene.search.RegexpQuery.<init>(RegexpQuery.java:107) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.apache.lucene.search.RegexpQuery.<init>(RegexpQuery.java:92) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
at org.elasticsearch.index.query.RegexpQueryBuilder.doToQuery(RegexpQueryBuilder.java:260) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.index.query.AbstractQueryBuilder.toQuery(AbstractQueryBuilder.java:105) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.index.query.QueryShardContext.lambda$toQuery$2(QueryShardContext.java:308) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:320) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:307) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.parseSource(SearchService.java:769) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.createContext(SearchService.java:620) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.createAndPutContext(SearchService.java:595) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:386) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService.access$100(SearchService.java:125) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:358) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:354) [elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.search.SearchService$4.doRun(SearchService.java:1085) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:41) ~[elasticsearch-6.7.0.jar:6.7.0]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:751) ~[elasticsearch-6.7.0.jar:6.7.0]