ES工作总结

2019-12-21 本文已影响0人后来丶_a24d

常用命令

1. 查看所有索引 http://localhost:9200/_cat/indices?v
2. 查看索引模板 http://localhost:9200/_template/active-logs?pretty
3. 查看索引配置具体信息 http://localhost:9200/active-logs-2019.12.09?pretty
4. 查看索引文档具体信息 http://localhost:9200/active-logs-2019.12.10/log/_search?pretty
5. 多索引信息查看 http://localhost:9200/active-logs-2019.12.09,active-logs-2019.12.10/log/_search?pretty
6. 别名查看 http://localhost:9200/*/_alias/active-logs?pretty
7. 查看集群的节点 /_cat/nodes?v
8. 查看mapping /test/_mapping/_doc
9. 查看setting /test/_settings
10. 将别名指向索引put /test/_alias/daily-test

in查询

在使用filter context（must_not同样不参与算分）时，ElasticSearch 不需要做相关性的计算，Filter的搜索结果可以被缓存
域名/索引名称/_search?pretty

{
  "query": {
    "bool": {
      "must": [
        {"match": { "某个字段": 3658229 }}
      ],
      "should": [
        {
          "match": {
            "某个字段": "4"
          }
        },
        {
          "match": {
            "某个字段": "6"
          }
        }
      ],
      "minimum_should_match":1
    }
  }
}
# 带filter的
{
    "query": {
        "bool": {
            "must": {"match": {}},
            "filter": {"range": {}}
        }
    }
}

创建索引

PUT 域名/索引名称?pretty

settings

具体含义，在代码里面有解释

{
    "settings": {
        "index": {
            # 分片数量
            "number_of_shards": "5",
            # 相似度相关配置
            "similarity": {
                "zeroTf_similarity": {
                    # 使用BM25，k1、b为调节因子
                    # k1表示词频对TF score的影响, 避免TF过大造成的影响
                    # b表示L对TF score评分的影响，L表示文档长度与平均长度的比值, L越小TF score越大，越短的文章，相关性确定的越快
                    "type": "BM25",
                    "b": "0",
                    "k1": "0"
                }
            },
            # 分词相关配置
            "analysis": {
                # 过滤器
                "filter": {
                    // 匹配过滤器
                    "_pattern": {
                        "type": "pattern_capture",
                        "preserve_original": "true",
                        "patterns": [
                            "([0-9])",
                            "([a-z])"
                        ]
                    },
                    # 标点过滤器
                    "punctuation_filter": {
                        "pattern": "[\\pP\\s]",
                        "type": "pattern_replace",
                        "replacement": ""
                    },
                    # 长度过滤器
                    "lengthFilter": {
                        "type": "length",
                        "min": "1"
                    }
                },
                # 分词器
                "analyzer": {
                    # search分词器，粗粒度的拆分，搜索时用
                    "ik_search_filter": {
                        "filter": [
                            "asciifolding",
                            "kstem",
                            "punctuation_filter",
                            "lengthFilter"
                        ],
                        "tokenizer": "ik_smart"
                    },
                    # pinyin分词器，细粒度划分，索引时用，索引时最大化的将文章内容分词
                    "ik_pinyin": {
                        "filter": [
                            "asciifolding",
                            "kstem",
                            "punctuation_filter",
                            "lengthFilter"
                        ],
                        "tokenizer": "ik_max_word"
                    }
                }
            },
            # 副本数量
            "number_of_replicas": "1"
        }
    }
}

mappings

具体含义，在代码里面有解释

"mappings": {
    # 遇到陌生字段，就报错
    "dynamic": "strict",
    "properties": {
        "createdAt": {
            "type": "long"
        },
        "description": {
            # 不分词
            "type": "keyword"
        },
        "descriptions": {
            "type": "text",
            # 使用settings配置的相关性分析
            "similarity": "zeroTf_similarity",
            # 索引时细粒度索引
            "analyzer": "ik_pinyin",
            # 搜索时粗粒度分词
            "search_analyzer": "ik_search_filter"
        },
        "title": {
            # 同上
            "type": "text",
            "similarity": "zeroTf_similarity",
            "analyzer": "ik_pinyin",
            "search_analyzer": "ik_search_filter"
        }
    }
}

term查询

精确查询，搜索前不会再对搜索词进行分词，所以我们的搜索词必须是文档分词集合中的一个

{
  "query":{
    "term":{
        "title":"北京奥运"
    }
  }
}

match查询

match搜索会先对搜索词进行分词，对于最基本的match搜索来说，只要搜索词的分词集合中的一个或多个存在于文档中即可，例如，当我们搜索中国杭州，搜索词会先分词为中国和杭州，只要文档中包含中国和杭州任意一个词，都会被搜索到

{
    "query": {
        "match": {
            "content": "中国杭州"
        }
    }
}

按天建索引

当前已有索引，只是要将它按天分，其实用reindex可以迁移数据，实现现有索引重命名，并把当前索引作为别名使用(需删除之前索引), 但是这种方式在数据量大时不合适，所以放弃。
创建索引模板后，可通过 /_template/test-template?pretty 查看索引模板

{
  "test-template" : {
    "index_patterns" : [
      "test-*"
    ],
    "settings" : {
      "index" : {
        "number_of_shards" : "5",
        "number_of_replicas" : "1"
      }
    },
    "mappings" : {
      "_doc" : {
        "properties" : {
          "client_appid" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          },

          "isSuccess" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          },
          "requestJson" : {
            "type" : "text",
            "fields" : {
              "keyword" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          }
        }
      }
    }
  }
}

将别名指向索引put /test/_alias/test-aliases, 查看别名 /*/_alias/test-aliases
按天rollover建索引

 // Daily rollover: ask Elasticsearch to roll "test" over to a new index named
 // test-<yyyy.MM.dd> once the current index is older than one day.
 // NOTE(review): the original note says "daily-test" is the alias of "test", while the
 // request below passes "test" as its first argument — confirm which name is the alias.
        String todaySuffix = new DateTime().toString("yyyy.MM.dd");
        RestHighLevelClient esClient = EsClient.getClient();
        RolloverRequest rolloverRequest = new RolloverRequest("test", "test-" + todaySuffix);
        rolloverRequest.addMaxIndexAgeCondition(TimeValue.parseTimeValue("1d", "max_age"));
        RolloverResponse rolloverResponse = esClient.indices().rollover(rolloverRequest, RequestOptions.DEFAULT);

使用RestHighLevelClient提供读的接口

创建es连接

/**
 * Holder for the single {@link RestHighLevelClient} shared by the application.
 *
 * <p>The client is built once, when this class is initialised, and handed out
 * through {@link #getClient()}.
 */
public class EsClient {
    private static final String PROD_HOST = "集群名称";
    private static final String TEST_HOST = "****";
    private static final String SCHEMA = "http";
    private static final String USERNAME = "username";
    private static final String AUTHORIZATION_CODE = "pwd";
    private static String HOST = "***";
    private static int PORT = 8080;
    private static RestHighLevelClient restClient;

    static {
        // The environment is read from the configuration center; production
        // overrides the default host and port.
        if (Foundation.server().getEnv().isPRO()) {
            HOST = "***";
            PORT = 80;
        }

        restClient = new RestHighLevelClient(
                RestClient.builder(new HttpHost(HOST, PORT, SCHEMA))
                        .setHttpClientConfigCallback(EsClient::withCredentialsAndPooling));
    }

    /**
     * @return the shared client created during class initialisation
     */
    public static RestHighLevelClient getClient() {
        return restClient;
    }

    /**
     * Applies basic-auth credentials, connection limits and a fixed keep-alive
     * duration to the underlying async HTTP client builder.
     *
     * @param httpClientBuilder builder supplied by the REST client
     * @return the same builder, configured
     */
    private static HttpAsyncClientBuilder withCredentialsAndPooling(HttpAsyncClientBuilder httpClientBuilder) {
        CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        // The credentials are registered for any host, any port (-1) and any realm.
        credentialsProvider.setCredentials(
                new AuthScope(AuthScope.ANY_HOST, -1, AuthScope.ANY_REALM),
                new UsernamePasswordCredentials(USERNAME, AUTHORIZATION_CODE));

        return httpClientBuilder
                .setDefaultCredentialsProvider(credentialsProvider)
                .setMaxConnTotal(500)
                .setMaxConnPerRoute(100)
                .setKeepAliveStrategy((response, context) -> 60000L);
    }
}

提供读接口, 查询条件按照直连客户端的dsl方式查

 /**
  * Runs a caller-supplied query DSL against Elasticsearch and returns the
  * response converted to a {@link JSONObject}.
  *
  * @param adapterRequestDto carries the comma-separated index names, an optional
  *                          mapping type and a body whose "request_body" entry
  *                          holds the query DSL
  * @return the search response wrapped in {@link ESSearchResponse} and serialised to JSON
  * @throws IOException if the DSL cannot be parsed or the search request fails
  */
 public JSONObject adapterNewEsRequest(AdapterRequestDto adapterRequestDto) throws IOException {
        RestHighLevelClient client = EsClient.getClient();

        // Target indices and mapping type; the type falls back to "_doc" when none is given.
        SearchRequest searchRequest = new SearchRequest(adapterRequestDto.getIndex().split(","));
        searchRequest.types(StringUtils.isEmpty(adapterRequestDto.getType()) ? "_doc" : adapterRequestDto.getType());

        // Turn the DSL carried in the request body into a SearchSourceBuilder.
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        String dslStr = JSONObject.toJSONString(adapterRequestDto.getBody().get("request_body"));
        SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList());
        // XContentParser is Closeable: try-with-resources releases it even when parsing throws.
        try (XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(
                new NamedXContentRegistry(searchModule.getNamedXContents()), DeprecationHandler.THROW_UNSUPPORTED_OPERATION, dslStr)) {
            sourceBuilder.parseXContent(parser);
        }
        sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
        searchRequest.source(sourceBuilder);

        // Execute the search and wrap the server response.
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        ESSearchResponse esSearchResponse = new ESSearchResponse(searchResponse);

        Gson gson = new Gson();
        return JSONObject.parseObject(gson.toJson(esSearchResponse));
    }

/**
 * Plain data view of an Elasticsearch {@link SearchResponse}: elapsed time,
 * timeout flag, cluster info, shard counters and the hits with their sources.
 */
public class ESSearchResponse {
    private Shards shards;
    private ESSearchHits hits;
    private long took;
    private Clusters clusters;
    private boolean timeOut;

    /**
     * Copies the relevant parts of the given response.
     *
     * @param searchResponse response to copy from; when null, every field keeps its default value
     */
    public ESSearchResponse(SearchResponse searchResponse) {
        if (searchResponse == null) {
            return;
        }
        this.took = searchResponse.getTook().getMillis();
        this.timeOut = searchResponse.isTimedOut();
        this.clusters = searchResponse.getClusters();
        // Hits and their sources.
        this.hits = new ESSearchHits(searchResponse.getHits());
        // Shard counters.
        this.shards = new Shards(searchResponse);
    }

    public Shards getShards() {
        return this.shards;
    }

    public void setShards(Shards shards) {
        this.shards = shards;
    }

    public ESSearchHits getHits() {
        return this.hits;
    }

    public void setHits(ESSearchHits hits) {
        this.hits = hits;
    }

    public long getTook() {
        return this.took;
    }

    public void setTook(long took) {
        this.took = took;
    }

    public Clusters getClusters() {
        return this.clusters;
    }

    public void setClusters(Clusters clusters) {
        this.clusters = clusters;
    }

    public boolean isTimeOut() {
        return this.timeOut;
    }

    public void setTimeOut(boolean timeOut) {
        this.timeOut = timeOut;
    }

    /**
     * Shard counters copied from the search response.
     */
    class Shards {
        int total;
        int successful;
        int failed;
        int skipped;

        public Shards(SearchResponse searchResponse) {
            this.total = searchResponse.getTotalShards();
            this.successful = searchResponse.getSuccessfulShards();
            this.failed = searchResponse.getFailedShards();
            this.skipped = searchResponse.getSkippedShards();
        }

        public int getTotal() {
            return this.total;
        }

        public void setTotal(int total) {
            this.total = total;
        }

        public int getSuccessful() {
            return this.successful;
        }

        public void setSuccessful(int successful) {
            this.successful = successful;
        }

        public int getFailed() {
            return this.failed;
        }

        public void setFailed(int failed) {
            this.failed = failed;
        }

        public int getSkipped() {
            return this.skipped;
        }

        public void setSkipped(int skipped) {
            this.skipped = skipped;
        }
    }

    /**
     * The list of hits plus the total hit count reported by Elasticsearch.
     */
    class ESSearchHits {
        private List<ESSearchHit> hits = new ArrayList<>();
        public long totalHits;

        ESSearchHits(SearchHits searchHits) {
            // Wrap each hit, keeping the order of the response.
            for (SearchHit searchHit : searchHits.getHits()) {
                this.hits.add(new ESSearchHit(searchHit));
            }
            this.totalHits = searchHits.getTotalHits();
        }

        public List<ESSearchHit> getHits() {
            return this.hits;
        }

        public void setHits(List<ESSearchHit> hits) {
            this.hits = hits;
        }

        public long getTotalHits() {
            return this.totalHits;
        }

        public void setTotalHits(long totalHits) {
            this.totalHits = totalHits;
        }
    }

    /**
     * A single hit: document id, mapping type and the source as a map.
     */
    class ESSearchHit {
        private String id;
        private String type;
        /**
         * Deliberately not camelCase, for compatibility with the front end.
         */
        private Map<String, Object> _source;

        ESSearchHit(SearchHit searchHit) {
            this._source = searchHit.getSourceAsMap();
            this.id = searchHit.getId();
            this.type = searchHit.getType();
        }

        public String getId() {
            return this.id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getType() {
            return this.type;
        }

        public void setType(String type) {
            this.type = type;
        }

        public Map<String, Object> get_source() {
            return this._source;
        }

        public void set_source(Map<String, Object> _source) {
            this._source = _source;
        }
    }
}

上线之后遇到 IO Reactor has stop, 大致意思就是RHLC客户端被stop了，查看线上日志时需要根据时间查看，并且除了观察与当前相关的错误日志，还要看下其他错误日志。看了日志发现es客户端执行前端传的dsl查询语句时正则解析爆栈了，在RHLC初始化时会初始化CloseableHttpAsyncClientBase，并初始化线程用作 I/O reactors ，并在后续启动。排查问题可根据报错信息排查。

[2019-12-21T14:02:32,839][DEBUG][o.e.a.s.TransportSearchAction] [node-1] All shards failed for phase: [query]
org.elasticsearch.index.query.QueryShardException: failed to create query: {
  "regexp" : {
    "test" : {
      "value" : "t{1,9500}",
      "flags_value" : 65535,
      "max_determinized_states" : 10000,
      "boost" : 1.0
    }
  }
}
    at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:324) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:307) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.parseSource(SearchService.java:769) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.createContext(SearchService.java:620) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.createAndPutContext(SearchService.java:595) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:386) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.access$100(SearchService.java:125) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:358) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:354) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$4.doRun(SearchService.java:1085) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:41) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:751) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-6.7.0.jar:6.7.0]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_151]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_151]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_151]
Caused by: java.lang.IllegalArgumentException: input automaton is too large: 1001
    //省略1000行重复的
    at org.apache.lucene.util.automaton.Operations.isFinite(Operations.java:1037) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.apache.lucene.util.automaton.CompiledAutomaton.<init>(CompiledAutomaton.java:217) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.apache.lucene.search.AutomatonQuery.<init>(AutomatonQuery.java:104) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.apache.lucene.search.AutomatonQuery.<init>(AutomatonQuery.java:81) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.apache.lucene.search.RegexpQuery.<init>(RegexpQuery.java:107) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.apache.lucene.search.RegexpQuery.<init>(RegexpQuery.java:92) ~[lucene-core-7.7.0.jar:7.7.0 8c831daf4eb41153c25ddb152501ab5bae3ea3d5 - jimczi - 2019-02-04 23:16:28]
    at org.elasticsearch.index.query.RegexpQueryBuilder.doToQuery(RegexpQueryBuilder.java:260) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.index.query.AbstractQueryBuilder.toQuery(AbstractQueryBuilder.java:105) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.index.query.QueryShardContext.lambda$toQuery$2(QueryShardContext.java:308) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:320) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.index.query.QueryShardContext.toQuery(QueryShardContext.java:307) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.parseSource(SearchService.java:769) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.createContext(SearchService.java:620) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.createAndPutContext(SearchService.java:595) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.executeQueryPhase(SearchService.java:386) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService.access$100(SearchService.java:125) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:358) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$2.onResponse(SearchService.java:354) [elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.search.SearchService$4.doRun(SearchService.java:1085) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.TimedRunnable.doRun(TimedRunnable.java:41) ~[elasticsearch-6.7.0.jar:6.7.0]
    at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:751) ~[elasticsearch-6.7.0.jar:6.7.0]

其中，参考了模糊查询导致Elasticsearch服务宕机, 有限状态机与 Lucene 的那些事（开篇）

参考

elasticsearch RestHighLevelClient Client 配置

ES工作总结

常用命令

in查询

创建索引

settings

mappings

term查询

match查询

按天建索引

使用RestHighLevelClient提供读的接口

其中，参考了模糊查询导致Elasticsearch服务宕机, 有限状态机与 Lucene 的那些事（开篇）

参考

猜你喜欢

热点阅读

ES工作总结

常用命令

in查询

创建索引

settings

mappings

term查询

match查询

按天建索引

使用RestHighLevelClient提供读的接口

其中，参考了 模糊查询导致Elasticsearch服务宕机, 有限状态机与 Lucene 的那些事（开篇）

参考

猜你喜欢

热点阅读

其中，参考了模糊查询导致Elasticsearch服务宕机, 有限状态机与 Lucene 的那些事（开篇）