关于全文搜索elasticsearch中matchQuery和termQuery的区别
2016-07-22 18:10
676 查看
如题,本文主要讲关于全文搜索elasticsearch中matchQuery和termQuery的区别,附带一点其它查询用法。
准备工作,下载相关jar包,本文所有jar包列表:
apache-log4j-extras-1.2.17.jar
commons-cli-1.3.1.jar
compiler-0.8.13.jar
compress-lzf-1.0.2.jar
elasticsearch-2.3.1.jar
guava-18.0.jar
HdrHistogram-2.1.6.jar
hppc-0.7.1.jar
jackson-core-2.6.2.jar
jackson-dataformat-cbor-2.6.2.jar
jackson-dataformat-smile-2.6.2.jar
jackson-dataformat-yaml-2.6.2.jar
jna-4.1.0.jar
joda-convert-1.2.jar
joda-time-2.8.2.jar
jsr166e-1.1.0.jar
jts-1.13.jar
log4j-1.2.17.jar
lucene-analyzers-common-5.5.0.jar
lucene-backward-codecs-5.5.0.jar
lucene-core-5.5.0.jar
lucene-grouping-5.5.0.jar
lucene-highlighter-5.5.0.jar
lucene-join-5.5.0.jar
lucene-memory-5.5.0.jar
lucene-misc-5.5.0.jar
lucene-queries-5.5.0.jar
lucene-queryparser-5.5.0.jar
lucene-sandbox-5.5.0.jar
lucene-spatial3d-5.5.0.jar
lucene-spatial-5.5.0.jar
lucene-suggest-5.5.0.jar
netty-3.10.5.Final.jar
securesm-1.0.jar
snakeyaml-1.15.jar
spatial4j-0.5.jar
t-digest-3.0.jar
获取java客户端工具类:
package com.syz.es.util;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
/**
 * Provides a lazily created, process-wide Elasticsearch transport client
 * for the cluster named "elasticsearch" on localhost:9300.
 */
public class EsUtil {
    /**
     * Shared client instance. Declared volatile because
     * {@link #getTransportClient()} reads it outside the lock.
     */
    private static volatile Client client = null;

    /**
     * Returns the shared client, building a new one when none exists yet or
     * when the current one reports no connected nodes.
     *
     * @return the shared transport client
     * @throws UnknownHostException if "localhost" cannot be resolved
     */
    public static Client getTransportClient() throws UnknownHostException {
        Client current = client;
        if (needsNewClient(current)) {
            synchronized (EsUtil.class) {
                current = client;
                if (needsNewClient(current)) {
                    // Resolve the host first so a lookup failure cannot leave
                    // a freshly built (and never closed) client behind.
                    InetAddress host = InetAddress.getByName("localhost");
                    Settings settings = Settings.settingsBuilder()
                            .put("cluster.name", "elasticsearch")
                            .build();
                    Client stale = current;
                    current = TransportClient.builder().settings(settings)
                            .build()
                            .addTransportAddresses(
                                    new InetSocketTransportAddress(host, 9300));
                    client = current;
                    // The replaced client would otherwise never be closed.
                    closeQuietly(stale);
                }
            }
        }
        return current;
    }

    /**
     * Closes the given client; {@code null} is ignored. When the argument is
     * the shared instance, the shared reference is cleared as well so the next
     * {@link #getTransportClient()} call builds a fresh client.
     *
     * @param client the client to close, may be {@code null}
     */
    public static void close(Client client) {
        if (client == null) {
            return;
        }
        synchronized (EsUtil.class) {
            if (EsUtil.client == client) {
                EsUtil.client = null;
            }
        }
        client.close();
    }

    /** Tells whether {@code candidate} is missing or has no connected nodes. */
    private static boolean needsNewClient(Client candidate) {
        return candidate == null
                || ((TransportClient) candidate).connectedNodes().isEmpty();
    }

    /** Best-effort close of a replaced client; a failure must not block the new one. */
    private static void closeQuietly(Client stale) {
        if (stale == null) {
            return;
        }
        try {
            stale.close();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }
}
一个pojo类
package com.syz.es.pojo;
/**
 * Simple mutable bean describing one sample user document.
 * Each property is kept next to its accessor pair.
 */
public class User {

    /** Identifier. */
    private String id;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** Second identifier field. */
    private String id2;

    public String getId2() {
        return this.id2;
    }

    public void setId2(String id2) {
        this.id2 = id2;
    }

    /** Display name. */
    private String name;

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** Age. */
    private int age;

    public int getAge() {
        return this.age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    /** Salary. */
    private double salary;

    public double getSalary() {
        return this.salary;
    }

    public void setSalary(double salary) {
        this.salary = salary;
    }
}
创建索引,类型基本上与User对应,其中id为默认分词,id2设置为不分词。
package com.syz.es.indices;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import com.syz.es.util.EsUtil;
/**
 * Recreates the "product" index and installs the mapping for type "user":
 * {@code id} and {@code name} are plain strings, {@code id2} is a
 * {@code not_analyzed} string, {@code age} is an integer and {@code salary}
 * a double.
 */
public class IndicesCreate {
    public static void main(String[] args) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            // Delete only when the index is present: deleting a missing index
            // fails, which would abort the very first run before the create.
            boolean exists = client.admin().indices().prepareExists("product")
                    .execute().actionGet().isExists();
            if (exists) {
                client.admin().indices().prepareDelete("product").execute()
                        .actionGet();
            }
            // create
            client.admin().indices().prepareCreate("product").execute()
                    .actionGet();
            XContentBuilder mapping = XContentFactory.jsonBuilder()
                    .startObject()
                        .startObject("properties")
                            .startObject("id")
                                .field("type", "string")
                            .endObject()
                            .startObject("id2")
                                .field("type", "string")
                                .field("index", "not_analyzed")
                            .endObject()
                            .startObject("name")
                                .field("type", "string")
                            .endObject()
                            .startObject("age")
                                .field("type", "integer")
                            .endObject()
                            .startObject("salary")
                                .field("type", "double")
                            .endObject()
                        .endObject()
                    // Close the root object explicitly rather than leaving it open.
                    .endObject();
            PutMappingRequest paramPutMappingRequest = Requests
                    .putMappingRequest("product").type("user").source(mapping);
            client.admin().indices().putMapping(paramPutMappingRequest)
                    .actionGet();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
索引数据
package com.syz.es.indices;
import java.io.IOException;
import java.util.Random;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import com.syz.es.pojo.User;
import com.syz.es.util.EsUtil;
public class IndicesIndex {
private static final Random r = new Random();
private static final String pool = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
private static final String symbol = ".*&?-+@%";
public static void main(String[] args) {
Client client = null;
try {
client = EsUtil.getTransportClient();
System.out.println("number--------");
indexNumber(client);
System.out.println("lower--------");
indexLower(client);
System.out.println("upper--------");
indexUpper(client);
System.out.println("mixed--------");
indexMixed(client);
System.out.println("symbol--------");
indexSymbol(client);
} catch (Exception e) {
e.printStackTrace();
} finally {
EsUtil.close(client);
}
}
private static void indexNumber(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = 10000 + i + "";
String id2 = id;
String name = "张三";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexLower(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomLower();
String id2 = id;
String name = "李四";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexUpper(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomUpper();
String id2 = id;
String name = "王二麻子";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexMixed(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomMixed();
String id2 = id;
String name = "店小二";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String j
4000
son = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexSymbol(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomSymbol();
String id2 = id;
String name = "屠夫";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static String getRandomLower() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(26);
char c = (char) ('a' + cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomUpper() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(26);
char c = (char) ('A' + cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomMixed() {
StringBuilder sb = new StringBuilder();
int psize = pool.length();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomSymbol() {
StringBuilder sb = new StringBuilder();
int psize = pool.length();
for (int i = 0; i < 5; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
int cidx2 = r.nextInt(symbol.length());
char c2 = symbol.charAt(cidx2);
sb.append(c2);
for (int i = 0; i < 14; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
return sb.toString();
}
private static String generateJson(User user) {
String json = "";
try {
XContentBuilder contentBuilder = XContentFactory.jsonBuilder()
.startObject();
contentBuilder.field("id", user.getId());
contentBuilder.field("id2", user.getId2());
contentBuilder.field("name", user.getName());
contentBuilder.field("age", user.getAge());
contentBuilder.field("salary", user.getSalary());
json = contentBuilder.endObject().string();
} catch (IOException e) {
e.printStackTrace();
}
return json;
}
}
列举一下刚才建的这5种数据:
辅助类:
package com.syz.es.search;
import java.util.Map;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
/** Console helper for dumping search results. */
public class Printer {
    /**
     * Prints the source of every hit on its own line.
     *
     * @param hits the hits to print
     */
    public static void print(SearchHits hits) {
        for (SearchHit each : hits) {
            System.out.println(each.getSource());
        }
    }
}
matchQuery:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/**
 * Demonstrates match_all and match queries against index "product",
 * type "user".
 */
public class Match {
    /** Marker meaning "do not set an explicit result size on the request". */
    private static final int NO_SIZE = -1;

    public static void main(String[] args) {
        matchAll();
        matchAllSize();
        matchQuery();
    }

    /** Runs a match_all query without an explicit size. */
    private static void matchAll() {
        searchAndPrint(QueryBuilders.matchAllQuery(), NO_SIZE);
    }

    /** Runs a match_all query limited to 3 returned hits. */
    private static void matchAllSize() {
        searchAndPrint(QueryBuilders.matchAllQuery(), 3);
    }

    /**
     * Tokenizing rules of the default standard analyzer:<br>
     * most punctuation is removed and used as a split point, and the
     * resulting words are lower-cased and stored as tokens.<br>
     * For a not_analyzed field the original value is stored as a single
     * token.<br>
     * How matchQuery works: it checks whether the field is analyzed; if so
     * the query text is analyzed first and its tokens are matched, otherwise
     * the query text is matched against the tokens directly.<br>
     * id and id2 hold the same value; id is analyzed by default, id2 is
     * not analyzed.<br>
     * Taking wwIF5-vP3J4l3GJ6VN3h as an example:<br>
     * the tokens of id are [wwif5,vp3j4l3gj6vn3h]<br>
     * the tokens of id2 are [wwIF5-vP3J4l3GJ6VN3h]<br>
     * Expected results:<br>
     * 1. matchQuery("id", text) returns hits when the analyzed text contains
     * at least one of [wwif5,vp3j4l3gj6vn3h],<br>
     * e.g. wwIF5-vP3J4l3GJ6VN3h, wwif5-vp3j4l3gj6vn3h, wwIF5, wwif5,
     * wwIF5-6666 and so on.<br>
     * 2. matchQuery("id2", "wwIF5-vP3J4l3GJ6VN3h") returns hits.<br>
     * Note:<br>
     * when the mapping does not specify "index":"not_analyzed", the default
     * analyzer is used; a specific analyzer can be configured instead.<br>
     * To inspect the analysis, open in a browser:<br>
     * http://localhost:9200/_analyze?pretty&analyzer=standard&text=J4Kz1%26LbvjoQFE9gHC7H<br>
     * J4Kz1&LbvjoQFE9gHC7H is split into j4kz1 and lbvjoqfe9ghc7h.<br>
     * %26 is the URL-encoded &amp;; characters such as &amp; and ? have a
     * special meaning in URLs, so encode them or try other symbols.<br>
     */
    private static void matchQuery() {
        searchAndPrint(
                QueryBuilders.matchQuery("id", "wwif56,vp3j4l3gj6vn3h"),
                NO_SIZE);
    }

    /**
     * Executes the query against "product"/"user" and prints the total hit
     * count, the number of returned hits, and each hit's source.
     *
     * @param query the query to run
     * @param size  maximum number of hits to return, or {@link #NO_SIZE} to
     *              leave the request's size unset
     */
    private static void searchAndPrint(QueryBuilder query, int size) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            SearchResponse response = size == NO_SIZE
                    ? client.prepareSearch("product").setTypes("user")
                            .setQuery(query).get()
                    : client.prepareSearch("product").setTypes("user")
                            .setQuery(query).setSize(size).get();
            SearchHits hits = response.getHits();
            long total = hits.getTotalHits();
            System.out.println("total:" + total);
            int len = hits.getHits().length;
            System.out.println("len:" + len);
            Printer.print(hits);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
termQuery:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/** Demonstrates a term query against index "product", type "user". */
public class Term {
    public static void main(String[] args) {
        termQuery();
    }

    /**
     * Tokenizing rules of the default standard analyzer:<br>
     * most punctuation is removed and used as a split point, and the
     * resulting words are lower-cased and stored as tokens.<br>
     * For a not_analyzed field the original value is stored as a single
     * token.<br>
     * How termQuery works: the given value is matched against the tokens
     * directly.<br>
     * id and id2 hold the same value; id is analyzed by default, id2 is
     * not analyzed.<br>
     * Taking wwIF5-vP3J4l3GJ6VN3h as an example:<br>
     * the tokens of id are [wwif5,vp3j4l3gj6vn3h]<br>
     * the tokens of id2 are [wwIF5-vP3J4l3GJ6VN3h]<br>
     * Expected results:<br>
     * 1. termQuery("id", "wwif5") returns hits.<br>
     * 2. termQuery("id", "vp3j4l3gj6vn3h") returns hits.<br>
     * 3. termQuery("id2", "wwIF5-vP3J4l3GJ6VN3h") returns hits.<br>
     */
    private static void termQuery() {
        Client esClient = null;
        try {
            esClient = EsUtil.getTransportClient();
            QueryBuilder byIdToken = QueryBuilders.termQuery("id", "wwif5");
            SearchResponse result = esClient.prepareSearch("product")
                    .setTypes("user")
                    .setQuery(byIdToken)
                    .execute()
                    .actionGet();
            SearchHits found = result.getHits();
            System.out.println("total:" + found.getTotalHits());
            System.out.println("len:" + found.getHits().length);
            Printer.print(found);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            EsUtil.close(esClient);
        }
    }
}
说明:
1.为了方便在全文文本字段中进行这些类型的查询,Elasticsearch首先对文本分析(analyzes),然后使用结果建立一个倒排索引。
2.Elasticsearch使用一种叫做倒排索引(inverted index)的结构来做快速的全文搜索。倒排索引由在文档中出现的唯一的单词列表,以及对于每个单词在文档中的位置组成。
一些其它查询:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/**
 * Demonstrates bool queries that combine match clauses on "age" and "name"
 * against index "product", type "user".
 */
public class MatchBoolean {
    public static void main(String[] args) {
        matchBoolean1();
        matchBoolean2();
    }

    /** Both clauses are "must": age matches 16 AND name matches "李四". */
    private static void matchBoolean1() {
        searchAndPrint(QueryBuilders.boolQuery()
                .must(QueryBuilders.matchQuery("age", 16))
                .must(QueryBuilders.matchQuery("name", "李四")));
    }

    /**
     * The name clause is "must"; the age clause is only "should".
     * NOTE(review): with a must clause present, the should clause is
     * presumably optional and only influences scoring - confirm against the
     * bool query documentation for the Elasticsearch version in use.
     */
    private static void matchBoolean2() {
        searchAndPrint(QueryBuilders.boolQuery()
                .should(QueryBuilders.matchQuery("age", 16))
                .must(QueryBuilders.matchQuery("name", "李四")));
    }

    /**
     * Executes the query against "product"/"user" and prints the total hit
     * count, the number of returned hits, and each hit's source.
     *
     * @param query the query to run
     */
    private static void searchAndPrint(QueryBuilder query) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            SearchResponse response = client.prepareSearch("product")
                    .setTypes("user").setQuery(query).get();
            SearchHits hits = response.getHits();
            long total = hits.getTotalHits();
            System.out.println("total:" + total);
            int len = hits.getHits().length;
            System.out.println("len:" + len);
            Printer.print(hits);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/** Demonstrates a prefix query against index "product", type "user". */
public class Prefix {
    public static void main(String[] args) {
        prefixQuery();
    }

    /**
     * Runs a prefix query on field id2, using b3IAs@HhVBZPC6tVfyaK as the
     * example value, and prints the total, the returned count and the hits.
     */
    private static void prefixQuery() {
        Client esClient = null;
        try {
            esClient = EsUtil.getTransportClient();
            QueryBuilder byPrefix = QueryBuilders.prefixQuery("id2",
                    "b3IAs@HhVBZPC6tVfyaK");
            SearchResponse result = esClient.prepareSearch("product")
                    .setTypes("user")
                    .setQuery(byPrefix)
                    .execute()
                    .actionGet();
            SearchHits found = result.getHits();
            System.out.println("total:" + found.getTotalHits());
            System.out.println("len:" + found.getHits().length);
            Printer.print(found);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            EsUtil.close(esClient);
        }
    }
}
准备工作,下载相关jar包,本文所有jar包列表:
apache-log4j-extras-1.2.17.jar
commons-cli-1.3.1.jar
compiler-0.8.13.jar
compress-lzf-1.0.2.jar
elasticsearch-2.3.1.jar
guava-18.0.jar
HdrHistogram-2.1.6.jar
hppc-0.7.1.jar
jackson-core-2.6.2.jar
jackson-dataformat-cbor-2.6.2.jar
jackson-dataformat-smile-2.6.2.jar
jackson-dataformat-yaml-2.6.2.jar
jna-4.1.0.jar
joda-convert-1.2.jar
joda-time-2.8.2.jar
jsr166e-1.1.0.jar
jts-1.13.jar
log4j-1.2.17.jar
lucene-analyzers-common-5.5.0.jar
lucene-backward-codecs-5.5.0.jar
lucene-core-5.5.0.jar
lucene-grouping-5.5.0.jar
lucene-highlighter-5.5.0.jar
lucene-join-5.5.0.jar
lucene-memory-5.5.0.jar
lucene-misc-5.5.0.jar
lucene-queries-5.5.0.jar
lucene-queryparser-5.5.0.jar
lucene-sandbox-5.5.0.jar
lucene-spatial3d-5.5.0.jar
lucene-spatial-5.5.0.jar
lucene-suggest-5.5.0.jar
netty-3.10.5.Final.jar
securesm-1.0.jar
snakeyaml-1.15.jar
spatial4j-0.5.jar
t-digest-3.0.jar
获取java客户端工具类:
package com.syz.es.util;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
/**
 * Provides a lazily created, process-wide Elasticsearch transport client
 * for the cluster named "elasticsearch" on localhost:9300.
 */
public class EsUtil {
    /**
     * Shared client instance. Declared volatile because
     * {@link #getTransportClient()} reads it outside the lock.
     */
    private static volatile Client client = null;

    /**
     * Returns the shared client, building a new one when none exists yet or
     * when the current one reports no connected nodes.
     *
     * @return the shared transport client
     * @throws UnknownHostException if "localhost" cannot be resolved
     */
    public static Client getTransportClient() throws UnknownHostException {
        Client current = client;
        if (needsNewClient(current)) {
            synchronized (EsUtil.class) {
                current = client;
                if (needsNewClient(current)) {
                    // Resolve the host first so a lookup failure cannot leave
                    // a freshly built (and never closed) client behind.
                    InetAddress host = InetAddress.getByName("localhost");
                    Settings settings = Settings.settingsBuilder()
                            .put("cluster.name", "elasticsearch")
                            .build();
                    Client stale = current;
                    current = TransportClient.builder().settings(settings)
                            .build()
                            .addTransportAddresses(
                                    new InetSocketTransportAddress(host, 9300));
                    client = current;
                    // The replaced client would otherwise never be closed.
                    closeQuietly(stale);
                }
            }
        }
        return current;
    }

    /**
     * Closes the given client; {@code null} is ignored. When the argument is
     * the shared instance, the shared reference is cleared as well so the next
     * {@link #getTransportClient()} call builds a fresh client.
     *
     * @param client the client to close, may be {@code null}
     */
    public static void close(Client client) {
        if (client == null) {
            return;
        }
        synchronized (EsUtil.class) {
            if (EsUtil.client == client) {
                EsUtil.client = null;
            }
        }
        client.close();
    }

    /** Tells whether {@code candidate} is missing or has no connected nodes. */
    private static boolean needsNewClient(Client candidate) {
        return candidate == null
                || ((TransportClient) candidate).connectedNodes().isEmpty();
    }

    /** Best-effort close of a replaced client; a failure must not block the new one. */
    private static void closeQuietly(Client stale) {
        if (stale == null) {
            return;
        }
        try {
            stale.close();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }
}
一个pojo类
package com.syz.es.pojo;
/**
 * Simple mutable bean describing one sample user document.
 * Each property is kept next to its accessor pair.
 */
public class User {

    /** Identifier. */
    private String id;

    public String getId() {
        return this.id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** Second identifier field. */
    private String id2;

    public String getId2() {
        return this.id2;
    }

    public void setId2(String id2) {
        this.id2 = id2;
    }

    /** Display name. */
    private String name;

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** Age. */
    private int age;

    public int getAge() {
        return this.age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    /** Salary. */
    private double salary;

    public double getSalary() {
        return this.salary;
    }

    public void setSalary(double salary) {
        this.salary = salary;
    }
}
创建索引,类型基本上与User对应,其中id为默认分词,id2设置为不分词。
package com.syz.es.indices;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import com.syz.es.util.EsUtil;
/**
 * Recreates the "product" index and installs the mapping for type "user":
 * {@code id} and {@code name} are plain strings, {@code id2} is a
 * {@code not_analyzed} string, {@code age} is an integer and {@code salary}
 * a double.
 */
public class IndicesCreate {
    public static void main(String[] args) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            // Delete only when the index is present: deleting a missing index
            // fails, which would abort the very first run before the create.
            boolean exists = client.admin().indices().prepareExists("product")
                    .execute().actionGet().isExists();
            if (exists) {
                client.admin().indices().prepareDelete("product").execute()
                        .actionGet();
            }
            // create
            client.admin().indices().prepareCreate("product").execute()
                    .actionGet();
            XContentBuilder mapping = XContentFactory.jsonBuilder()
                    .startObject()
                        .startObject("properties")
                            .startObject("id")
                                .field("type", "string")
                            .endObject()
                            .startObject("id2")
                                .field("type", "string")
                                .field("index", "not_analyzed")
                            .endObject()
                            .startObject("name")
                                .field("type", "string")
                            .endObject()
                            .startObject("age")
                                .field("type", "integer")
                            .endObject()
                            .startObject("salary")
                                .field("type", "double")
                            .endObject()
                        .endObject()
                    // Close the root object explicitly rather than leaving it open.
                    .endObject();
            PutMappingRequest paramPutMappingRequest = Requests
                    .putMappingRequest("product").type("user").source(mapping);
            client.admin().indices().putMapping(paramPutMappingRequest)
                    .actionGet();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
索引数据
package com.syz.es.indices;
import java.io.IOException;
import java.util.Random;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import com.syz.es.pojo.User;
import com.syz.es.util.EsUtil;
public class IndicesIndex {
private static final Random r = new Random();
private static final String pool = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
private static final String symbol = ".*&?-+@%";
public static void main(String[] args) {
Client client = null;
try {
client = EsUtil.getTransportClient();
System.out.println("number--------");
indexNumber(client);
System.out.println("lower--------");
indexLower(client);
System.out.println("upper--------");
indexUpper(client);
System.out.println("mixed--------");
indexMixed(client);
System.out.println("symbol--------");
indexSymbol(client);
} catch (Exception e) {
e.printStackTrace();
} finally {
EsUtil.close(client);
}
}
private static void indexNumber(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = 10000 + i + "";
String id2 = id;
String name = "张三";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexLower(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomLower();
String id2 = id;
String name = "李四";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexUpper(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomUpper();
String id2 = id;
String name = "王二麻子";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexMixed(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomMixed();
String id2 = id;
String name = "店小二";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String j
4000
son = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static void indexSymbol(Client client) {
for (int i = 0; i < 10; i++) {
User u = new User();
String id = getRandomSymbol();
String id2 = id;
String name = "屠夫";
int age = r.nextInt(100);
double salary = r.nextDouble() * 10000;
u.setId(id);
u.setId2(id2);
u.setName(name);
u.setAge(age);
u.setSalary(salary);
String json = generateJson(u);
System.out.println(i + "==data index begin:" + json);
IndexResponse response = client.prepareIndex("product", "user")
.setSource(json).get();
System.out.println(i + "==data index end:" + response.getId());
}
}
private static String getRandomLower() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(26);
char c = (char) ('a' + cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomUpper() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(26);
char c = (char) ('A' + cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomMixed() {
StringBuilder sb = new StringBuilder();
int psize = pool.length();
for (int i = 0; i < 20; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
return sb.toString();
}
private static String getRandomSymbol() {
StringBuilder sb = new StringBuilder();
int psize = pool.length();
for (int i = 0; i < 5; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
int cidx2 = r.nextInt(symbol.length());
char c2 = symbol.charAt(cidx2);
sb.append(c2);
for (int i = 0; i < 14; i++) {
int cidx = r.nextInt(psize);
char c = pool.charAt(cidx);
sb.append(c);
}
return sb.toString();
}
private static String generateJson(User user) {
String json = "";
try {
XContentBuilder contentBuilder = XContentFactory.jsonBuilder()
.startObject();
contentBuilder.field("id", user.getId());
contentBuilder.field("id2", user.getId2());
contentBuilder.field("name", user.getName());
contentBuilder.field("age", user.getAge());
contentBuilder.field("salary", user.getSalary());
json = contentBuilder.endObject().string();
} catch (IOException e) {
e.printStackTrace();
}
return json;
}
}
列举一下刚才建的这5种数据:
辅助类:
package com.syz.es.search;
import java.util.Map;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
/** Console helper for dumping search results. */
public class Printer {
    /**
     * Prints the source of every hit on its own line.
     *
     * @param hits the hits to print
     */
    public static void print(SearchHits hits) {
        for (SearchHit each : hits) {
            System.out.println(each.getSource());
        }
    }
}
matchQuery:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/**
 * Demonstrates match_all and match queries against index "product",
 * type "user".
 */
public class Match {
    /** Marker meaning "do not set an explicit result size on the request". */
    private static final int NO_SIZE = -1;

    public static void main(String[] args) {
        matchAll();
        matchAllSize();
        matchQuery();
    }

    /** Runs a match_all query without an explicit size. */
    private static void matchAll() {
        searchAndPrint(QueryBuilders.matchAllQuery(), NO_SIZE);
    }

    /** Runs a match_all query limited to 3 returned hits. */
    private static void matchAllSize() {
        searchAndPrint(QueryBuilders.matchAllQuery(), 3);
    }

    /**
     * Tokenizing rules of the default standard analyzer:<br>
     * most punctuation is removed and used as a split point, and the
     * resulting words are lower-cased and stored as tokens.<br>
     * For a not_analyzed field the original value is stored as a single
     * token.<br>
     * How matchQuery works: it checks whether the field is analyzed; if so
     * the query text is analyzed first and its tokens are matched, otherwise
     * the query text is matched against the tokens directly.<br>
     * id and id2 hold the same value; id is analyzed by default, id2 is
     * not analyzed.<br>
     * Taking wwIF5-vP3J4l3GJ6VN3h as an example:<br>
     * the tokens of id are [wwif5,vp3j4l3gj6vn3h]<br>
     * the tokens of id2 are [wwIF5-vP3J4l3GJ6VN3h]<br>
     * Expected results:<br>
     * 1. matchQuery("id", text) returns hits when the analyzed text contains
     * at least one of [wwif5,vp3j4l3gj6vn3h],<br>
     * e.g. wwIF5-vP3J4l3GJ6VN3h, wwif5-vp3j4l3gj6vn3h, wwIF5, wwif5,
     * wwIF5-6666 and so on.<br>
     * 2. matchQuery("id2", "wwIF5-vP3J4l3GJ6VN3h") returns hits.<br>
     * Note:<br>
     * when the mapping does not specify "index":"not_analyzed", the default
     * analyzer is used; a specific analyzer can be configured instead.<br>
     * To inspect the analysis, open in a browser:<br>
     * http://localhost:9200/_analyze?pretty&analyzer=standard&text=J4Kz1%26LbvjoQFE9gHC7H<br>
     * J4Kz1&LbvjoQFE9gHC7H is split into j4kz1 and lbvjoqfe9ghc7h.<br>
     * %26 is the URL-encoded &amp;; characters such as &amp; and ? have a
     * special meaning in URLs, so encode them or try other symbols.<br>
     */
    private static void matchQuery() {
        searchAndPrint(
                QueryBuilders.matchQuery("id", "wwif56,vp3j4l3gj6vn3h"),
                NO_SIZE);
    }

    /**
     * Executes the query against "product"/"user" and prints the total hit
     * count, the number of returned hits, and each hit's source.
     *
     * @param query the query to run
     * @param size  maximum number of hits to return, or {@link #NO_SIZE} to
     *              leave the request's size unset
     */
    private static void searchAndPrint(QueryBuilder query, int size) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            SearchResponse response = size == NO_SIZE
                    ? client.prepareSearch("product").setTypes("user")
                            .setQuery(query).get()
                    : client.prepareSearch("product").setTypes("user")
                            .setQuery(query).setSize(size).get();
            SearchHits hits = response.getHits();
            long total = hits.getTotalHits();
            System.out.println("total:" + total);
            int len = hits.getHits().length;
            System.out.println("len:" + len);
            Printer.print(hits);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
termQuery:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/** Demonstrates a term query against index "product", type "user". */
public class Term {
    public static void main(String[] args) {
        termQuery();
    }

    /**
     * Tokenizing rules of the default standard analyzer:<br>
     * most punctuation is removed and used as a split point, and the
     * resulting words are lower-cased and stored as tokens.<br>
     * For a not_analyzed field the original value is stored as a single
     * token.<br>
     * How termQuery works: the given value is matched against the tokens
     * directly.<br>
     * id and id2 hold the same value; id is analyzed by default, id2 is
     * not analyzed.<br>
     * Taking wwIF5-vP3J4l3GJ6VN3h as an example:<br>
     * the tokens of id are [wwif5,vp3j4l3gj6vn3h]<br>
     * the tokens of id2 are [wwIF5-vP3J4l3GJ6VN3h]<br>
     * Expected results:<br>
     * 1. termQuery("id", "wwif5") returns hits.<br>
     * 2. termQuery("id", "vp3j4l3gj6vn3h") returns hits.<br>
     * 3. termQuery("id2", "wwIF5-vP3J4l3GJ6VN3h") returns hits.<br>
     */
    private static void termQuery() {
        Client esClient = null;
        try {
            esClient = EsUtil.getTransportClient();
            QueryBuilder byIdToken = QueryBuilders.termQuery("id", "wwif5");
            SearchResponse result = esClient.prepareSearch("product")
                    .setTypes("user")
                    .setQuery(byIdToken)
                    .execute()
                    .actionGet();
            SearchHits found = result.getHits();
            System.out.println("total:" + found.getTotalHits());
            System.out.println("len:" + found.getHits().length);
            Printer.print(found);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            EsUtil.close(esClient);
        }
    }
}
说明:
1.为了方便在全文文本字段中进行这些类型的查询,Elasticsearch首先对文本分析(analyzes),然后使用结果建立一个倒排索引。
2.Elasticsearch使用一种叫做倒排索引(inverted index)的结构来做快速的全文搜索。倒排索引由在文档中出现的唯一的单词列表,以及对于每个单词在文档中的位置组成。
一些其它查询:
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/**
 * Demonstrates bool queries that combine match clauses on "age" and "name"
 * against index "product", type "user".
 */
public class MatchBoolean {
    public static void main(String[] args) {
        matchBoolean1();
        matchBoolean2();
    }

    /** Both clauses are "must": age matches 16 AND name matches "李四". */
    private static void matchBoolean1() {
        searchAndPrint(QueryBuilders.boolQuery()
                .must(QueryBuilders.matchQuery("age", 16))
                .must(QueryBuilders.matchQuery("name", "李四")));
    }

    /**
     * The name clause is "must"; the age clause is only "should".
     * NOTE(review): with a must clause present, the should clause is
     * presumably optional and only influences scoring - confirm against the
     * bool query documentation for the Elasticsearch version in use.
     */
    private static void matchBoolean2() {
        searchAndPrint(QueryBuilders.boolQuery()
                .should(QueryBuilders.matchQuery("age", 16))
                .must(QueryBuilders.matchQuery("name", "李四")));
    }

    /**
     * Executes the query against "product"/"user" and prints the total hit
     * count, the number of returned hits, and each hit's source.
     *
     * @param query the query to run
     */
    private static void searchAndPrint(QueryBuilder query) {
        Client client = null;
        try {
            client = EsUtil.getTransportClient();
            SearchResponse response = client.prepareSearch("product")
                    .setTypes("user").setQuery(query).get();
            SearchHits hits = response.getHits();
            long total = hits.getTotalHits();
            System.out.println("total:" + total);
            int len = hits.getHits().length;
            System.out.println("len:" + len);
            Printer.print(hits);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            EsUtil.close(client);
        }
    }
}
package com.syz.es.search; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHits; import com.syz.es.util.EsUtil; public class Terms { public static void main(String[] args) { termsQuery(); } private static void termsQuery() { Client client = null; try { client = EsUtil.getTransportClient(); // termsQuery的第二个参数可以是数组,也可以是集合 QueryBuilder query = QueryBuilders.termsQuery("age", new int[] { 11, 16 }); SearchResponse response = client.prepareSearch("product") .setTypes("user").setQuery(query).execute().actionGet(); SearchHits hits = response.getHits(); long total = hits.getTotalHits(); System.out.println("total:" + total); int len = hits.getHits().length; System.out.println("len:" + len); Printer.print(hits); } catch (Exception e) { e.printStackTrace(); } finally { EsUtil.close(client); } } }
package com.syz.es.search;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;
/**
 * Demo of a prefix query on the "id2" field of the "product" index, "user" type.
 */
public class Prefix {

    public static void main(String[] args) {
        prefixQuery();
    }

    /**
     * Uses b3IAs@HhVBZPC6tVfyaK as the example value: runs a prefix query for
     * it on "id2" and prints the total hit count, the number of hits returned,
     * and the hits themselves.
     */
    private static void prefixQuery() {
        Client esClient = null;
        try {
            esClient = EsUtil.getTransportClient();
            QueryBuilder byPrefix =
                    QueryBuilders.prefixQuery("id2", "b3IAs@HhVBZPC6tVfyaK");
            SearchResponse result = esClient
                    .prepareSearch("product")
                    .setTypes("user")
                    .setQuery(byPrefix)
                    .execute()
                    .actionGet();

            SearchHits found = result.getHits();
            System.out.println("total:" + found.getTotalHits());
            System.out.println("len:" + found.getHits().length);
            Printer.print(found);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            EsUtil.close(esClient);
        }
    }
}
package com.syz.es.search;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHits;
import com.syz.es.util.EsUtil;

/**
 * Demo of a range query on the "age" field of the "product" index, "user" type.
 */
public class Range {

    public static void main(String[] args) {
        rangeQuery();
    }

    /**
     * Searches for documents with "age" greater than 10 and less than 20, then
     * prints the total hit count, the number of hits returned, and the hits.
     */
    private static void rangeQuery() {
        Client esClient = null;
        try {
            esClient = EsUtil.getTransportClient();
            QueryBuilder ageBetween = QueryBuilders.rangeQuery("age").gt(10).lt(20);
            SearchResponse result = esClient
                    .prepareSearch("product")
                    .setTypes("user")
                    .setQuery(ageBetween)
                    .execute()
                    .actionGet();

            SearchHits found = result.getHits();
            System.out.println("total:" + found.getTotalHits());
            System.out.println("len:" + found.getHits().length);
            Printer.print(found);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            EsUtil.close(esClient);
        }
    }
}
package com.syz.es.search; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHits; import com. c3b7 syz.es.util.EsUtil; public class Regexp { public static void main(String[] args) { regexpQuery(); } private static void regexpQuery() { Client client = null; try { client = EsUtil.getTransportClient(); QueryBuilder query = QueryBuilders.regexpQuery("id2", "[a-z,A-Z]{5}\\+.*"); SearchResponse response = client.prepareSearch("product") .setTypes("user").setQuery(query).setFrom(0).setSize(50) .execute().actionGet(); SearchHits hits = response.getHits(); long total = hits.getTotalHits(); System.out.println("total:" + total); int len = hits.getHits().length; System.out.println("len:" + len); Printer.print(hits); } catch (Exception e) { e.printStackTrace(); } finally { EsUtil.close(client); } } }
package com.syz.es.search; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHits; import com.syz.es.util.EsUtil; public class Wildcard { public static void main(String[] args) { wildcardQuery1(); wildcardQuery2(); } private static void wildcardQuery1() { Client client = null; try { client = EsUtil.getTransportClient(); QueryBuilder query = QueryBuilders.wildcardQuery("id", "1*"); SearchResponse response = client.prepareSearch("product") .setTypes("user").setQuery(query).execute().actionGet(); SearchHits hits = response.getHits(); long total = hits.getTotalHits(); System.out.println("total:" + total); int len = hits.getHits().length; System.out.println("len:" + len); Printer.print(hits); } catch (Exception e) { e.printStackTrace(); } finally { EsUtil.close(client); } } private static void wildcardQuery2() { Client client = null; try { client = EsUtil.getTransportClient(); QueryBuilder query = QueryBuilders.wildcardQuery("id", "1000?"); SearchResponse response = client.prepareSearch("product") .setTypes("user").setQuery(query).execute().actionGet(); SearchHits hits = response.getHits(); long total = hits.getTotalHits(); System.out.println("total:" + total); int len = hits.getHits().length; System.out.println("len:" + len); Printer.print(hits); } catch (Exception e) { e.printStackTrace(); } finally { EsUtil.close(client); } } }
package com.syz.es.search; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg; import org.elasticsearch.search.aggregations.metrics.sum.InternalSum; import com.syz.es.util.EsUtil; public class Avg { public static void main(String[] args) { avgQuery(); } private static void avgQuery() { Client client = null; try { client = EsUtil.getTransportClient(); QueryBuilder query = QueryBuilders.matchQuery("name", "张三"); SearchResponse response = client.prepareSearch("product") .setTypes("user").setQuery(query) .addAggregation( AggregationBuilders.avg("age_avg").field("age")) .addAggregation( AggregationBuilders.sum("salary_sum").field( "salary")) .execute().actionGet(); SearchHits hits = response.getHits(); InternalAvg agg = response.getAggregations().get("age_avg"); System.out.println(agg.getName() + "\t" + agg.getValue()); InternalSum agg2 = response.getAggregations().get("salary_sum"); System.out.println(agg2.getName() + "\t" + agg2.getValue()); long total = hits.getTotalHits(); System.out.println("total:" + total); int len = hits.getHits().length; System.out.println("len:" + len); Printer.print(hits); } catch (Exception e) { e.printStackTrace(); } finally { EsUtil.close(client); } } }
相关文章推荐
- java对世界各个时区(TimeZone)的通用转换处理方法(转载)
- java-注解annotation
- java-模拟tomcat服务器
- java-用HttpURLConnection发送Http请求.
- java-WEB中的监听器Lisener
- Android IPC进程间通讯机制
- Android Native 绘图方法
- Android java 与 javascript互访(相互调用)的方法例子
- 介绍一款信息管理系统的开源框架---jeecg
- 聚类算法之kmeans算法java版本
- java实现 PageRank算法
- 在Kubernetes上搭建EFK(Fluentd+Elasticsearch+Kibana)
- PropertyChangeListener简单理解
- c++11 + SDL2 + ffmpeg +OpenAL + java = Android播放器
- 插入排序
- 冒泡排序
- 堆排序
- 快速排序