您的位置:首页 > 编程语言 > Java开发

java api访问elasticsearch下ik-pinyin分词基本使用

2017-08-10 22:44 337 查看
笔者在使用Java访问es创建ik分词时,查找api比较费时间,现将近期使用的api方法整理如下,若有更简单的方法欢迎拍砖。本实例代码仅用于功能测试,不适用于生产环境,因为没有进行严格的性能测试。本代码使用es版本2.3.5,ik版本1.9.5,ik-pinyin版本1.8.5(注意将zip包中的es版本由2.4.5修改为2.3.5)。

1. java获取es的连接

/**
 * Builds a transport client for a single Elasticsearch node on the local machine.
 *
 * <p>The client is configured for the cluster named {@code "elasticsearch"} and is given the
 * address {@code 127.0.0.1:9300}. Every call builds a new client; the caller is responsible for
 * closing it.
 *
 * @return a new client with the local node address registered
 * @throws IOException if the host address cannot be resolved
 */
public static Client getElasticSearchClient() throws IOException {
    // The cluster name must match the one configured on the node being contacted.
    Settings clusterSettings = Settings.settingsBuilder().put("cluster.name", "elasticsearch").build();

    // Address of the node to talk to.
    InetAddress host = InetAddress.getByName("127.0.0.1");
    InetSocketTransportAddress nodeAddress = new InetSocketTransportAddress(host, 9300);

    // Build the client first, then register the node address on it.
    TransportClient transportClient = TransportClient.builder().settings(clusterSettings).build();
    return transportClient.addTransportAddress(nodeAddress);
}


2.java创建索引(代码中的INDEX_NAME、TYPE_NAME为自定义的类常量,对应下文请求地址中的索引名medcl和类型名folks)
/**
 * Creates the index {@code INDEX_NAME} with ik / pinyin analysis settings and then puts the
 * mapping for {@code TYPE_NAME}.
 *
 * <p>Index settings: 5 shards, 1 replica, a {@code default} analyzer using the
 * {@code ik_max_word} tokenizer, and a {@code pinyin_analyzer} using the custom
 * {@code my_pinyin} tokenizer followed by the {@code word_delimiter} filter.
 *
 * <p>Mapping: {@code name} is a {@code multi_field} whose main sub-field is analyzed with
 * {@code pinyin_analyzer} and whose {@code primitive} sub-field is analyzed with
 * {@code ik_max_word}.
 *
 * <p>The outcome of each of the two steps is printed to standard output. The client is always
 * closed before the method returns, including when a request throws.
 *
 * @throws IOException if the client cannot be obtained or a JSON document cannot be built
 */
public static void creatIndexAndType() throws IOException {
    Client client = getElasticSearchClient();
    try {
        // JSON for the index settings. The top level holds settings directly.
        XContentBuilder source = XContentFactory.jsonBuilder()
            .startObject()
                .field("index.number_of_shards", 5)
                .field("index.number_of_replicas", 1)
                .startObject("analysis")
                    .startObject("analyzer")
                        .startObject("default")
                            .field("tokenizer", "ik_max_word")
                        .endObject()
                        .startObject("pinyin_analyzer")
                            .field("tokenizer", "my_pinyin")
                            .field("filter", "word_delimiter")
                        .endObject()
                    .endObject()
                    .startObject("tokenizer")
                        .startObject("my_pinyin")
                            .field("type", "pinyin")
                            .field("keep_separate_first_letter", "false")
                            // Written once only; the original emitted this key twice.
                            .field("keep_full_pinyin", "true")
                            .field("keep_original", "true")
                            .field("limit_first_letter_length", "16")
                            .field("lowercase", "true")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

        // Create the index.
        CreateIndexResponse indexResponse =
            client.admin().indices().prepareCreate(INDEX_NAME).setSource(source).execute().actionGet();
        if (indexResponse.isAcknowledged()) {
            System.out.println("创建成功");
        } else {
            System.out.println("创建失败");
        }

        // JSON for the type mapping.
        XContentBuilder mappings = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("_all")
                    .field("analyzer", "ik_max_word")
                .endObject()
                .startObject("properties")
                    .startObject("name")
                        .field("type", "multi_field")
                        .startObject("fields")
                            .startObject("name")
                                .field("type", "string")
                                .field("store", "no")
                                .field("term_vector", "with_positions_offsets")
                                .field("analyzer", "pinyin_analyzer")
                                .field("boost", "10")
                            .endObject()
                            .startObject("primitive")
                                .field("type", "string")
                                .field("store", "yes")
                                .field("analyzer", "ik_max_word")
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

        // Put the mapping for the type.
        PutMappingRequest mapping = Requests.putMappingRequest(INDEX_NAME).type(TYPE_NAME).source(mappings);
        PutMappingResponse putResponse = client.admin().indices().putMapping(mapping).actionGet();
        if (putResponse.isAcknowledged()) {
            System.out.println("创建成功");
        } else {
            System.out.println("创建失败");
        }
    } finally {
        // Release the client even when a request fails.
        client.close();
    }
}
3.批量插入数据
/**
 * Indexes four sample documents into {@code INDEX_NAME}/{@code TYPE_NAME} with one bulk request.
 *
 * <p>The documents get ids {@code "1"} to {@code "4"} and each carries a single {@code name}
 * field. A message is printed to standard output if the bulk response reports any failure. The
 * client is always closed before the method returns.
 *
 * @throws IOException if the client cannot be obtained or a JSON document cannot be built
 */
public static void insertIndexRecord() throws IOException {
    Client client = getElasticSearchClient();
    try {
        // Sample names; the document id is the 1-based position in this array.
        String[] names = {"张三", "李四", "王五", "赵六"};

        // Collect all index operations into one bulk request.
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (int i = 0; i < names.length; i++) {
            String id = String.valueOf(i + 1);
            bulkRequest.add(client.prepareIndex(INDEX_NAME, TYPE_NAME, id)
                .setSource(XContentFactory.jsonBuilder().startObject().field("name", names[i]).endObject()));
        }

        BulkResponse bulkResponse = bulkRequest.execute().actionGet();
        if (bulkResponse.hasFailures()) {
            System.out.println("索引创建失败!");
        }
    } finally {
        // Release the client even when the bulk request fails.
        client.close();
    }
}
4.修改数据

/**
 * Updates the document with id {@code "5"} in {@code INDEX_NAME}/{@code TYPE_NAME}, creating it
 * if it does not exist, and prints whether it was newly created.
 *
 * <p>The client is always closed before the method returns.
 *
 * @throws Exception if the client cannot be obtained, the JSON document cannot be built, or the
 *     update request fails
 */
public static void updateIndexRecord() throws Exception {
    Client client = getElasticSearchClient();
    try {
        // A plain update (setDoc without setDocAsUpsert) fails when the id does not exist, e.g.:
        //   [medcl][[medcl][1]] DocumentMissingException[[folks][5]: document missing]
        // With setDocAsUpsert(true) the partial document is used to update an existing document
        // or to create a new one when the id is missing.
        UpdateResponse updateResponse = client.prepareUpdate(INDEX_NAME, TYPE_NAME, "5")
            .setDoc(XContentFactory.jsonBuilder().startObject().field("name", "张三1").endObject())
            .setDocAsUpsert(true)
            .execute()
            .actionGet();
        System.out.println(updateResponse.isCreated());
    } finally {
        // Release the client even when the update fails.
        client.close();
    }
}
5.搜索查询数据

/**
 * Runs a match query on the {@code name} field of {@code INDEX_NAME}/{@code TYPE_NAME} and
 * prints the {@code name} of each hit on the first page of results (offset 0, page size 10).
 *
 * <p>The client is closed in a {@code finally} block, so it is released even when the search or
 * the result handling throws.
 *
 * @param key the text to search for in the {@code name} field
 * @throws Exception if the client cannot be obtained or the search request fails
 */
public static void findIndexRecordByName(String key) throws Exception {
    Client client = getElasticSearchClient();
    try {
        // Build the query and the search request.
        QueryBuilder bq = QueryBuilders.matchQuery("name", key);
        SearchRequestBuilder searchRequest = client.prepareSearch(INDEX_NAME).setTypes(TYPE_NAME);

        // Query condition and paging parameters.
        int start = 0;
        int size = 10;
        searchRequest.setQuery(bq).setFrom(start).setSize(size);

        // Execute the search and print the name of every returned hit.
        SearchResponse response = searchRequest.execute().actionGet();
        SearchHits shs = response.getHits();
        for (SearchHit hit : shs) {
            String name = (String) hit.getSource().get("name");
            System.out.println("[searchkey:" + key + "]" + name);
        }
    } finally {
        client.close();
    }
}


注意:为了便于本地进行分词查询,可以直接使用http请求。

查询分词
http://127.0.0.1:9200/medcl/_analyze?text=张三&analyzer=ik
http://127.0.0.1:9200/medcl/_analyze?text=张三&analyzer=pinyin_analyzer
直接查询结果

http://127.0.0.1:9200/medcl/folks/_search?q=name:zhang

查询结果并查看打分情况
http://127.0.0.1:9200/medcl/folks/_search?q=name:san&explain=true
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: