1.1 IK分詞器的安裝
1.1.1 前期準備工作
1)CentOS聯網
配置CentOS能連接外網。Linux虛擬機ping www.baidu.com 是暢通的
參照官方文檔安裝IK分詞器的兩種方法:
1、直接到
https://github.com/medcl/elasticsearch-analysis-ik/releases下載對應版本zip包
解壓到elasticsearch的plugin目錄下
unzip elasticsearch-analysis-ik-5.5.1.zip
2、使用elasticsearch-plugin命令安裝
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.1/elasticsearch-analysis-ik-5.5.1.zip
重新啟動elasticsearch
1.2 IK分詞器的使用
1.2.1 命令行查看結果
ik_smart模式
[itstar@bigdata11 elasticsearch]$ curl -XGET 'http://bigdata11:9200/_analyze?pretty&analyzer=ik_smart' -d '中華人民共和國'
{
"tokens" : [
{
"token" : "中華人民共和國",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
}
]
}
ik_max_word模式
[itstar@bigdata11 elasticsearch]$ curl -XGET 'http://bigdata11:9200/_analyze?pretty&analyzer=ik_max_word' -d '中華人民共和國'
{
"tokens" : [
{
"token" : "中華人民共和國",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "中華人民",
"start_offset" : 0,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "中華",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "華人",
"start_offset" : 1,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "人民共和國",
"start_offset" : 2,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "人民",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 5
},
{
"token" : "共和國",
"start_offset" : 4,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 6
},
{
"token" : "共和",
"start_offset" : 4,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 7
},
{
"token" : "國",
"start_offset" : 6,
"end_offset" : 7,
"type" : "CN_CHAR",
"position" : 8
}
]
}
1.2.2 JavaAPI操作
1)創建索引
//創建索引(數據庫)
@Test
public void createIndex() {
//創建索引
client.admin().indices().prepareCreate("blog4").get();
//關閉資源
client.close();
}
2)創建mapping
//創建使用ik分詞器的mapping
@Test
public void createMapping() throws Exception {
// 1設置mapping
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.startObject("article")
.startObject("properties")
.startObject("id1")
.field("type", "string")
.field("store", "yes")
.field("analyzer","ik_smart")
.endObject()
.startObject("title2")
.field("type", "string")
.field("store", "no")
.field("analyzer","ik_smart")
.endObject()
.startObject("content")
.field("type", "string")
.field("store", "yes")
.field("analyzer","ik_smart")
.endObject()
.endObject()
.endObject()
.endObject();
// 2 添加mapping
PutMappingRequest mapping = Requests.putMappingRequest("blog4").type("article").source(builder);
client.admin().indices().putMapping(mapping).get();
// 3 關閉資源
client.close();
}
3)插入數據
//創建文檔,以map形式
@Test
public void createDocumentByMap() {
HashMap map = new HashMap<>();
map.put("id1", "2");
map.put("title2", "Lucene");
map.put("content", "它提供了一個分佈式的web接口");
IndexResponse response = client.prepareIndex("blog4", "article", "3").setSource(map).execute().actionGet();
//打印返回的結果
System.out.println("結果:" + response.getResult());
System.out.println("id:" + response.getId());
System.out.println("index:" + response.getIndex());
System.out.println("type:" + response.getType());
System.out.println("版本:" + response.getVersion());
//關閉資源
client.close();
}
4)詞條查詢
//詞條查詢
@Test
public void queryTerm() {
SearchResponse response = client.prepareSearch("blog4").setTypes("article").setQuery(QueryBuilders.termQuery("content","提供")).get();
//獲取查詢命中結果
SearchHits hits = response.getHits();
System.out.println("結果條數:" + hits.getTotalHits());
for (SearchHit hit : hits) {
System.out.println(hit.getSourceAsString());
}
}
5)結果查看