ElasticStack
Nori plugin
닉의네임
2020. 6. 9. 14:38
반응형
테스트 환경
elasticsearch-7.6.1
kibana-7.6.1-linux-x86_64
aws ec2 ubuntu
위에 것들은 설치됐다 치고
[nori 설치]
bin/elasticsearch-plugin install analysis-nori
삭제
bin/elasticsearch-plugin remove analysis-nori
$ES_HOME/plugins/

** ES 재시작 **
테스트를 해보자
환경
스프링부트 + elasticsearch-rest-high-level-client + 스웨거
컨트롤러와 서비스
package com.doo.nori.controller;
import com.doo.nori.service.IndexTestService;
import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST controller mounted under {@code api} that exposes index operations.
 *
 * <p>All work is delegated to {@link IndexTestService}.
 */
@Api
@RestController
@RequestMapping("api")
@RequiredArgsConstructor
public class IndexRestController {

    /** Service that performs the actual index creation. */
    private final IndexTestService indexService;

    /**
     * Handles {@code POST /index} (relative to the class-level {@code api} mapping)
     * by delegating to {@link IndexTestService#createIndex()}.
     *
     * <p>Cross-origin requests from any origin are allowed for this endpoint.
     */
    @ApiOperation(value = "index", notes = "인덱스 생성")
    @PostMapping("/index")
    @CrossOrigin("*")
    public void createIndex() {
        indexService.createIndex();
    }
}
package com.doo.nori.service;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.stereotype.Service;
import java.io.IOException;
@Slf4j
@Service
public class IndexTestService {
public void createIndex() {
try (
RestHighLevelClient client = new RestHighLevelClient(
RestClient.builder(
new HttpHost("{아이피주소}", 9200, "http")
));
) {
String indexName = "nori";
String typeName = "_doc";
CreateIndexRequest request = new CreateIndexRequest(indexName);
String source = getSourceSetingMappingJson();
request.source(source, XContentType.JSON);
client.indices().create(request, RequestOptions.DEFAULT);
} catch (IOException e) {
e.printStackTrace();
}
}
public String getSourceSetingMappingJson(){
return "{\n" +
" \"settings\":{\n" +
" \"number_of_shards\": 5,\n" +
" \"number_of_replicas\": 0,\n" +
" \"analysis\":{\n" +
" \"tokenizer\":{\n" +
" \"korean_nori_tokenizer\":{\n" +
" \"type\":\"nori_tokenizer\",\n" +
" \"decompound_mode\":\"mixed\",\n" +
" \"user_dictionary\":\"user_dictionary.txt\"\n" +
" }\n" +
" },\n" +
" \"analyzer\":{\n" +
" \"nori_analyzer\":{\n" +
" \"type\":\"custom\",\n" +
" \"tokenizer\":\"korean_nori_tokenizer\",\n" +
" \"filter\":[\n" +
" \"nori_posfilter\",\n" +
" \"nori_readingform\",\n" +
" \"synonym_filtering\",\n" +
" \"stop_filtering\"\n" +
" ]\n" +
" }\n" +
" },\n" +
" \"filter\":{\n" +
" \"nori_posfilter\":{\n" +
" \"type\":\"nori_part_of_speech\",\n" +
" \"stoptags\":[\n" +
" \"E\",\"IC\",\"J\",\"MAG\",\"MM\",\"NA\",\"NR\",\"SC\",\n" +
" \"SE\",\"SF\",\"SH\",\"SL\",\"SN\",\"SP\",\"SSC\",\"SSO\",\n" +
" \"SY\",\"UNA\",\"UNKNOWN\",\"VA\",\"VCN\",\"VCP\",\"VSV\",\n" +
" \"VV\",\"VX\",\"XPN\",\"XR\",\"XSA\",\"XSN\",\"XSV\"\n" +
" ]\n" +
" },\n" +
" \"synonym_filtering\":{\n" +
" \"type\":\"synonym\"\n" +
" ,\"synonyms_path\":\"synonyms_dic.txt\"\n" +
" },\n" +
" \"stop_filtering\":{\n" +
" \"type\":\"stop\"\n" +
" ,\"stopwords_path\":\"stop_dic.txt\"\n" +
" }\n" +
" }\n" +
" }\n" +
" }\n" +
"}\n";
}
}
인덱스 생성 확인


조아! 시작
사전을 적용시켰으니 내용을 입력
사전파일 위치 $ES_HOME/config
user_dictionary.txt
오징어식당
골뱅이식당
인덱스 close -> open
GET nori/_analyze
{
"text": "오징어식당",
"analyzer": "nori_analyzer"
}
{
"tokens" : [
{
"token" : "오징어식당",
"start_offset" : 0,
"end_offset" : 5,
"type" : "word",
"position" : 0
}
]
}
원래는 '오징어'와 '식당'으로 분리되지만, 사용자 사전에 등록했기 때문에 하나의 고유명사(오징어식당)로 인식함
이젠 동의어 (synonyms_dic.txt 에 아래 규칙을 추가)
오징어, 골뱅이 => 해물
GET nori/_analyze
{
"text": "골뱅이 식당이 어디에 있습니까?",
"analyzer": "nori_analyzer"
}
{
"tokens" : [
{
"token" : "해물",
"start_offset" : 0,
"end_offset" : 3,
"type" : "SYNONYM",
"position" : 0
},
{
"token" : "식당",
"start_offset" : 4,
"end_offset" : 6,
"type" : "word",
"position" : 1
},
{
"token" : "어디",
"start_offset" : 8,
"end_offset" : 10,
"type" : "word",
"position" : 3
}
]
}
오늘은 여까지 졸려서 못하겠다
반응형