일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
 | | | 1 | 2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 28 | 29 | 30 | 31 | |
Tags
- ELASTIC
- licence delete curl
- flask
- TensorFlow
- Mac
- analyzer test
- Python
- query
- aggregation
- License
- Test
- Java
- matplotlib
- plugin
- springboot
- token filter test
- docker
- 900gle
- 파이썬
- Kafka
- zip 암호화
- API
- Elasticsearch
- zip 파일 암호화
- high level client
- license delete
- MySQL
- 차트
- sort
- aggs
Archives
- Today
- Total
개발잡부
[es] scripted similarity 본문
반응형
from elasticsearch import Elasticsearch
import pprint as ppr
import json
# Index that every EsAPI helper below reads from and writes to.
index_name = "script-similarity-index"

# Options of the query_string clause: search for the term "foo" with a
# 1.7 boost.
# NOTE(review): createIndex maps a `name` field, while this clause targets
# `field` (the field used by the console example) -- confirm which is intended.
_query_string_options = {"query": "foo^1.7", "default_field": "field"}

# Query clause handed to EsAPI.searchFilter.
query = {"query_string": _query_string_options}
class EsAPI:
    """Class-level helpers for trying out a scripted-similarity index.

    Every method is a classmethod that talks to the cluster through the
    shared ``es`` client and works on the module-level ``index_name``.
    Results are printed rather than returned.
    """

    # Shared client object, created once when the class body is executed.
    # NOTE(review): the credentials are hard-coded in source; consider loading
    # them from the environment or a config file before sharing this script.
    es = Elasticsearch(hosts="localhost", port=9200, http_auth=('elastic', 'elastic1!'))

    @classmethod
    def srvHealthCheck(cls):
        """Print the cluster health response."""
        health = cls.es.cluster.health()
        print(health)

    @classmethod
    def allIndex(cls):
        """Print the ``cat indices`` listing of all indices in Elasticsearch."""
        print(cls.es.cat.indices())

    @classmethod
    def dataInsert(cls):
        """Insert data: index the ``name`` of each product, then refresh.

        Reads the products JSON file, indexes one document per entry with
        ids starting at 1, prints every index response and finally refreshes
        the index so the documents are searchable.
        """
        # presumably the file holds a JSON array of objects that each have a
        # "name" key -- verify against the data file.
        with open("/Users/doo/doo_py/homeplus/data/similarity/products.json", "r", encoding="utf-8") as fjson:
            data = json.load(fjson)

        # The file is fully parsed above, so it is closed before any network
        # calls are made.
        for doc_id, product in enumerate(data, start=1):
            doc = {"name": product['name']}
            res_n = cls.es.index(index=index_name, doc_type="_doc", id=doc_id, body=doc)
            print(res_n)

        cls.es.indices.refresh(index=index_name)

    @classmethod
    def searchAll(cls, indx=None):
        """Data retrieval [all]: run ``match_all`` and print the response as JSON.

        Args:
            indx: Index to search. Falls back to the module-level
                ``index_name`` when omitted or empty.
        """
        # `indx` used to be accepted but silently ignored; honour it now.
        target_index = indx or index_name
        # No doc_type: typed search endpoints are deprecated.
        res_n = cls.es.search(
            index=target_index,
            body={"query": {"match_all": {}}},
        )
        print(json.dumps(res_n, ensure_ascii=False, indent=4))

    @classmethod
    def searchFilter(cls):
        """Data retrieval: run the module-level ``query`` clause and pretty-print it."""
        # A search body must carry the clause under a top-level "query" key;
        # sending the bare {"query_string": ...} object is rejected by the
        # server as an unknown key.
        res_n = cls.es.search(
            index=index_name,
            body={"query": query},
        )
        ppr.pprint(res_n)

    @classmethod
    def createIndex(cls):
        """Create the index with a scripted TF-IDF similarity on ``name``.

        The similarity script computes
        ``query.boost * sqrt(doc.freq) * (log((docCount+1)/(docFreq+1)) + 1)
        * 1/sqrt(doc.length)``.
        """
        cls.es.indices.create(
            index=index_name,
            body={
                "settings": {
                    "number_of_replicas": 0,
                    "number_of_shards": 1,
                    "similarity": {
                        "scripted_tfidf": {
                            "type": "scripted",
                            "script": {
                                "source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
                            }
                        }
                    }
                },
                "mappings": {
                    "properties": {
                        "name": {
                            "type": "text",
                            "similarity": "scripted_tfidf"
                        }
                    }
                }
            }
        )
# Manual driver: uncomment the step you want to run. createIndex and
# dataInsert are one-off setup steps to run before the search helpers.
# EsAPI.allIndex()        # print all indices
# EsAPI.srvHealthCheck()  # print cluster health
# EsAPI.createIndex()     # create the scripted-similarity index
# EsAPI.dataInsert()      # index the product names
# EsAPI.searchAll()       # match_all search
# Currently active step: run the module-level `query` and print the hits.
EsAPI.searchFilter()
# Create the index with a custom "scripted_tfidf" similarity and apply it to
# the text field "field". The script scores a term as
# query.boost * sqrt(doc.freq) * (log((docCount+1)/(docFreq+1)) + 1) * 1/sqrt(doc.length).
PUT /scripted-similarity-index
{
"settings": {
"number_of_shards": 1,
"similarity": {
"scripted_tfidf": {
"type": "scripted",
"script": {
"source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
}
}
}
},
"mappings": {
"properties": {
"field": {
"type": "text",
"similarity": "scripted_tfidf"
}
}
}
}
# Document 1: contains the term "foo" twice.
PUT /scripted-similarity-index/_doc/1
{
"field": "foo bar foo"
}
# Document 2: does not contain "foo".
PUT /scripted-similarity-index/_doc/2
{
"field": "bar baz"
}
# Refresh so both documents are visible to the search below.
POST /scripted-similarity-index/_refresh
# Search for "foo" with a 1.7 boost; explain=true returns the score breakdown.
GET /scripted-similarity-index/_search?explain=true
{
"query": {
"query_string": {
"query": "foo^1.7",
"default_field": "field"
}
}
}
반응형
'ElasticStack > Elasticsearch' 카테고리의 다른 글
[es] sort - payload sort (0) | 2022.06.30 |
---|---|
[es] script similarity test (0) | 2022.06.26 |
[es] Similarity module (0) | 2022.06.24 |
[es] Nested Query vs Object Query (0) | 2022.06.21 |
[es] nested query test (0) | 2022.06.21 |
Comments