ElasticStack/Elasticsearch
[es] scripted similarity
닉의네임
2022. 6. 24. 11:47
반응형
from elasticsearch import Elasticsearch
import pprint as ppr
import json
# Name of the index every EsAPI helper below reads from or writes to.
index_name = "script-similarity-index"

# query_string clause: search for the term "foo" with a boost of 1.7.
# NOTE(review): this clause targets "field", while the mapping built by
# EsAPI.createIndex only defines "name" — confirm which field is intended.
query = {
    "query_string": {"query": "foo^1.7", "default_field": "field"},
}
class EsAPI:
    """Class-level helpers around a single shared Elasticsearch client.

    Every method is a ``classmethod`` that talks to the cluster through
    ``cls.es`` and prints the response; nothing is returned to the caller.
    The target index defaults to the module-level ``index_name``.
    """

    # Shared client, created once when the class body is executed.
    # NOTE(review): host and credentials are hard-coded in source — consider
    # loading them from configuration or the environment.
    es = Elasticsearch(hosts="localhost", port=9200, http_auth=('elastic', 'elastic1!'))

    @classmethod
    def srvHealthCheck(cls):
        """Print the cluster health response."""
        health = cls.es.cluster.health()
        print(health)

    @classmethod
    def allIndex(cls):
        """Print the ``cat.indices`` listing of all indices in the cluster."""
        print(cls.es.cat.indices())

    @classmethod
    def dataInsert(cls):
        """Index every product name from the local JSON file, then refresh.

        The file is expected to hold a JSON array of objects that each have
        a ``name`` key. Documents get sequential ids starting at 1, and each
        index response is printed.
        """
        with open("/Users/doo/doo_py/homeplus/data/similarity/products.json", "r", encoding="utf-8") as fjson:
            data = json.load(fjson)

        for n, i in enumerate(data):
            doc = {
                "name": i['name']
            }
            res_n = cls.es.index(index=index_name, doc_type="_doc", id=n + 1, body=doc)
            print(res_n)

        # Refresh so the documents just indexed are visible to searches.
        cls.es.indices.refresh(index=index_name)

    @classmethod
    def searchAll(cls, indx=None):
        """Run a ``match_all`` search and print the response as JSON.

        Args:
            indx: Index to search. Defaults to the module-level
                ``index_name`` when omitted or ``None``.
        """
        # Honour the caller-supplied index instead of silently ignoring it.
        target_index = index_name if indx is None else indx
        res_n = cls.es.search(
            index=target_index,
            doc_type="_doc",
            body={"query": {"match_all": {}}},
        )
        print(json.dumps(res_n, ensure_ascii=False, indent=4))

    @classmethod
    def searchFilter(cls):
        """Run the module-level ``query`` against the index and pretty-print it."""
        # The search body must carry the clause under a top-level "query" key.
        # Wrap a bare clause, but pass an already-wrapped body through as-is.
        body = query if "query" in query else {"query": query}
        res_n = cls.es.search(
            index=index_name,
            doc_type="_doc",
            body=body,
        )
        ppr.pprint(res_n)

    @classmethod
    def createIndex(cls):
        """Create the index with a scripted TF-IDF similarity on ``name``.

        The similarity script computes ``tf = sqrt(doc.freq)``,
        ``idf = log((docCount + 1) / (docFreq + 1)) + 1`` and
        ``norm = 1 / sqrt(doc.length)``, and returns
        ``query.boost * tf * idf * norm``.
        """
        cls.es.indices.create(
            index=index_name,
            body={
                "settings": {
                    "number_of_replicas": 0,
                    "number_of_shards": 1,
                    "similarity": {
                        "scripted_tfidf": {
                            "type": "scripted",
                            "script": {
                                "source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
                            }
                        }
                    }
                },
                "mappings": {
                    "properties": {
                        "name": {
                            "type": "text",
                            "similarity": "scripted_tfidf"
                        }
                    }
                }
            }
        )
# Manual driver: uncomment the helper(s) to run. Only searchFilter() is active.
# Presumably createIndex() and dataInsert() must have been run once before the
# searches return documents — TODO confirm.
# EsAPI.allIndex()
# EsAPI.srvHealthCheck()
# EsAPI.createIndex()
# EsAPI.dataInsert()
# EsAPI.searchAll()
EsAPI.searchFilter()
# Create an index whose "field" text field is scored by a scripted TF-IDF
# similarity: score = query.boost * sqrt(freq) * idf * 1/sqrt(doc.length).
PUT /scripted-similarity-index
{
"settings": {
"number_of_shards": 1,
"similarity": {
"scripted_tfidf": {
"type": "scripted",
"script": {
"source": "double tf = Math.sqrt(doc.freq); double idf = Math.log((field.docCount+1.0)/(term.docFreq+1.0)) + 1.0; double norm = 1/Math.sqrt(doc.length); return query.boost * tf * idf * norm;"
}
}
}
},
"mappings": {
"properties": {
"field": {
"type": "text",
"similarity": "scripted_tfidf"
}
}
}
}
# Document 1 contains the term "foo" twice.
PUT /scripted-similarity-index/_doc/1
{
"field": "foo bar foo"
}
# Document 2 does not contain "foo".
PUT /scripted-similarity-index/_doc/2
{
"field": "bar baz"
}
# Make both documents visible to search.
POST /scripted-similarity-index/_refresh
# Search for "foo" with a boost of 1.7; explain=true requests the score breakdown.
GET /scripted-similarity-index/_search?explain=true
{
"query": {
"query_string": {
"query": "foo^1.7",
"default_field": "field"
}
}
}
반응형