반응형
Recent Posts
Recent Comments
관리 메뉴

개발잡부

[es] aggregation test 3 본문

ElasticStack/Elasticsearch

[es] aggregation test 3

닉의네임 2022. 7. 19. 16:30
반응형
import json
import time

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from ssl import create_default_context
import matplotlib.pyplot as plt
from matplotlib.collections import EventCollection
import numpy as np

# Select the font family matplotlib uses for all text in the figure.
# NOTE(review): presumably chosen so the Korean axis labels render; the font
# must be installed on the machine running the script - confirm.
plt.rcParams.update({'font.family': 'AppleGothic'})


##### SEARCHING #####
def _terms_aggregation(field):
    """Return the ``terms`` aggregation definition used for ``field``."""
    return {
        "terms": {
            "field": field,
            "size": 10,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            # Sent as the string "false", exactly as before.
            # NOTE(review): a JSON boolean was presumably intended - confirm.
            "show_term_doc_count_error": "false",
            "order": [
                {"_count": "desc"},
                {"_key": "asc"},
            ],
        }
    }


def _build_search_body():
    """Return the request body that is sent, unchanged, to both indices."""
    return {
        "size": SEARCH_SIZE,
        "query": {"match_all": {}},
        "aggregations": {
            "MALL_TYPE": _terms_aggregation("mallType"),
            "BENEFIT": _terms_aggregation("benefit"),
            "GRADE": {
                "range": {
                    "field": "grade",
                    "ranges": [
                        {"to": 1},
                        {"from": 1, "to": 2},
                        {"from": 2, "to": 3},
                        {"from": 3, "to": 4},
                        {"from": 4, "to": 5},
                        {"from": 5},
                    ],
                    # String "false" kept as in the original request.
                    "keyed": "false",
                }
            },
        },
    }


def _time_searches(index_name, repeats):
    """Run the benchmark search ``repeats`` times against ``index_name``.

    Returns a tuple ``(last_response, elapsed_ms)`` where ``elapsed_ms`` holds
    the unrounded client-side duration of every request in milliseconds.
    """
    last_response = None
    elapsed_ms = []
    for _ in range(repeats):
        # perf_counter is monotonic, so an interval cannot be distorted by a
        # system clock adjustment the way a time.time() difference can.
        started = time.perf_counter()
        last_response = client.search(
            index=index_name,
            body=_build_search_body(),
        )
        elapsed_ms.append((time.perf_counter() - started) * 1000)
    return last_response, elapsed_ms


def _plot_timings(data_a, data_n):
    """Draw both timing series on one axis and show the figure."""
    xdata = range(LIMIT)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(xdata, data_a, color='tab:blue')
    ax.plot(xdata, data_n, color='tab:orange')
    ax.set_ylabel('query 속도(ms)')
    ax.set_xlabel('반복횟수(회)')

    # Tick-like event markers: x positions along the bottom edge, measured
    # values along the left edge, one collection per series.
    collections = (
        EventCollection(xdata, color='tab:blue', linelength=0.05),
        EventCollection(xdata, color='tab:orange', linelength=0.05),
        EventCollection(data_a, color='tab:blue', linelength=0.05,
                        orientation='vertical'),
        EventCollection(data_n, color='tab:orange', linelength=0.05,
                        orientation='vertical'),
    )
    for collection in collections:
        ax.add_collection(collection)

    ax.set_xlim([0, len(xdata)])
    # Fixed 0-100 ms scale, as in the original; slower requests fall outside
    # the visible area.
    ax.set_ylim([0, 100])

    ax.set_title('array aggs vs nested aggs')

    plt.show()


def handle_query():
    """Benchmark one aggregation query on two indices and plot the timings.

    Sends the same ``match_all`` + aggregations request ``LIMIT`` times to
    ``ARRAY_INDEX_NAME`` and then ``LIMIT`` times to ``NESTED_INDEX_NAME``,
    prints the total hit count and the duration of the last request for each
    index, prints every per-request duration (ms, rounded to 2 decimals) and
    finally shows a line chart of both series.

    Reads the module-level names ``client``, ``LIMIT``, ``SEARCH_SIZE``,
    ``ARRAY_INDEX_NAME`` and ``NESTED_INDEX_NAME``; returns ``None``.
    """
    if LIMIT <= 0:
        # Without at least one request there is no response or timing to
        # report (the original raised NameError at the first summary print).
        print("LIMIT must be at least 1; nothing to measure.")
        return

    # All requests for the first index run before any for the second one.
    response_a, elapsed_a = _time_searches(ARRAY_INDEX_NAME, LIMIT)
    response_n, elapsed_n = _time_searches(NESTED_INDEX_NAME, LIMIT)

    data_a = [round(ms, 2) for ms in elapsed_a]
    data_n = [round(ms, 2) for ms in elapsed_n]

    print()
    print("{} total hits.".format(response_a["hits"]["total"]["value"]))
    print("{} total hits.".format(response_n["hits"]["total"]["value"]))

    # Duration of the final request against each index.
    print("search time_a: {:.2f} ms".format(elapsed_a[-1]))
    print("search time_n: {:.2f} ms".format(elapsed_n[-1]))

    print(data_a)
    print(data_n)

    _plot_timings(data_a, data_n)


##### MAIN SCRIPT #####

if __name__ == '__main__':
    # Index names now match the constants they are bound to. They were
    # crossed before ("aggs_nested" under ARRAY_INDEX_NAME and vice versa),
    # so every "_a" / "_n" figure was reported under the other index's label.
    ARRAY_INDEX_NAME = "aggs_array"
    NESTED_INDEX_NAME = "aggs_nested"
    # Passed as the search "size": 0 requests no hit documents, so only the
    # totals and aggregations come back.
    SEARCH_SIZE = 0

    # Number of timed requests sent to each index.
    LIMIT = 100
    # NOTE(review): credentials are embedded in the URL and the host/port
    # part looks like a placeholder - replace before running. Certificate
    # verification is disabled.
    client = Elasticsearch("https://elastic:dlengus@돔에인:폿드/", ca_certs=False,
                           verify_certs=False)
    print("start")
    handle_query()
반응형

'ElasticStack > Elasticsearch' 카테고리의 다른 글

[es] aggregation test 4  (0) 2022.07.20
[es] _update_by_query  (0) 2022.07.20
[es] aggregation test 2  (0) 2022.07.17
[es] nested aggregation  (0) 2022.07.16
[es] aggregation test 1  (0) 2022.07.16
Comments