Skip to content
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ data/cogstack_search_results/

# Default environments
venv

# python cache folder
__pycache__
24 changes: 17 additions & 7 deletions cogstack.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

import getpass
from typing import Dict, List, Any, Optional, Iterable, Tuple
import elasticsearch
Expand All @@ -6,13 +7,23 @@
from tqdm.notebook import tqdm
import eland as ed

# Suppress security-related warnings raised by the Elasticsearch client.
# Without this, connections that use self-signed certificates or plain HTTP
# emit insecure-connection warnings.
import warnings
warnings.filterwarnings("ignore")
from elastic_transport import SecurityWarning
from urllib3.exceptions import InsecureRequestWarning

from credentials import *
# Reset all filters: resetwarnings() discards every warning filter installed
# so far, so the rules below are applied on top of an empty filter list.
warnings.resetwarnings()

# "module" action: report a DeprecationWarning only once per issuing module,
# for modules whose name matches the regex "cogstack".
warnings.filterwarnings("module", category=DeprecationWarning, module="cogstack")
# Silence SecurityWarning (from elastic_transport) and InsecureRequestWarning
# (from urllib3), i.e. the insecure-connection warnings.
warnings.filterwarnings('ignore', category=SecurityWarning)
warnings.filterwarnings('ignore', category=InsecureRequestWarning)

from credentials import *

class CogStack(object):
warnings.warn("cogstack module is deprecated, use cogstack2 instead.", DeprecationWarning)
"""
A class for interacting with Elasticsearch.

Expand All @@ -31,22 +42,22 @@ def __init__(self, hosts: List, username: Optional[str] = None, password: Option
self.elastic = elasticsearch.Elasticsearch(hosts=hosts,
api_key=api_key,
verify_certs=False,
timeout=timeout)
request_timeout=timeout)


elif api:
api_username, api_password = self._check_auth_details(username, password)
self.elastic = elasticsearch.Elasticsearch(hosts=hosts,
api_key=(api_username, api_password),
verify_certs=False,
timeout=timeout)
request_timeout=timeout)

else:
username, password = self._check_auth_details(username, password)
self.elastic = elasticsearch.Elasticsearch(hosts=hosts,
basic_auth=(username, password),
verify_certs=False,
timeout=timeout)
request_timeout=timeout)


def _check_auth_details(self, username=None, password=None) -> Tuple[str, str]:
Expand Down Expand Up @@ -108,7 +119,7 @@ def cogstack2df(self, query: Dict, index: str, column_headers=None, es_gen_size:
size=es_gen_size,
request_timeout=request_timeout)
temp_results = []
results = self.elastic.count(index=index, query=query['query'], request_timeout=300) # type: ignore
results = self.elastic.count(index=index, query=query['query']) # type: ignore
for hit in tqdm(docs_generator, total=results['count'], desc="CogStack retrieved...", disable=not show_progress):
row = dict()
row['_index'] = hit['_index']
Expand Down Expand Up @@ -155,4 +166,3 @@ def list_chunker(user_list: List[Any], n: int) -> List[List[Any]]:

def _no_progress_bar(iterable: Iterable, **kwargs):
return iterable

Loading