
Commit 980c650

Refactor ver1.0.0 (#10) (#11)
* Added a variable for selecting the index_frequency postfix
* Supports daily, weekly, monthly and yearly postfixes to the index name
* Changed a map call for an iterator when creating actions, to reduce the chance of extra memory use (trade-off with speed)
* Added sonar support

This closes #7, closes #9. Beta version ready for release.
1 parent e704720 commit 980c650

11 files changed, +322 -120 lines
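As a quick illustration of the new index_name_frequency option described in the commit message, a handler configured for weekly index rotation might look like this (a minimal sketch only; host, port and index prefix are placeholders) ::

    from cmreslogging.handlers import CMRESHandler

    handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}],
                           auth_type=CMRESHandler.AuthType.NO_AUTH,
                           es_index_name="my_python_index",
                           # new in this commit: choose how often the index name rolls over
                           index_name_frequency=CMRESHandler.IndexNameFrequency.WEEKLY)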

.gitignore (+6)

@@ -25,6 +25,12 @@ var/
 .installed.cfg
 *.egg

+pylint.txt
+coverage.xml
+pylint_*
+.pylint.d/
+.sonar/
+
 # PyInstaller
 # Usually these files are written by a python script from a template
 # before PyInstaller builds the exe, so as to inject date/other infos into it.

.travis.yml (+4 -3)

@@ -1,10 +1,11 @@
 language: python

-env:
-  - TOXENV=py27
+python:
+  - "2.7"
+  - "3.6"

 install:
-  - pip install tox
+  - pip install tox-travis
   - pip install codecov
   - mkdir /tmp/elasticsearch
   - wget -O - http://s3-eu-west-1.amazonaws.com/build.eu-west-1.elastic.co/origin/2.0/nightly/JDK7/elasticsearch-latest-SNAPSHOT.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1

README.rst (+23 -8)

@@ -23,19 +23,30 @@ Install using pip::

     pip install CMRESHandler

-Requirements
-============
+Requirements Python 2
+=====================
 This library requires the following dependencies
- - requests
- - requests-kerberos
  - elasticsearch
+ - requests
  - enum

+
+Requirements Python 3
+=====================
+This library requires the following dependencies
+ - elasticsearch
+ - requests
+
+Additional requirements for Kerberos support
+============================================
+The package also optionally supports Kerberos authentication by adding the following dependency
+ - requests-kerberos
+
 Using the handler in your program
 ==================================
 To initialise and create the handler, just add the handler to your logger as follow ::

-    import CMRESHandler
+    from cmreslogging.handlers import CMRESHandler
     handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}],
                            auth_type=CMRESHandler.AuthType.NO_AUTH,
                            es_index_name="my_python_index")
@@ -45,7 +56,7 @@ To initialise and create the handler, just add the handler to your logger as fol

 You can add fields upon initialisation, providing more data of the execution context ::

-    import CMRESHandler
+    from cmreslogging.handlers import CMRESHandler
     handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}],
                            auth_type=CMRESHandler.AuthType.NO_AUTH,
                            es_index_name="my_python_index",
@@ -89,6 +100,10 @@ The constructors takes the following parameters:
 - flush_frequency_in_sec: A float representing how often and when the buffer will be flushed
 - es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a date with
   YYYY.MM.dd, ``python_logger`` used by default
+- index_name_frequency: The frequency to use as part of the index naming. Currently supports
+  CMRESHandler.IndexNameFrequency.DAILY, CMRESHandler.IndexNameFrequency.WEEKLY,
+  CMRESHandler.IndexNameFrequency.MONTHLY and CMRESHandler.IndexNameFrequency.YEARLY; by default the daily rotation
+  is used
 - es_doc_type: A string with the name of the document type that will be used ``python_log`` used by default
 - es_additional_fields: A dictionary with all the additional fields that you would like to add to the logs

@@ -98,7 +113,7 @@ It is also very easy to integrate the handler to `Django <https://www.djangoproj
 better, at DEBUG level django logs information such as how long it takes for DB connections to return so
 they can be plotted on Kibana, or the SQL statements that Django executed. ::

-    from cmreshandler.cmreshandler import CMRESHandler
+    from cmreslogging.handlers import CMRESHandler
     LOGGING = {
         'version': 1,
         'disable_existing_loggers': False,
@@ -112,7 +127,7 @@ they can be plotted on Kibana, or the SQL statements that Django executed. ::
         },
         'elasticsearch': {
             'level': 'DEBUG',
-            'class': 'cmreshandler.cmreshandler.CMRESHandler',
+            'class': 'cmreslogging.handlers.CMRESHandler',
             'hosts': [{'host': 'localhost', 'port': 9200}],
             'es_index_name': 'my_python_app',
             'es_additional_fields': {'App': 'Test', 'Environment': 'Dev'},
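To make the index_name_frequency documentation above concrete, the snippet below reproduces the naming scheme implemented in cmreslogging/handlers.py further down; the prefix and date are made up for the example ::

    import datetime

    prefix = "my_python_index"            # hypothetical es_index_name
    now = datetime.datetime(2017, 1, 18)  # a Wednesday

    daily = "{0}-{1}".format(prefix, now.strftime('%Y.%m.%d'))          # my_python_index-2017.01.18
    week_start = now - datetime.timedelta(days=now.weekday())
    weekly = "{0}-{1}".format(prefix, week_start.strftime('%Y.%m.%d'))  # my_python_index-2017.01.16
    monthly = "{0}-{1}".format(prefix, now.strftime('%Y.%m'))           # my_python_index-2017.01
    yearly = "{0}-{1}".format(prefix, now.strftime('%Y'))               # my_python_index-2017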
File renamed without changes.

cmreshandler/cmreshandler.py renamed to cmreslogging/handlers.py (+130 -49)
@@ -2,13 +2,18 @@
 """

 import logging
-from enum import Enum
-from elasticsearch import helpers as eshelpers
-from elasticsearch import Elasticsearch, RequestsHttpConnection
-from requests_kerberos import HTTPKerberosAuth, DISABLED
 import datetime
 import socket
+import copy
 from threading import Timer
+from enum import Enum
+from elasticsearch import helpers as eshelpers
+from elasticsearch import Elasticsearch, RequestsHttpConnection
+try:
+    from requests_kerberos import HTTPKerberosAuth, DISABLED
+    CMR_KERBEROS_SUPPORTED = True
+except ImportError:
+    CMR_KERBEROS_SUPPORTED = False


 class CMRESHandler(logging.Handler):
@@ -30,38 +35,94 @@ class AuthType(Enum):
         BASIC_AUTH = 1
         KERBEROS_AUTH = 2

+    class IndexNameFrequency(Enum):
+        """ Index name frequencies supported:
+        the handler supports
+        - Daily indices
+        - Weekly indices
+        - Monthly indices
+        - Yearly indices
+        """
+        DAILY = 0
+        WEEKLY = 1
+        MONTHLY = 2
+        YEARLY = 3
+
     # Defaults for the class
-    __DEFAULT_HOST = [{'host': 'localhost', 'port': 9200}]
+    __DEFAULT_ELASTICSEARCH_HOST = [{'host': 'localhost', 'port': 9200}]
     __DEFAULT_AUTH_USER = ''
     __DEFAULT_AUTH_PASSWD = ''
     __DEFAULT_USE_SSL = False
     __DEFAULT_VERIFY_SSL = True
     __DEFAULT_AUTH_TYPE = AuthType.NO_AUTH
+    __DEFAULT_INDEX_FREQUENCY = IndexNameFrequency.DAILY
     __DEFAULT_BUFFER_SIZE = 1000
-    __DEFAULT_FLUSH_FREQUENCY_IN_SEC = 1
+    __DEFAULT_FLUSH_FREQ_INSEC = 1
     __DEFAULT_ADDITIONAL_FIELDS = {}
     __DEFAULT_ES_INDEX_NAME = 'python_logger'
     __DEFAULT_ES_DOC_TYPE = 'python_log'
-    __DEFAULT_RAISE_ON_INDEXING_EXCEPTIONS = False
+    __DEFAULT_RAISE_ON_EXCEPTION = False
     __DEFAULT_TIMESTAMP_FIELD_NAME = "timestamp"

     __LOGGING_FILTER_FIELDS = ['msecs',
                                'relativeCreated',
                                'levelno',
                                'created']

+    @staticmethod
+    def _get_daily_index_name(es_index_name):
+        """ Returns the elasticsearch index name for daily rotation
+        :param es_index_name: the prefix to be used in the index
+        :return: A string containing the elasticsearch index name, including the date
+        """
+        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m.%d'))
+
+    @staticmethod
+    def _get_weekly_index_name(es_index_name):
+        """ Returns the elasticsearch index name for weekly rotation
+        :param es_index_name: the prefix to be used in the index
+        :return: A string containing the elasticsearch index name, including the date of the start of the week
+        """
+        current_date = datetime.datetime.now()
+        start_of_the_week = current_date - datetime.timedelta(days=current_date.weekday())
+        return "{0!s}-{1!s}".format(es_index_name, start_of_the_week.strftime('%Y.%m.%d'))
+
+    @staticmethod
+    def _get_monthly_index_name(es_index_name):
+        """ Returns the elasticsearch index name for monthly rotation
+        :param es_index_name: the prefix to be used in the index
+        :return: A string containing the elasticsearch index name, including the specific month
+        """
+        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m'))
+
+    @staticmethod
+    def _get_yearly_index_name(es_index_name):
+        """ Returns the elasticsearch index name for yearly rotation
+        :param es_index_name: the prefix to be used in the index
+        :return: A string containing the elasticsearch index name, including the specific year
+        """
+        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y'))
+
+    _INDEX_FREQUENCY_FUNCION_DICT = {
+        IndexNameFrequency.DAILY: _get_daily_index_name,
+        IndexNameFrequency.WEEKLY: _get_weekly_index_name,
+        IndexNameFrequency.MONTHLY: _get_monthly_index_name,
+        IndexNameFrequency.YEARLY: _get_yearly_index_name
+    }
+
     def __init__(self,
-                 hosts=__DEFAULT_HOST,
+                 hosts=__DEFAULT_ELASTICSEARCH_HOST,
                  auth_details=(__DEFAULT_AUTH_USER, __DEFAULT_AUTH_PASSWD),
                  auth_type=__DEFAULT_AUTH_TYPE,
                  use_ssl=__DEFAULT_USE_SSL,
                  verify_ssl=__DEFAULT_VERIFY_SSL,
                  buffer_size=__DEFAULT_BUFFER_SIZE,
-                 flush_frequency_in_sec=__DEFAULT_FLUSH_FREQUENCY_IN_SEC,
+                 flush_frequency_in_sec=__DEFAULT_FLUSH_FREQ_INSEC,
                  es_index_name=__DEFAULT_ES_INDEX_NAME,
+                 index_name_frequency=__DEFAULT_INDEX_FREQUENCY,
                  es_doc_type=__DEFAULT_ES_DOC_TYPE,
                  es_additional_fields=__DEFAULT_ADDITIONAL_FIELDS,
-                 raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_INDEXING_EXCEPTIONS,
+                 raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_EXCEPTION,
                  default_timestamp_field_name=__DEFAULT_TIMESTAMP_FIELD_NAME):
         """ Handler constructor

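The new frequency helpers are staticmethods stored in a class-level dict, so what the dict holds are staticmethod descriptor objects; that is why the flush path further down calls them through __func__. A minimal, self-contained sketch of that dispatch pattern (the class and names here are illustrative, not part of the handler) ::

    from enum import Enum

    class Freq(Enum):
        DAILY = 0
        MONTHLY = 1

    class Namer(object):
        @staticmethod
        def _daily(prefix):
            return prefix + "-daily"

        @staticmethod
        def _monthly(prefix):
            return prefix + "-monthly"

        # Inside the class body these names are staticmethod objects,
        # so the dict stores descriptors, not plain functions.
        _DISPATCH = {Freq.DAILY: _daily, Freq.MONTHLY: _monthly}

        def pick(self, freq, prefix):
            # __func__ unwraps the staticmethod so it can be called directly
            return self._DISPATCH[freq].__func__(prefix)

    print(Namer().pick(Freq.MONTHLY, "logs"))  # prints "logs-monthly"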
@@ -80,6 +141,10 @@ def __init__(self,
                if the buffer_size has not been reached yet
         :param es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a
             date with YYYY.MM.dd, ```python_logger``` used by default
+        :param index_name_frequency: Defines which date postfix is appended to the index name. Available values
+            are selected from the IndexNameFrequency class (IndexNameFrequency.DAILY,
+            IndexNameFrequency.WEEKLY, IndexNameFrequency.MONTHLY, IndexNameFrequency.YEARLY). By default
+            it uses daily indices.
         :param es_doc_type: A string with the name of the document type that will be used ```python_log``` used
             by default
         :param es_additional_fields: A dictionary with all the additional fields that you would like to add
@@ -90,25 +155,29 @@ def __init__(self,
         """
         logging.Handler.__init__(self)

-        self.hosts = hosts
+        self.hosts = copy.deepcopy(hosts)
         self.auth_details = auth_details
         self.auth_type = auth_type
         self.use_ssl = use_ssl
         self.verify_certs = verify_ssl
         self.buffer_size = buffer_size
         self.flush_frequency_in_sec = flush_frequency_in_sec
         self.es_index_name = es_index_name
+        self.index_name_frequency = index_name_frequency
         self.es_doc_type = es_doc_type
-        self.es_additional_fields = es_additional_fields.copy()
+        self.es_additional_fields = copy.deepcopy(es_additional_fields)
         self.es_additional_fields.update({'host': socket.gethostname(),
                                           'host_ip': socket.gethostbyname(socket.gethostname())})
         self.raise_on_indexing_exceptions = raise_on_indexing_exceptions
         self.default_timestamp_field_name = default_timestamp_field_name

+        self._client = None
         self._buffer = []
         self._timer = None
         self.__schedule_flush()

+        self._index_name_func = CMRESHandler._INDEX_FREQUENCY_FUNCION_DICT[self.index_name_frequency]
+
     def __schedule_flush(self):
         if self._timer is None:
             self._timer = Timer(self.flush_frequency_in_sec, self.flush)
@@ -117,22 +186,34 @@ def __schedule_flush(self):

     def __get_es_client(self):
         if self.auth_type == CMRESHandler.AuthType.NO_AUTH:
-            return Elasticsearch(hosts=self.hosts,
-                                 use_ssl=self.use_ssl,
-                                 verify_certs=self.verify_certs,
-                                 connection_class=RequestsHttpConnection)
-        elif self.auth_type == CMRESHandler.AuthType.BASIC_AUTH:
-            return Elasticsearch(hosts=self.hosts,
-                                 http_auth=self.auth_details,
-                                 use_ssl=self.use_ssl,
-                                 verify_certs=self.verify_certs,
-                                 connection_class=RequestsHttpConnection)
-        elif self.auth_type == CMRESHandler.AuthType.KERBEROS_AUTH:
-            return Elasticsearch(hosts=self.hosts,
-                                 use_ssl=self.use_ssl,
-                                 verify_certs=self.verify_certs,
-                                 connection_class=RequestsHttpConnection,
-                                 http_auth=HTTPKerberosAuth(mutual_authentication=DISABLED))
+            if self._client is None:
+                self._client = Elasticsearch(hosts=self.hosts,
+                                             use_ssl=self.use_ssl,
+                                             verify_certs=self.verify_certs,
+                                             connection_class=RequestsHttpConnection)
+            return self._client
+
+        if self.auth_type == CMRESHandler.AuthType.BASIC_AUTH:
+            if self._client is None:
+                return Elasticsearch(hosts=self.hosts,
+                                     http_auth=self.auth_details,
+                                     use_ssl=self.use_ssl,
+                                     verify_certs=self.verify_certs,
+                                     connection_class=RequestsHttpConnection)
+            return self._client
+
+        if self.auth_type == CMRESHandler.AuthType.KERBEROS_AUTH:
+            if CMR_KERBEROS_SUPPORTED:
+                # For kerberos we return a new client each time to make sure the tokens are up to date
+                return Elasticsearch(hosts=self.hosts,
+                                     use_ssl=self.use_ssl,
+                                     verify_certs=self.verify_certs,
+                                     connection_class=RequestsHttpConnection,
+                                     http_auth=HTTPKerberosAuth(mutual_authentication=DISABLED))
+            else:
+                raise EnvironmentError("Kerberos module not available. Please install \"requests-kerberos\"")
+
+        raise ValueError("Authentication method not supported")

     def test_es_source(self):
         """ Returns True if the handler can ping the Elasticsearch servers
@@ -144,21 +225,15 @@ def test_es_source(self):
         """
         return self.__get_es_client().ping()

-    def __get_es_index_name(self):
-        """ Returns elasticearch index name
-        :return: A srting containing the elasticsearch indexname used which should include the date.
-        """
-        return "{0!s}-{1!s}".format(self.es_index_name, datetime.datetime.now().strftime('%Y.%m.%d'))
-
     @staticmethod
     def __get_es_datetime_str(timestamp):
         """ Returns elasticsearch utc formatted time for an epoch timestamp

         :param timestamp: epoch, including milliseconds
         :return: A string valid for elasticsearch time record
         """
-        t = datetime.datetime.utcfromtimestamp(timestamp)
-        return "{0!s}.{1:03d}Z".format(t.strftime('%Y-%m-%dT%H:%M:%S'), int(t.microsecond / 1000))
+        current_date = datetime.datetime.utcfromtimestamp(timestamp)
+        return "{0!s}.{1:03d}Z".format(current_date.strftime('%Y-%m-%dT%H:%M:%S'), int(current_date.microsecond / 1000))

     def flush(self):
         """ Flushes the buffer into ES
@@ -168,19 +243,25 @@ def flush(self):
            self._timer.cancel()
         self._timer = None

-        # FIXME: This should probably go on a different thread to speed up the execution
         if len(self._buffer) >= 0:
             try:
-                actions = map(lambda x: {'_index': self.__get_es_index_name(),
-                                         '_type': self.es_doc_type,
-                                         '_source': x},
-                              self._buffer)
-                eshelpers.bulk(client=self.__get_es_client(),
-                               actions=actions,
-                               stats_only=True)
-            except Exception as e:
+                actions = (
+                    {
+                        '_index': self._index_name_func.__func__(self.es_index_name),
+                        '_type': self.es_doc_type,
+                        '_source': log_record
+                    }
+                    for log_record in self._buffer
+                )
+
+                eshelpers.bulk(
+                    client=self.__get_es_client(),
+                    actions=actions,
+                    stats_only=True
+                )
+            except Exception as exception:
                 if self.raise_on_indexing_exceptions:
-                    raise e
+                    raise exception
             self._buffer = []

         self.__schedule_flush()
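The map call used to build one action per buffered record was replaced by a generator expression, so actions are produced lazily while eshelpers.bulk iterates, rather than materialising a second copy of the buffer (the memory/speed trade-off mentioned in the commit message). A small illustration of the difference, with made-up record contents ::

    records = [{'msg': 'line %d' % i} for i in range(100000)]

    # Old style: on Python 2, map() returns a full list, so a second list of
    # wrapped actions lives in memory next to the buffer itself.
    actions_list = map(lambda rec: {'_index': 'idx', '_type': 'doc', '_source': rec}, records)

    # New style: the generator builds each action only when the bulk helper
    # asks for the next one, so no second full copy is kept around.
    actions_gen = (
        {'_index': 'idx', '_type': 'doc', '_source': rec}
        for rec in records
    )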
@@ -203,9 +284,9 @@ def emit(self, record):
         :return: None
         """
         rec = self.es_additional_fields.copy()
-        for k, v in record.__dict__.items():
-            if k not in CMRESHandler.__LOGGING_FILTER_FIELDS:
-                rec[k] = "" if v is None else v
+        for key, value in record.__dict__.items():
+            if key not in CMRESHandler.__LOGGING_FILTER_FIELDS:
+                rec[key] = "" if value is None else value
         rec[self.default_timestamp_field_name] = self.__get_es_datetime_str(record.created)

         self._buffer.append(rec)
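Putting the handler changes together, a minimal end-to-end sketch of attaching the refactored handler to a standard logger might look like this (the cluster address, index prefix and flush interval are placeholders) ::

    import logging
    from cmreslogging.handlers import CMRESHandler

    handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}],
                           auth_type=CMRESHandler.AuthType.NO_AUTH,
                           es_index_name="my_python_index",
                           index_name_frequency=CMRESHandler.IndexNameFrequency.MONTHLY,
                           flush_frequency_in_sec=2,
                           raise_on_indexing_exceptions=True)

    log = logging.getLogger("PythonTest")
    log.setLevel(logging.INFO)
    log.addHandler(handler)

    log.info("hello from the refactored handler")
    handler.flush()  # or simply wait for the timer-driven flush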

requirements/requirements_py27.txt (+3)

@@ -0,0 +1,3 @@
+elasticsearch==5.1.0
+requests==2.12.4
+enum==0.4.6

requirements/requirements_py36.txt (+2)

@@ -0,0 +1,2 @@
+elasticsearch==5.1.0
+requests==2.12.4
