Skip to content

Commit f55d794

Browse files
committed
introduction of new modules
1 parent 61961c9 commit f55d794

File tree

6 files changed

+326
-0
lines changed

6 files changed

+326
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
import json
2+
import binascii
3+
import np
4+
import docx
5+
import io
6+
7+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.docx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
15+
16+
17+
def handler(q=False):
    """Extract the text of a base64-encoded .docx attachment.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the extracted text as both 'freetext' and 'text', or a dict
        with a single 'error' message when the attachment cannot be decoded
        or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        docx_bytes = binascii.a2b_base64(q['data'])
        if not docx_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    doc_file = io.BytesIO(docx_bytes)
    try:
        doc = docx.Document(doc_file)
        # Body paragraphs first, then every paragraph found in table cells.
        texts = [para.text for para in doc.paragraphs]
        texts.extend(para.text
                     for table in doc.tables
                     for row in table.rows
                     for cell in row.cells
                     for para in cell.paragraphs)
        # Every paragraph is prefixed with a newline, including the first one.
        doc_content = "".join("\n" + text for text in texts)
        comment = ".docx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': doc_content, 'comment': comment},
                            {'types': ['text'], 'values': doc_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .docx. Error was: " + str(e)}
53+
54+
55+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
57+
58+
59+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
import json
2+
import binascii
3+
import np
4+
import ezodf
5+
import pandas_ods_reader
6+
import io
7+
8+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.ods to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
16+
17+
18+
def handler(q=False):
    """Render every sheet of a base64-encoded .ods attachment as text.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the rendered sheets as both 'freetext' and 'text', or a dict
        with a single 'error' message when the attachment cannot be decoded
        or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        ods_bytes = binascii.a2b_base64(q['data'])
        if not ods_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    ods_file = io.BytesIO(ods_bytes)
    try:
        # Opening the document belongs inside the try block so that a file
        # which is not a valid .ods yields the error dict instead of raising.
        doc = ezodf.opendoc(ods_file)
        sheet_texts = []
        for i in range(len(doc.sheets)):
            ods = pandas_ods_reader.read_ods(ods_file, i, headers=False)
            # Every sheet is prefixed with a newline, including the first one.
            sheet_texts.append("\n" + ods.to_string(max_rows=None))
        ods_content = "".join(sheet_texts)
        comment = ".ods-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': ods_content, 'comment': comment},
                            {'types': ['text'], 'values': ods_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .ods. Error was: " + str(e)}
48+
49+
50+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
52+
53+
54+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
import json
2+
import binascii
3+
import np
4+
from ODTReader.odtreader import odtToText
5+
import io
6+
7+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.odt to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
15+
16+
17+
def handler(q=False):
    """Extract the text of a base64-encoded .odt attachment.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the extracted text as both 'freetext' and 'text', or a dict
        with a single 'error' message when the attachment cannot be decoded
        or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        odt_bytes = binascii.a2b_base64(q['data'])
        if not odt_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    odt_file = io.BytesIO(odt_bytes)
    try:
        odt_content = odtToText(odt_file)
        comment = ".odt-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': odt_content, 'comment': comment},
                            {'types': ['text'], 'values': odt_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .odt. Error was: " + str(e)}
43+
44+
45+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
47+
48+
49+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
import json
2+
import binascii
3+
import np
4+
import pytesseract
5+
import pdftotext
6+
import io
7+
import collections
8+
9+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': 'PDF to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
17+
18+
19+
def handler(q=False):
    """Extract the text of a base64-encoded PDF attachment.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the page texts, joined by blank lines, as 'freetext', or a
        dict with a single 'error' message when the attachment cannot be
        decoded or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        pdf_bytes = binascii.a2b_base64(q['data'])
        if not pdf_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    pdf_file = io.BytesIO(pdf_bytes)
    try:
        pdf_content = "\n\n".join(pdftotext.PDF(pdf_file))
        return {'results': [{'types': ['freetext'], 'values': pdf_content, 'comment': "PDF-to-text from file " + filename}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as PDF. Error was: " + str(e)}
42+
43+
44+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
46+
47+
48+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
import json
2+
import binascii
3+
import np
4+
from pptx import Presentation
5+
import io
6+
7+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.pptx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
15+
16+
17+
def handler(q=False):
    """Extract the shape texts of a base64-encoded .pptx attachment.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the extracted text as both 'freetext' and 'text', or a dict
        with a single 'error' message when the attachment cannot be decoded
        or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        pptx_bytes = binascii.a2b_base64(q['data'])
        if not pptx_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    ppt_file = io.BytesIO(pptx_bytes)
    try:
        ppt = Presentation(ppt_file)
        # Only shapes exposing a 'text' attribute contribute; each text is
        # prefixed with a newline, including the first one.
        ppt_content = "".join("\n" + shape.text
                              for slide in ppt.slides
                              for shape in slide.shapes
                              if hasattr(shape, "text"))
        comment = ".pptx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': ppt_content, 'comment': comment},
                            {'types': ['text'], 'values': ppt_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .pptx. Error was: " + str(e)}
47+
48+
49+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
51+
52+
53+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
1+
import json
2+
import binascii
3+
import np
4+
import pandas
5+
import io
6+
7+
# Default error payload of the module.
misperrors = {
    'error': 'Error',
}

# Attribute types accepted as input and produced as output.
mispattributes = {
    'input': ['attachment'],
    'output': ['freetext', 'text'],
}

# Descriptive metadata of the module.
moduleinfo = {
    'version': '0.1',
    'author': 'Sascha Rommelfangen',
    'description': '.xlsx to freetext-import IOC extractor',
    'module-type': ['expansion'],
}

# Configuration entries of the module; none are defined.
moduleconfig = []
15+
16+
17+
def handler(q=False):
    """Render a base64-encoded .xlsx attachment as text.

    Args:
        q: JSON document with the keys 'attachment' (file name, used in the
            result comment) and 'data' (base64-encoded file content), or
            False when no query was supplied.

    Returns:
        False when q is False. Otherwise a dict with a 'results' list that
        offers the rendered table as both 'freetext' and 'text', or a dict
        with a single 'error' message when the attachment cannot be decoded
        or parsed.

    Raises:
        KeyError: if the query has no 'attachment' key.
    """
    if q is False:
        return False
    q = json.loads(q)
    filename = q['attachment']
    try:
        xlsx_bytes = binascii.a2b_base64(q['data'])
        if not xlsx_bytes:
            # An empty payload can never be parsed; report it as a fetch problem.
            raise ValueError("attachment data is empty")
    except Exception as e:
        print(e)
        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
        print(err)
        # Build a fresh dict rather than mutating the shared module-level one.
        return {'error': err}

    xls_file = io.BytesIO(xlsx_bytes)
    try:
        xls = pandas.read_excel(xls_file)
        # None disables cell truncation; the former value -1 is deprecated.
        # NOTE(review): None requires pandas >= 1.0 - confirm the pinned version.
        # option_context limits the setting to this call instead of changing
        # the display option for the whole process.
        with pandas.option_context('display.max_colwidth', None):
            xls_content = xls.to_string(max_rows=None)
        comment = ".xlsx-to-text from file " + filename
        return {'results': [{'types': ['freetext'], 'values': xls_content, 'comment': comment},
                            {'types': ['text'], 'values': xls_content, 'comment': comment}]}
    except Exception as e:
        print(e)
        return {'error': "Couldn't analyze file as .xlsx. Error was: " + str(e)}
45+
46+
47+
def introspection():
    """Return the module-level mispattributes mapping."""
    attributes = mispattributes
    return attributes
49+
50+
51+
def version():
    """Store moduleconfig under the 'config' key of moduleinfo and return moduleinfo."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo

0 commit comments

Comments
 (0)