introduction of new modules

rommelfs · rommelfs · commit f55d7946df19 · 2019-04-26T12:07:55.000+02:00
diff --git a/misp_modules/modules/expansion/docx-enrich.py b/misp_modules/modules/expansion/docx-enrich.py
@@ -0,0 +1,61 @@
+import json
+import binascii
+import np
+import docx
+import io
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': '.docx to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the text of a base64-encoded .docx attachment.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list (one
+        'freetext' and one 'text' entry, both holding the extracted text),
+        or a dict with an 'error' message when the attachment cannot be
+        decoded or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        doc = docx.Document(io.BytesIO(payload))
+        texts = [para.text for para in doc.paragraphs]
+        texts.extend(para.text
+                     for table in doc.tables
+                     for row in table.rows
+                     for cell in row.cells
+                     for para in cell.paragraphs)
+        # Each fragment is preceded by a newline: body paragraphs first,
+        # then the paragraphs of every table cell.
+        doc_content = "".join("\n" + text for text in texts)
+        comment = ".docx-to-text from file " + filename
+        return {'results': [{'types': ['freetext'], 'values': doc_content, 'comment': comment},
+                            {'types': ['text'], 'values': doc_content, 'comment': comment}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as .docx. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo
diff --git a/misp_modules/modules/expansion/ods-enrich.py b/misp_modules/modules/expansion/ods-enrich.py
@@ -0,0 +1,56 @@
+import json
+import binascii
+import np
+import ezodf
+import pandas_ods_reader
+import io
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': '.ods to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the cell contents of a base64-encoded .ods attachment as text.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list (one
+        'freetext' and one 'text' entry, both holding the rendered sheets),
+        or a dict with an 'error' message when the attachment cannot be
+        decoded or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        ods_file = io.BytesIO(payload)
+        # Opening the document belongs in the guarded section: a malformed
+        # file must produce an error response rather than an exception.
+        num_sheets = len(ezodf.opendoc(ods_file).sheets)
+        rendered = []
+        for i in range(num_sheets):
+            # Rewind so every reader starts at the beginning of the stream,
+            # wherever the previous one left the cursor.
+            ods_file.seek(0)
+            # NOTE(review): confirm whether read_ods expects a 0-based or a
+            # 1-based sheet index; the original indexing is kept unchanged.
+            ods = pandas_ods_reader.read_ods(ods_file, i, headers=False)
+            rendered.append("\n" + ods.to_string(max_rows=None))
+        ods_content = "".join(rendered)
+        comment = ".ods-to-text from file " + filename
+        return {'results': [{'types': ['freetext'], 'values': ods_content, 'comment': comment},
+                            {'types': ['text'], 'values': ods_content, 'comment': comment}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as .ods. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo
diff --git a/misp_modules/modules/expansion/odt-enrich.py b/misp_modules/modules/expansion/odt-enrich.py
@@ -0,0 +1,51 @@
+import json
+import binascii
+import np
+from ODTReader.odtreader import odtToText
+import io
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': '.odt to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the text of a base64-encoded .odt attachment.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list (one
+        'freetext' and one 'text' entry, both holding the extracted text),
+        or a dict with an 'error' message when the attachment cannot be
+        decoded or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        odt_content = odtToText(io.BytesIO(payload))
+        comment = ".odt-to-text from file " + filename
+        return {'results': [{'types': ['freetext'], 'values': odt_content, 'comment': comment},
+                            {'types': ['text'], 'values': odt_content, 'comment': comment}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as .odt. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo
diff --git a/misp_modules/modules/expansion/pdf-enrich.py b/misp_modules/modules/expansion/pdf-enrich.py
@@ -0,0 +1,50 @@
+import json
+import binascii
+import np
+import pytesseract
+import pdftotext 
+import io
+import collections
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+# NOTE(review): handler() in this module only emits a 'freetext' result.
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': 'PDF to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the text of a base64-encoded PDF attachment.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list
+        holding one 'freetext' entry (pages joined by blank lines), or a
+        dict with an 'error' message when the attachment cannot be decoded
+        or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        pdf_content = "\n\n".join(pdftotext.PDF(io.BytesIO(payload)))
+        # NOTE(review): only a 'freetext' result is returned although
+        # mispattributes also lists 'text' as an output - confirm intent.
+        return {'results': [{'types': ['freetext'], 'values': pdf_content, 'comment': "PDF-to-text from file " + filename}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as PDF. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo
diff --git a/misp_modules/modules/expansion/pptx-enrich.py b/misp_modules/modules/expansion/pptx-enrich.py
@@ -0,0 +1,55 @@
+import json
+import binascii
+import np
+from pptx import Presentation
+import io
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': '.pptx to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the shape text of a base64-encoded .pptx attachment.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list (one
+        'freetext' and one 'text' entry, both holding the extracted text),
+        or a dict with an 'error' message when the attachment cannot be
+        decoded or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        ppt = Presentation(io.BytesIO(payload))
+        # Shapes without a "text" attribute are skipped; every collected
+        # fragment is preceded by a newline.
+        ppt_content = "".join("\n" + shape.text
+                              for slide in ppt.slides
+                              for shape in slide.shapes
+                              if hasattr(shape, "text"))
+        comment = ".pptx-to-text from file " + filename
+        return {'results': [{'types': ['freetext'], 'values': ppt_content, 'comment': comment},
+                            {'types': ['text'], 'values': ppt_content, 'comment': comment}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as .pptx. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo
diff --git a/misp_modules/modules/expansion/xlsx-enrich.py b/misp_modules/modules/expansion/xlsx-enrich.py
@@ -0,0 +1,53 @@
+import json
+import binascii
+import np
+import pandas 
+import io
+
+# Base error response; handler() reports failures under the 'error' key.
+misperrors = {'error': 'Error'}
+# Attribute types taken as 'input' and listed as 'output'; returned by introspection().
+mispattributes = {'input': ['attachment'],
+                  'output': ['freetext', 'text']}
+# Static module metadata; version() returns it with 'config' attached.
+moduleinfo = {'version': '0.1', 'author': 'Sascha Rommelfangen',
+              'description': '.xlsx to freetext-import IOC extractor',
+              'module-type': ['expansion']}
+
+# Value stored under moduleinfo['config'] by version(); empty for this module.
+moduleconfig = []
+
+
+def handler(q=False):
+    """Extract the cell contents of a base64-encoded .xlsx attachment as text.
+
+    Args:
+        q: JSON request string carrying the keys 'attachment' (file name)
+            and 'data' (base64-encoded file content), or False when no
+            request was supplied.
+
+    Returns:
+        False when q is False. Otherwise a dict with a 'results' list (one
+        'freetext' and one 'text' entry, both holding the rendered sheet),
+        or a dict with an 'error' message when the attachment cannot be
+        decoded or parsed.
+    """
+    if q is False:
+        return False
+    request = json.loads(q)
+    try:
+        # Both keys are read inside the guarded section so that a request
+        # lacking either one yields an error response instead of a KeyError.
+        filename = request['attachment']
+        payload = binascii.a2b_base64(request['data'])
+    except Exception as e:
+        print(e)
+        err = "Couldn't fetch attachment (JSON 'data' is empty). Are you using the 'Query enrichment' action?"
+        print(err)
+        # Return a copy: mutating the shared module-level dict would let one
+        # request's error message leak into another request's response.
+        return dict(misperrors, error=err)
+
+    try:
+        # NOTE(review): read_excel is called with its default sheet
+        # selection, so a single sheet is rendered - confirm whether
+        # multi-sheet workbooks should be covered.
+        xls = pandas.read_excel(io.BytesIO(payload))
+        # Lift the column-width cap only while rendering, so long cell values
+        # are not truncated and the process-wide pandas option is restored
+        # afterwards. None is the documented "no limit" value; the former
+        # -1 spelling is deprecated.
+        with pandas.option_context('display.max_colwidth', None):
+            xls_content = xls.to_string(max_rows=None)
+        comment = ".xlsx-to-text from file " + filename
+        return {'results': [{'types': ['freetext'], 'values': xls_content, 'comment': comment},
+                            {'types': ['text'], 'values': xls_content, 'comment': comment}]}
+    except Exception as e:
+        print(e)
+        err = "Couldn't analyze file as .xlsx. Error was: " + str(e)
+        return dict(misperrors, error=err)
+
+
+def introspection():
+    """Return the module's 'input' and 'output' attribute type lists."""
+    return mispattributes
+
+
+def version():
+    """Return the module metadata with the configuration stored under 'config'."""
+    moduleinfo.update(config=moduleconfig)
+    return moduleinfo