diff --git a/iocp.py b/iocp.py
index 9968cc2..c2db9fe 100755
--- a/iocp.py
+++ b/iocp.py
@@ -35,12 +35,16 @@
 #
 ###################################################################################################
 
+#from __future__ import unicode_literals
 import os
 import sys
 import fnmatch
 import argparse
 import re
-from StringIO import StringIO
+try:
+    from io import StringIO
+except ImportError:
+    from StringIO import StringIO
 try:
     import configparser as ConfigParser
 except ImportError:
@@ -190,7 +194,7 @@ def parse_pdf_pypdf2(self, f, fpath):
     def parse_pdf_pdfminer(self, f, fpath):
         try:
             laparams = LAParams()
-            laparams.all_texts = True  
+            laparams.all_texts = True
             rsrcmgr = PDFResourceManager()
             pagenos = set()
 
@@ -223,7 +227,7 @@ def parse_pdf(self, f, fpath):
         except AttributeError:
             e = 'Selected PDF parser library is not supported: %s' % (self.library)
             raise NotImplementedError(e)
-        
+
         self.parser_func(f, fpath)
 
     def parse_txt(self, f, fpath):
@@ -244,19 +248,19 @@ def parse_html(self, f, fpath):
         try:
             if self.dedup:
                 self.dedup_store = set()
-            
+
             data = f.read()
             soup = BeautifulSoup(data)
             html = soup.findAll(text=True)
 
-            text = u''
+            text = ''
             for elem in html:
                 if elem.parent.name in ['style', 'script', '[document]', 'head', 'title']:
                     continue
-                elif re.match('<!--.*-->', unicode(elem)):
+                elif re.match('<!--.*-->', elem):
                     continue
                 else:
-                    text += unicode(elem)
+                    text += elem
 
             self.handler.print_header(fpath)
             self.parse_page(fpath, text, 1)
@@ -304,7 +308,7 @@ def parse(self, path):
     argparser.add_argument('-i', dest='INPUT_FORMAT', default='pdf', help='Input format (pdf/txt/html)')
     argparser.add_argument('-o', dest='OUTPUT_FORMAT', default='csv', help='Output format (csv/json/yara/netflow)')
     argparser.add_argument('-d', dest='DEDUP', action='store_true', default=False, help='Deduplicate matches')
-    argparser.add_argument('-l', dest='LIB', default='pdfminer', help='PDF parsing library (pypdf2/pdfminer)')
+    argparser.add_argument('-l', dest='LIB', default='pypdf2', help='PDF parsing library (pypdf2/pdfminer)')
     args = argparser.parse_args()
 
     parser = IOC_Parser(args.INI, args.INPUT_FORMAT, args.DEDUP, args.LIB, args.OUTPUT_FORMAT)
diff --git a/output.py b/output.py
index d71c92c..af485d6 100644
--- a/output.py
+++ b/output.py
@@ -70,7 +70,7 @@ def print_match(self, fpath, page, name, match):
             self.cnt[name] += 1
         else:
             self.cnt[name] = 1
-        
+
         string_id = "$%s%d" % (name, self.cnt[name])
         self.sids.append(string_id)
         string_value = match.replace('\\', '\\\\')
@@ -92,10 +92,10 @@ def print_footer(self, fpath):
         print("\tcondition:")
         print("\t\t" + cond)
         print("}")
-        
+
 class OutputHandler_netflow(OutputHandler):
     def __init__(self):
-        print "host 255.255.255.255"
+        print("host 255.255.255.255")
 
     def print_match(self, fpath, page, name, match):
         data = {
@@ -103,4 +103,4 @@ def print_match(self, fpath, page, name, match):
             'match': match
         }
         if data["type"] == "IP":
-            print " or host %s " % data["match"]
+            print(" or host %s " % data["match"])
diff --git a/whitelist.py b/whitelist.py
index 5f12d31..bf30aec 100644
--- a/whitelist.py
+++ b/whitelist.py
@@ -4,9 +4,9 @@
 
 class WhiteList(dict):
     def __init__(self, basedir):
-        searchdir = os.path.join(basedir, "whitelists/whitelist_*.ini")  
+        searchdir = os.path.join(basedir, "whitelists/whitelist_*.ini")
         fpaths = glob.glob(searchdir)
         for fpath in fpaths:
             t = os.path.splitext(os.path.split(fpath)[1])[0].split('_',1)[1]
             patterns = [line.strip() for line in open(fpath)]
-            self[t] = [re.compile(p) for p in patterns]
\ No newline at end of file
+            self[t] = [re.compile(p) for p in patterns]
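
The compatibility idiom the iocp.py hunks lean on is a try/except import shim plus single-argument print() calls, both of which behave the same under Python 2 and Python 3. A minimal standalone sketch of that pattern follows; the module names mirror the patch, while the buffer and parser usage at the end is purely illustrative and not part of the change set.

# Sketch of the dual-version import shim used in the patch above.
try:
    from io import StringIO              # Python 3 location of StringIO
except ImportError:
    from StringIO import StringIO        # fall back to the Python 2 module

try:
    import configparser as ConfigParser  # Python 3 name, aliased to the old one
except ImportError:
    import ConfigParser                  # Python 2 name

cp = ConfigParser.ConfigParser()         # same constructor exists in either module
buf = StringIO()
buf.write(u'host 255.255.255.255')       # u'' literal keeps io.StringIO happy on Python 2
print(buf.getvalue())                    # single-argument print() is valid on 2 and 3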