From ab62eb3588fb6e7de5b57d292ced59d6706d3360 Mon Sep 17 00:00:00 2001
From: Sergey Kosukhin
Date: Thu, 11 Feb 2021 13:43:51 +0100
Subject: [PATCH 1/3] pp_ser: avoid explicit conversion of native strings to bytes

---
Note: drops the to_ascii() helper; preprocess() opens the temporary
output file with mode='w+' and writes the output buffer to it directly.

 src/serialbox-python/pp_ser/pp_ser.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/serialbox-python/pp_ser/pp_ser.py b/src/serialbox-python/pp_ser/pp_ser.py
index fb80ccef..77a57f66 100755
--- a/src/serialbox-python/pp_ser/pp_ser.py
+++ b/src/serialbox-python/pp_ser/pp_ser.py
@@ -48,13 +48,6 @@
 __email__ = 'oliver.fuhrer@meteoswiss.ch'
 
 
-def to_ascii(text):
-    if sys.version_info[0] == 3:
-        return bytes(text, 'ascii')
-    else:
-        return str(text)
-
-
 def filter_fortran(f):
     return (f.split('.')[-1].lower() in ['f90', 'inc', 'incf', 'f', 'f03'])
 
@@ -944,10 +937,10 @@ def preprocess(self):
         self.parse(generate=True)  # second pass, preprocess
         # write output
         if self.outfile != '':
-            output_file = tempfile.NamedTemporaryFile(delete=False)
+            output_file = tempfile.NamedTemporaryFile(mode='w+', delete=False)
             # same permissions as infile
             os.chmod(output_file.name, os.stat(self.infile).st_mode)
-            output_file.write(to_ascii(self.__outputBuffer))
+            output_file.write(self.__outputBuffer)
             output_file.close()
             useit = True
             if os.path.isfile(self.outfile) and not self.identical:

From 08e470faf19ce71e8f2ca7dbfecd51db1d755ef8 Mon Sep 17 00:00:00 2001
From: Sergey Kosukhin
Date: Tue, 1 Jun 2021 14:16:32 +0200
Subject: [PATCH 2/3] pp_ser: compatibility with old Python3 installations

---
Note: adds open23(), which passes encoding='UTF-8' for non-binary modes
on Python 3, and bytes23(), which encodes text as UTF-8 on Python 3.
parse() reads the input through open23(); preprocess() goes back to the
default NamedTemporaryFile mode and writes bytes23(buffer).

 src/serialbox-python/pp_ser/pp_ser.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/serialbox-python/pp_ser/pp_ser.py b/src/serialbox-python/pp_ser/pp_ser.py
index 77a57f66..6fc52e4d 100755
--- a/src/serialbox-python/pp_ser/pp_ser.py
+++ b/src/serialbox-python/pp_ser/pp_ser.py
@@ -48,6 +48,21 @@
 __email__ = 'oliver.fuhrer@meteoswiss.ch'
 
 
+def open23(name, mode='r'):
+    if sys.version_info[0] == 3:
+        return open(name, mode,
+                    encoding=(None if 'b' in mode else 'UTF-8'))
+    else:
+        return open(name, mode)
+
+
+def bytes23(text):
+    if sys.version_info[0] == 3:
+        return bytes(text, 'UTF-8')
+    else:
+        return str(text)
+
+
 def filter_fortran(f):
     return (f.split('.')[-1].lower() in ['f90', 'inc', 'incf', 'f', 'f03'])
 
@@ -895,7 +910,7 @@ def parse(self, generate=False):
             self.__outputBuffer += '#define ACC_PREFIX !$acc\n'
 
         # open and parse file
-        input_file = open(os.path.join(self.infile), 'r')
+        input_file = open23(os.path.join(self.infile), 'r')
         try:
             self.line = ''
             for line in input_file:
@@ -937,10 +952,10 @@ def preprocess(self):
         self.parse(generate=True)  # second pass, preprocess
         # write output
         if self.outfile != '':
-            output_file = tempfile.NamedTemporaryFile(mode='w+', delete=False)
+            output_file = tempfile.NamedTemporaryFile(delete=False)
             # same permissions as infile
             os.chmod(output_file.name, os.stat(self.infile).st_mode)
-            output_file.write(self.__outputBuffer)
+            output_file.write(bytes23(self.__outputBuffer))
             output_file.close()
             useit = True
             if os.path.isfile(self.outfile) and not self.identical:

From e165cb511a58e25a256ec0ccbf119ab188e451ef Mon Sep 17 00:00:00 2001
From: Sergey Kosukhin
Date: Sun, 2 Oct 2022 16:24:37 +0200
Subject: [PATCH 3/3] pp_ser: handle UTF-8 conversion errors

---
Note: open23() (non-binary modes) and bytes23() now use the
'surrogateescape' error handler. Adds getline(), which calls
linecache.getline() and, if that raises, scans the file through open23()
for the requested line, returning '' when it is not found. The four
look-ahead calls to linecache.getline() are switched to getline().

 src/serialbox-python/pp_ser/pp_ser.py | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/serialbox-python/pp_ser/pp_ser.py b/src/serialbox-python/pp_ser/pp_ser.py
index 6fc52e4d..27d8fe65 100755
--- a/src/serialbox-python/pp_ser/pp_ser.py
+++ b/src/serialbox-python/pp_ser/pp_ser.py
@@ -51,18 +51,30 @@
 def open23(name, mode='r'):
     if sys.version_info[0] == 3:
         return open(name, mode,
-                    encoding=(None if 'b' in mode else 'UTF-8'))
+                    encoding=(None if 'b' in mode else 'UTF-8'),
+                    errors=(None if 'b' in mode else 'surrogateescape'))
     else:
         return open(name, mode)
 
 
 def bytes23(text):
     if sys.version_info[0] == 3:
-        return bytes(text, 'UTF-8')
+        return bytes(text, 'UTF-8', 'surrogateescape')
     else:
         return str(text)
 
 
+def getline(filename, lineno):
+    try:
+        return linecache.getline(filename, lineno)
+    except:
+        with open23(filename, 'r') as f:
+            for i, line in enumerate(f, start=1):
+                if i == lineno:
+                    return line
+    return ''
+
+
 def filter_fortran(f):
     return (f.split('.')[-1].lower() in ['f90', 'inc', 'incf', 'f', 'f03'])
 
@@ -717,12 +729,12 @@ def __re_subroutine_function(self):
             lookahead_index = self.__linenum + 1
 
             # look ahead
-            nextline = linecache.getline(os.path.join(self.infile), lookahead_index)
+            nextline = getline(os.path.join(self.infile), lookahead_index)
             r_continued_line = re.compile('^([^!]*)&', re.IGNORECASE)
             while r_continued_line.search(nextline):
                 self.__line += nextline
                 lookahead_index += 1
-                nextline = linecache.getline(os.path.join(self.infile), lookahead_index)
+                nextline = getline(os.path.join(self.infile), lookahead_index)
             self.__line += nextline
             self.__skip_next_n_lines = lookahead_index - self.__linenum
             self.__produce_use_stmt()
@@ -817,12 +829,12 @@ def __re_def(self):
                 # set to line after the intent declaration
                 lookahead_index += 1
                 # look ahead
-                nextline = linecache.getline(os.path.join(self.infile), lookahead_index)
+                nextline = getline(os.path.join(self.infile), lookahead_index)
                 while nextline:
                     self.__check_intent_in(nextline)
                     if nextline.find('&') != -1:
                         lookahead_index += 1
-                        nextline = linecache.getline(os.path.join(self.infile), lookahead_index)
+                        nextline = getline(os.path.join(self.infile), lookahead_index)
                     else:
                         nextline = None
 