From 552fb55e5af05b756cc7a2ec1f10e44d414c0bbe Mon Sep 17 00:00:00 2001 From: GavinRay97 Date: Tue, 6 Apr 2021 21:09:55 -0400 Subject: [PATCH] Fix encoding error on Python 3.8+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ```sh ❯ python main.py Converting api/reascripthelp.html to vscode_snippets/reascript.code-snippets ... Build of vscode_snippets/reascript.code-snippets done ! Converting api/reaper-videoprocessor-docs.USDocML to vscode_snippets/reascript_vp.code-snippets ... Build of vscode_snippets/reascript_vp.code-snippets done ! Converting api/reaper-apidocs.USDocML to vscode_snippets/reascript_usdoc.code-snippets ... Build of vscode_snippets/reascript_usdoc.code-snippets done ! Converting api/ultraschall.USDocML to vscode_snippets/ultraschall.code-snippets ... Traceback (most recent call last): File "main.py", line 150, in doc_data: AnyStr = f.read() File "/usr/lib/python3.8/codecs.py", line 322, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 1688214: invalid start byte ``` https://stackoverflow.com/questions/42339876/error-unicodedecodeerror-utf-8-codec-cant-decode-byte-0xff-in-position-0-in --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index a637489..f1c764d 100644 --- a/main.py +++ b/main.py @@ -146,7 +146,7 @@ def build_usdoc(us_path: str, output_file: str): output = output_dir + output + output_format print("Converting {} to {} ...".format(api_doc, output)) doc_format: str = api_doc.rsplit('.')[-1] - with open(api_doc, 'r') as f: + with open(api_doc, 'rb') as f: doc_data: AnyStr = f.read() soup: BeautifulSoup = BeautifulSoup(doc_data, features="html.parser")