Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
name: CI

# Controls when the workflow will run.
on:
  # Run on pushes and pull requests that target the master branch only.
  push:
    branches: [master]
  pull_request:
    branches: [master]

  # Allows you to run this workflow manually from the Actions tab.
  workflow_dispatch:

jobs:
  # Static checks: one job per tox environment listed in the matrix.
  code-quality:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        toxenv:
          - black
          - flake8
          - mypy
          - isort
    env:
      # tox reads TOXENV to decide which environment to run.
      TOXENV: ${{ matrix.toxenv }}

    name: "Tox ${{ matrix.toxenv }}"
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE so the job can access it.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: setup python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install Requirements [${{ matrix.toxenv }}]
        run: pip install tox

      - name: Tox-${{ matrix.toxenv }}
        run: tox

  # Unit tests: one job per supported Python version, with coverage upload.
  test:
    # The type of runner that the job will run on.
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Each entry pairs a tox environment with the interpreter that runs it.
        # Versions stay quoted so that 3.10 is not parsed as the float 3.1.
        include:
          - toxenv: py38
            python-version: '3.8'
          - toxenv: py39
            python-version: '3.9'
          - toxenv: py310
            python-version: '3.10'
          - toxenv: py311
            python-version: '3.11'
          - toxenv: py312
            python-version: '3.12'
    env:
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      TOXENV: ${{ matrix.toxenv }}
    name: "Python ${{ matrix.python-version }} | Tox ${{ matrix.toxenv }}"

    # Steps represent a sequence of tasks that will be executed as part of the job.
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE so the job can access it.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: setup python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Requirements [Python-${{ matrix.python-version }}]
        run: pip install tox

      - name: Tox-${{ matrix.toxenv }}
        run: tox

      - name: Upload coverage to Codecov
        # see https://github.com/codecov/codecov-action/blob/master/README.md
        # NOTE(review): newer major versions of this action appear to require an
        # upload token; confirm before bumping past v2.
        uses: codecov/codecov-action@v2
        with:
          flags: unittests-${{ matrix.python-version }}
          fail_ci_if_error: true  # default = false
          # Must name an uploader platform; this job runs on ubuntu-latest.
          os: linux
          verbose: true  # default = false
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ dist
.idea
.coverage
.coverage.*
coverage.xml
env/
.c9/
.vscode
Expand Down
33 changes: 0 additions & 33 deletions .travis.yml

This file was deleted.

3 changes: 3 additions & 0 deletions ChangeLog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ UNRELEASED
* Fix #344: indent ``<ul>`` inside ``<ol>`` three spaces instead of two to comply with CommonMark, GFM, etc.
* Fix #324: unnecessary spaces around ``<b>``, ``<em>``, and ``strike`` tags.
* Don't wrap tables by default and add a ``--wrap-tables`` config option
* Remove support for Python ≤ 3.6. Now requires Python 3.7+.
* Support for Python 3.10.
* Fix #320 padding empty tables and tables with no </tr> tags.
* Add ``ignore_mailto_links`` config option to ignore ``mailto:`` style links.



2020.1.16
=========
----
Expand Down
42 changes: 33 additions & 9 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
self.tag_callback = None
self.open_quote = config.OPEN_QUOTE # covered in cli
self.close_quote = config.CLOSE_QUOTE # covered in cli

if out is None:
self.out = self.outtextf
else:
Expand Down Expand Up @@ -120,6 +120,8 @@ def __init__(
self.tag_stack = (
[]
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.emphasis_tag_stack = {}
self.remove_space = False
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
Expand All @@ -142,6 +144,7 @@ def feed(self, data: str) -> None:
super().feed(data)

def handle(self, data: str) -> str:
self.start = True
self.feed(data)
self.feed("")
markdown = self.optwrap(self.finish())
Expand Down Expand Up @@ -302,10 +305,19 @@ def handle_tag(
) -> None:
self.current_tag = tag

if tag in ["b","em","i","u"]:
if start:
if tag in self.emphasis_tag_stack:
self.emphasis_tag_stack[tag] += 1
else:
self.emphasis_tag_stack[tag] = 1
elif list(self.emphasis_tag_stack.keys()):
self.emphasis_tag_stack.popitem()

if self.tag_callback is not None:
if self.tag_callback(self, tag, attrs, start) is True:
return

# first thing inside the anchor tag is another tag
# that produces some output
if (
Expand Down Expand Up @@ -372,12 +384,24 @@ def handle_tag(
self.p()

if tag == "br" and start:
if self.astack:
self.space = True
elif self.blockquote > 0:
for key in list(self.emphasis_tag_stack.keys())[::-1]:
if(key == "b"):
self.o(self.strong_mark)
elif key in ["em","i","u"]:
self.o(self.emphasis_mark)

if self.blockquote > 0:
self.o(" \n> ")
else:
self.o(" \n")

for key in list(self.emphasis_tag_stack.keys()):
if(key == "b"):
self.o(self.strong_mark)
elif key in ["em","i","u"]:
self.o(self.emphasis_mark)
self.remove_space = True
self.drop_white_space = 1

if tag == "hr" and start:
self.p()
Expand Down Expand Up @@ -642,11 +666,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
# https://spec.commonmark.org/0.28/#motivation
# TODO: line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
for item in self.list:
self.o(
" " if parent_list == "ol" and list.name == "ul" else " "
" " if parent_list == "ol" and item.name == "ul" else " "
)
parent_list = list.name
parent_list = item.name

if li.name == "ul":
self.o(self.ul_item_mark + " ")
Expand Down Expand Up @@ -745,7 +769,7 @@ def o(
self.abbr_data += data

if not self.quiet:
if self.google_doc:
if self.google_doc or self.remove_space:
# prevent white space immediately after 'begin emphasis'
# marks ('**' and '_')
lstripped_data = data.lstrip()
Expand Down
7 changes: 3 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@ classifiers =
Operating System :: OS Independent
Programming Language :: Python
Programming Language :: Python :: 3
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
Expand All @@ -30,7 +29,7 @@ platform = OS Independent
[options]
zip_safe = False
packages = html2text
python_requires = >=3.5
python_requires = >=3.7

[options.entry_points]
console_scripts =
Expand All @@ -48,4 +47,4 @@ combine_as_imports = True
profile = black

[mypy]
python_version = 3.5
python_version = 3.7
1 change: 0 additions & 1 deletion test/br_inside_a.html

This file was deleted.

1 change: 0 additions & 1 deletion test/br_inside_a.md

This file was deleted.

1 change: 1 addition & 0 deletions test/new_line_in_emphasis.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<b>Our multiline<br />bold text</b>
3 changes: 3 additions & 0 deletions test/new_line_in_emphasis.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
**Our multiline**
**bold text**

8 changes: 8 additions & 0 deletions test/test_new_line_inside_emphasis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import html2text

def test_emphasis_with_new_line():
    """A ``<br />`` inside ``<b>`` ends the bold run before the break and reopens it after."""
    converter = html2text.HTML2Text()
    expected = '**Our multiline** \n**bold text**\n\n'
    assert converter.handle("<b>Our multiline<br />bold text</b>") == expected

12 changes: 12 additions & 0 deletions test/test_newlines_on_multiple_calls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import html2text

# See https://github.com/Alir3z4/html2text/issues/163 for more information.


def test_newline_on_multiple_calls():
    """Repeated ``handle`` calls on one converter must return identical Markdown."""
    converter = html2text.HTML2Text()
    # Convert the same document three times, in order, on the same instance.
    first, second, third = (converter.handle("<p>test</p>") for _ in range(3))
    assert first == second == third
10 changes: 5 additions & 5 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ envlist =
flake8
isort
mypy
py{35,36,37,38,py3}
minversion = 1.9
py{38,39,310,311,312}
minversion = 3.24

[testenv]
commands =
pytest --cov=html2text {posargs}
pytest --cov=./ --cov-report=xml {posargs}
deps =
pytest
pytest-cov

[testenv:black]
basepython = python3
commands =
black --target-version py35 --check --diff .
black --target-version py311 --check --diff .
deps =
black
skip_install = true
Expand All @@ -35,7 +35,7 @@ basepython = python3
commands =
isort --check-only --diff .
deps =
isort >= 5.0.1
isort >= 5.10.1
skip_install = true

[testenv:mypy]
Expand Down