Use html2text to back-fill a plain text body if an HTML body is present

This commit is contained in:
Joshua Tauberer
2024-02-23 09:51:04 -05:00
parent 4104dc937d
commit 85f6573ecb
3 changed files with 9 additions and 0 deletions

View File

@@ -26,6 +26,7 @@ from email.utils import parsedate_to_datetime, formatdate, formataddr
import compoundfiles
from rtfparse.parser import Rtf_Parser
from rtfparse.renderers.de_encapsulate_html import De_encapsulate_HTML
import html2text
logger = logging.getLogger(__name__)
@@ -131,6 +132,12 @@ def load_message_stream(entry, is_top_level, doc):
De_encapsulate_HTML().render(parsed, html_stream)
html_body = html_stream.getvalue()
if not has_body:
# Try to convert that to text/plain if possible.
text_body = html2text.html2text(html_body)
msg.set_content(text_body, subtype="text", cte='quoted-printable')
has_body = True
if not has_body:
msg.set_content(html_body, subtype="html", cte='quoted-printable')
has_body = True

View File

@@ -1,3 +1,4 @@
compoundfiles
compressed-rtf
rtfparse # Python 3.9+ only
html2text

View File

@@ -4,6 +4,7 @@ install_requires = [
'compoundfiles',
'compressed_rtf',
'rtfparse',
'html2text',
]
with open("README.md", "r") as fh: