From 560a513349d4186b2bf6c9069331a01bcc118966 Mon Sep 17 00:00:00 2001 From: Martijn van de Streek Date: Wed, 21 Jul 2021 16:03:19 +0200 Subject: [PATCH] Skip attachments without "__properties_version1.0" streams (#18) We've found that messages with RTF formatting that contain embedded images contain attachments without a "__properties_version1.0" stream. As the current code is built around the "__properties_version1.0" stream, these are skipped for now. These image attachments do contain streams named "Ole" and "MailStream" that should help with decoding/parsing in the future, but that's a bigger project. --- outlookmsgfile.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/outlookmsgfile.py b/outlookmsgfile.py index 73c83fb..0a07744 100644 --- a/outlookmsgfile.py +++ b/outlookmsgfile.py @@ -135,7 +135,11 @@ def load_message_stream(entry, is_top_level, doc): # Add attachments. for stream in entry: if stream.name.startswith("__attach_version1.0_#"): - process_attachment(msg, stream, doc) + try: + process_attachment(msg, stream, doc) + except KeyError as e: + print("Error processing attachment {} not found".format(str(e)), file=sys.stderr) + continue return msg @@ -229,7 +233,11 @@ def parse_properties(properties, is_top_level, container, doc): # Stream isn't present! print("stream missing", streamname, file=sys.stderr) continue - value = tag_type.load(value, doc) + try: + value = tag_type.load(value, doc) + except KeyError as e: + print("Error while reading stream: {} not found".format(str(e)) , file=sys.stderr) + continue else: # unrecognized type