Skip attachments without "__properties_version1.0" streams (#18)

We've found that RTF-formatted messages with embedded images contain
attachments that have no "__properties_version1.0" stream.

As the current code is built around the "__properties_version1.0" stream,
these attachments are skipped for now.

These image attachments do contain streams named "Ole" and "MailStream"
that should help with decoding/parsing in the future, but that's a bigger
project.
This commit is contained in:
Martijn van de Streek
2021-07-21 16:03:19 +02:00
committed by GitHub
parent a057080bad
commit 560a513349
+8
View File
@@ -135,7 +135,11 @@ def load_message_stream(entry, is_top_level, doc):
# Add attachments. # Add attachments.
for stream in entry: for stream in entry:
if stream.name.startswith("__attach_version1.0_#"): if stream.name.startswith("__attach_version1.0_#"):
try:
process_attachment(msg, stream, doc) process_attachment(msg, stream, doc)
except KeyError as e:
print("Error processing attachment {} not found".format(str(e)), file=sys.stderr)
continue
return msg return msg
@@ -229,7 +233,11 @@ def parse_properties(properties, is_top_level, container, doc):
# Stream isn't present! # Stream isn't present!
print("stream missing", streamname, file=sys.stderr) print("stream missing", streamname, file=sys.stderr)
continue continue
try:
value = tag_type.load(value, doc) value = tag_type.load(value, doc)
except KeyError as e:
print("Error while reading stream: {} not found".format(str(e)) , file=sys.stderr)
continue
else: else:
# unrecognized type # unrecognized type