From 8228c2222fcf5791fe5643252e4d248839c199e9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 16 Nov 2023 10:42:10 -0600 Subject: [PATCH 1/2] Try both utf-8 and windows-1252 for decoding email Recent submissions from Cirrus were classified as spam by the lore analysis robot script. This is because Cirrus used windows-1252 for the encoding, which failed to decode as utf-8. Try both encodings when decoding email. Signed-off-by: Mario Limonciello --- contrib/process_linux_firmware.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py index 668e35c0..ea108391 100755 --- a/contrib/process_linux_firmware.py +++ b/contrib/process_linux_firmware.py @@ -34,6 +34,8 @@ content_types = { def classify_content(content): # load content into the email library msg = email.message_from_string(content) + decoded = None + body = None # check the subject subject = msg["Subject"] @@ -42,17 +44,28 @@ def classify_content(content): if "PATCH" in subject: return ContentType.PATCH - for part in msg.walk(): - if part.get_content_type() == "text/plain": + if msg.is_multipart(): + for part in msg.walk(): + if part.get_content_type() == "text/plain": + body = part.get_payload(decode=True) + else: + body = msg.get_payload(decode=True) + + if body: + for encoding in ["utf-8", "windows-1252"]: try: - body = part.get_payload(decode=True).decode("utf-8") - for key in content_types.keys(): - if key in body: - return content_types[key] - break - except UnicodeDecodeError as e: - logging.warning("Failed to decode email: %s, treating as SPAM" % e) + decoded = body.decode(encoding) break + except UnicodeDecodeError: + pass + + if decoded: + for key in content_types.keys(): + if key in decoded: + return content_types[key] + else: + logging.warning("Failed to decode email: %s, treating as SPAM", body) + return ContentType.SPAM From bfd5f0b9d5998a5c15e68579265a83753fc71634 Mon Sep 
17 00:00:00 2001 From: Mario Limonciello Date: Thu, 16 Nov 2023 10:52:46 -0600 Subject: [PATCH 2/2] Make email replies more resilient Signed-off-by: Mario Limonciello --- contrib/process_linux_firmware.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py index ea108391..8e2eb350 100755 --- a/contrib/process_linux_firmware.py +++ b/contrib/process_linux_firmware.py @@ -81,6 +81,11 @@ def quiet_cmd(cmd): def reply_email(content, branch): + user = None + password = None + server = None + port = None + if "SMTP_USER" in os.environ: user = os.environ["SMTP_USER"] if "SMTP_PASS" in os.environ: @@ -96,15 +101,26 @@ def reply_email(content, branch): reply = email.message.EmailMessage() orig = email.message_from_string(content) - reply["To"] = ", ".join( - email.utils.formataddr(t) - for t in email.utils.getaddresses( - orig.get_all("from", []) + orig.get_all("to", []) + orig.get_all("cc", []) + try: + reply["To"] = ", ".join( + email.utils.formataddr(t) + for t in email.utils.getaddresses( + orig.get_all("from", []) + + orig.get_all("to", []) + + orig.get_all("cc", []) + ) ) - ) + except ValueError: + logging.warning("Failed to parse email addresses, not sending email") + return reply["From"] = "linux-firmware@kernel.org" - reply["Subject"] = "Re: {}".format(orig["Subject"]) + try: + reply["Subject"] = "Re: {}".format(orig["Subject"]) + except ValueError: + logging.warning("Failed to parse subject, not sending email") + return + reply["In-Reply-To"] = orig["Message-Id"] reply["References"] = orig["Message-Id"] reply["Thread-Topic"] = orig["Thread-Topic"]