From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.8 required=5.0 tests=ALL_TRUSTED,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,NO_DNS_FOR_FROM,URIBL_BLOCKED autolearn=no autolearn_force=no version=3.4.6 Received: from localhost.localdomain (unknown [101.128.125.123]) by gnuweeb.org (Postfix) with ESMTPSA id 93851812F6; Sat, 22 Oct 2022 06:52:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gnuweeb.org; s=default; t=1666421539; bh=uY9uF+MKgCHPGs+2P+ByGvSW+SJVeyO0WHb3ZQLNaFM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ZhbOgMZgD8ZTFTZV1y68f34yWIBb/LGBHEie5E/LeUNheQKHv+Q1arwuBijtIn9TN 859AOZL9kUUEryqooFMUs2ezaQyanFzSIC+vaKMkonUlPdFQ31FekC1ujqKHv0/tKY uNCNSHNosVgIQeHXjisxxfXGU5say1PlFen3v/rjRe9jZUIcl6yYcJqQ1GNdYapuKj hF4V6qL62yE3ppt4h9AmHXGDG6BnSYzH48s9+tHNhiKpTYgsIzq+aOd6RPGzRAmtgk 9X+62zdL5ZiFSuFoggi94+ArThMUeiIIdghXnWgdP6oEfbDUW2a7e7KlRB+p/IkKJG FBca438fm5aIw== From: Muhammad Rizki To: Ammar Faizi Cc: Muhammad Rizki , Alviro Iskandar Setiawan , GNU/Weeb Mailing List Subject: [PATCH v4 5/9] atom: add get_decoded_payload() Date: Sat, 22 Oct 2022 13:51:45 +0700 Message-Id: <20221022065149.865-6-kiizuha@gnuweeb.org> X-Mailer: git-send-email 2.34.1.windows.1 In-Reply-To: <20221022065149.865-1-kiizuha@gnuweeb.org> References: <20221022065149.865-1-kiizuha@gnuweeb.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Add get_decoded_payload() to handle decoding the email payload to UTF-8. This includes handling non-UTF-8 characters, base64 decoding, and quoted-printable decoding. 
Signed-off-by: Muhammad Rizki --- daemon/atom/utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/daemon/atom/utils.py b/daemon/atom/utils.py index f554f6f..deff99d 100644 --- a/daemon/atom/utils.py +++ b/daemon/atom/utils.py @@ -8,6 +8,7 @@ from pyrogram.types import Chat, InlineKeyboardMarkup, InlineKeyboardButton from email.message import Message from typing import Dict, Union from slugify import slugify +from base64 import b64decode import hashlib import uuid import os @@ -15,6 +16,7 @@ import re import shutil import httpx import html +import quopri def get_email_msg_id(mail): @@ -136,7 +138,7 @@ def gen_temp(name: str, platform: str): def extract_body(thread: Message, platform: str): if not thread.is_multipart(): - p = thread.get_payload(decode=True).decode(errors='replace') + p = get_decoded_payload(thread) if platform == "discord": p = quote_reply(p) @@ -253,6 +255,20 @@ def fix_utf8_char(text: str, html_escape: bool = True): return t +def get_decoded_payload(payload: Message): + p = str(payload.get_payload()) + tf_encode = payload.get("Content-Transfer-Encoding") + charset = payload.get_content_charset("utf-8") + + if tf_encode == "base64": + return b64decode(p).decode(charset) + if tf_encode == "quoted-printable": + quobyte = quopri.decodestring(p.encode()) + return quobyte.decode(charset) + + return p.encode().decode(charset, errors="replace") + + EMAIL_MSG_ID_PATTERN = r"<([^\<\>]+)>" def extract_email_msg_id(msg_id): ret = re.search(EMAIL_MSG_ID_PATTERN, msg_id) -- Muhammad Rizki