From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.8 required=5.0 tests=ALL_TRUSTED,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,NO_DNS_FOR_FROM,URIBL_BLOCKED autolearn=no autolearn_force=no version=3.4.6 Received: from localhost.localdomain (unknown [101.128.125.123]) by gnuweeb.org (Postfix) with ESMTPSA id 77D8081275; Fri, 21 Oct 2022 13:45:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gnuweeb.org; s=default; t=1666359950; bh=O8DgYy1ZDXvfCzRYnOtPVkqeu64qm7Z2jdXGMI4gM7o=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hIpLyi8iSV5kvGSZmOLcqmWy+XwPVodudY42OBwsL6sYhMyM4ICzpHxzQ9wytH9mo RsVrFndcFHvXh6tAJEviC5aaQB4PSuB207AXfn3RBs/c3j+JF/8oZEe/IPRO0V9Ebq 5T1D5/kWAtafSls0xC8kJm7ptg9lvwgTPzvTN1PlOXq/rM5tztdbdS3KLExmHIc+Ye 8+nHX1iL9uDf/X+p4eEjt/v0NPYi8fmQgAicueSKpaJJanmgTeV9mVzZqOY7DOW9EN pOOSqi3eQu8NmO5TAJbBjFC0nPKm9t1h7qkbppur9FuoNyb3sl0sYwMFGgTIA8sMW7 m9tyS4uNefyFw== From: Muhammad Rizki To: Ammar Faizi Cc: Muhammad Rizki , Alviro Iskandar Setiawan , GNU/Weeb Mailing List Subject: [PATCH v3 5/9] atom: add manage_payload() Date: Fri, 21 Oct 2022 20:45:16 +0700 Message-Id: <20221021134520.701-6-kiizuha@gnuweeb.org> X-Mailer: git-send-email 2.34.1.windows.1 In-Reply-To: <20221021134520.701-1-kiizuha@gnuweeb.org> References: <20221021134520.701-1-kiizuha@gnuweeb.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Add manage_payload() to handle decoding the email payload to UTF-8. This includes handling non-UTF-8 characters and base64 decoding. 
Signed-off-by: Muhammad Rizki --- daemon/atom/utils.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/daemon/atom/utils.py b/daemon/atom/utils.py index f554f6f..ed5ca03 100644 --- a/daemon/atom/utils.py +++ b/daemon/atom/utils.py @@ -8,6 +8,7 @@ from pyrogram.types import Chat, InlineKeyboardMarkup, InlineKeyboardButton from email.message import Message from typing import Dict, Union from slugify import slugify +from base64 import b64decode import hashlib import uuid import os @@ -15,6 +16,7 @@ import re import shutil import httpx import html +import quopri def get_email_msg_id(mail): @@ -136,7 +138,7 @@ def gen_temp(name: str, platform: str): def extract_body(thread: Message, platform: str): if not thread.is_multipart(): - p = thread.get_payload(decode=True).decode(errors='replace') + p = manage_payload(thread) if platform == "discord": p = quote_reply(p) @@ -253,6 +255,18 @@ def fix_utf8_char(text: str, html_escape: bool = True): return t +def manage_payload(payload: Message): + p = str(payload.get_payload()) + tf_encode = payload.get("Content-Transfer-Encoding") + + if tf_encode == "base64": + return b64decode(p).decode("utf-8") + if tf_encode == "quoted-printable": + return quopri.decodestring(p.encode()).decode() + + return p.encode().decode("utf-8", errors="replace") + + EMAIL_MSG_ID_PATTERN = r"<([^\<\>]+)>" def extract_email_msg_id(msg_id): ret = re.search(EMAIL_MSG_ID_PATTERN, msg_id) -- Muhammad Rizki