From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.8 required=5.0 tests=ALL_TRUSTED,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,NO_DNS_FOR_FROM autolearn=no autolearn_force=no version=3.4.6 Received: from localhost.localdomain (unknown [101.128.125.209]) by gnuweeb.org (Postfix) with ESMTPSA id CE7698193C; Mon, 19 Dec 2022 23:52:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gnuweeb.org; s=default; t=1671493977; bh=hXgn0H6dwaaymX9fkWQk93LLYKlqX2wQp1BEDQTTVQA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YwXTBChGgaItaprP1AWoiEkbcU8Td4muoSFDsZF0tePLlsfa19zwW2tfz7ssaXj9N PFHSOPodKlJLETjCwMCUuqfnGpWJ1Gt080kaIXs1QcaKHV2SHwG00j+g/FEIBD1NQZ MXC9muo+DfA92mWiQn22iuqtRBTw0PFoZAxD/o7gSYwdkuoi9LCbfhTy2h6KN+KqRp sw+hFuWAm+T9Qcp55XaZLgT+RGHhkwoVQ1biecIysn/0WsMOlyRFKrEhjR1xImIXXn RDMpYqMN1bikr2XVESmwGhvZ6wRBL2dQ7yAkNRsyDXWNWZCNTDnB3xoVCR9vEG6RcX Ka8oAn+n/DZtA== From: Muhammad Rizki To: Cc: Muhammad Rizki , Alviro Iskandar Setiawan , Ammar Faizi , GNU/Weeb Mailing List Subject: [PATCH 05/28] atom: add get_decoded_payload() Date: Tue, 20 Dec 2022 06:52:03 +0700 Message-Id: <20221219235226.1567-5-kiizuha@gnuweeb.org> X-Mailer: git-send-email 2.34.1.windows.1 In-Reply-To: <20221219235226.1567-1-kiizuha@gnuweeb.org> References: <20221219235226.1567-1-kiizuha@gnuweeb.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Add get_decoded_payload() to handle decoding the email payload to UTF-8. This includes handling non-UTF-8 characters, base64 decoding, and quoted-printable decoding. 
Signed-off-by: Muhammad Rizki Link: https://lore.gnuweeb.org/gwml/20221022065149.865-6-kiizuha@gnuweeb.org Cc: Alviro Iskandar Setiawan Cc: Ammar Faizi Cc: GNU/Weeb Mailing List Signed-off-by: Ammar Faizi --- daemon/atom/utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/daemon/atom/utils.py b/daemon/atom/utils.py index f554f6f..deff99d 100644 --- a/daemon/atom/utils.py +++ b/daemon/atom/utils.py @@ -8,6 +8,7 @@ from pyrogram.types import Chat, InlineKeyboardMarkup, InlineKeyboardButton from email.message import Message from typing import Dict, Union from slugify import slugify +from base64 import b64decode import hashlib import uuid import os @@ -15,6 +16,7 @@ import re import shutil import httpx import html +import quopri def get_email_msg_id(mail): @@ -136,7 +138,7 @@ def gen_temp(name: str, platform: str): def extract_body(thread: Message, platform: str): if not thread.is_multipart(): - p = thread.get_payload(decode=True).decode(errors='replace') + p = get_decoded_payload(thread) if platform == "discord": p = quote_reply(p) @@ -253,6 +255,20 @@ def fix_utf8_char(text: str, html_escape: bool = True): return t +def get_decoded_payload(payload: Message): + p = str(payload.get_payload()) + tf_encode = payload.get("Content-Transfer-Encoding") + charset = payload.get_content_charset("utf-8") + + if tf_encode == "base64": + return b64decode(p).decode(charset) + if tf_encode == "quoted-printable": + quobyte = quopri.decodestring(p.encode()) + return quobyte.decode(charset) + + return p.encode().decode(charset, errors="replace") + + EMAIL_MSG_ID_PATTERN = r"<([^\<\>]+)>" def extract_email_msg_id(msg_id): ret = re.search(EMAIL_MSG_ID_PATTERN, msg_id) -- 2.34.1.windows.1