From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.8 required=5.0 tests=ALL_TRUSTED,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,NO_DNS_FOR_FROM,URIBL_BLOCKED autolearn=no autolearn_force=no version=3.4.6 Received: from localhost.localdomain (unknown [101.128.125.100]) by gnuweeb.org (Postfix) with ESMTPSA id 2268D80D11; Tue, 6 Sep 2022 11:19:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=gnuweeb.org; s=default; t=1662463199; bh=rxNnKV7UKtNlGPDomL/Qw4irX3wehVjyQ23fk7k9mVA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=C9Guhfn2bn++EVA8WvhEKBQ43OiNuHSgjiU4i7BsB+IeqVt3FnXmnSgfQP24bvZDm pzKYcYxx2e7LyDe91vpXcxVRMtH4YKT+IvP14VlxqypeU6N0ro9DBDcDpoKRw/iWX6 VD9QQvnMAKefaQwHfTaYioLxkao14BiIZIahJ1/pTFuN/D7hUmltQjr3tGEq2+j0tE d8RNo/vWO9i3UCSPPqIIiS9IABGEus0TUm5lSA9H6Cvtwm+vePVz+1p74F0ijvPnTK UM0lRiSSiQ5jN5+ytg7PA+Z0ui7l6lQXvQ1QMmLI82XqMmkcwDOk245pbeU0zPxuD6 nySPDWWZzExxw== From: Muhammad Rizki To: Ammar Faizi Cc: Muhammad Rizki , GNU/Weeb Mailing List , Alviro Iskandar Setiawan Subject: [RFC PATCH v1 5/5] Refactor many files Date: Tue, 6 Sep 2022 18:19:29 +0700 Message-Id: <20220906111929.1657-6-kiizuha@gnuweeb.org> X-Mailer: git-send-email 2.34.1.windows.1 In-Reply-To: <20220906111929.1657-1-kiizuha@gnuweeb.org> References: <20220906111929.1657-1-kiizuha@gnuweeb.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List-Id: I want to refactor the atom scraper file and the utility file, and move both of them into a new directory so that they can be reused in the future. 
This commit contains: - Rename some functions in utils - Rename files in telegram, such as scraper => mailer, bot.py => listener.py - Move class Mutexes to the utility file - Rename the Mutexes attribute send_to_tg => lock - Update the code affected by this refactor Signed-off-by: Muhammad Rizki --- .gitignore | 1 + daemon/atom/__init__.py | 7 ++ daemon/{telegram/scraper => atom}/scraper.py | 12 +-- daemon/{telegram/scraper => atom}/utils.py | 87 ++++++++++++++----- daemon/{telegram => }/db.sql | 0 .../.env.example => telegram.env.example} | 0 .../telegram/{scraper => mailer}/__init__.py | 4 +- .../{scraper/bot.py => mailer/listener.py} | 23 ++--- daemon/telegram/packages/client.py | 10 ++- .../packages/plugins/callbacks/del_atom.py | 6 +- .../packages/plugins/callbacks/del_chat.py | 6 +- .../packages/plugins/commands/debugger.py | 2 +- .../packages/plugins/commands/manage_atom.py | 6 +- .../plugins/commands/manage_broadcast.py | 6 +- .../packages/plugins/commands/scrape.py | 10 +-- daemon/{telegram/run.py => tg.py} | 24 +++-- 16 files changed, 122 insertions(+), 82 deletions(-) create mode 100644 daemon/atom/__init__.py rename daemon/{telegram/scraper => atom}/scraper.py (79%) rename daemon/{telegram/scraper => atom}/utils.py (72%) rename daemon/{telegram => }/db.sql (100%) rename daemon/{telegram/.env.example => telegram.env.example} (100%) rename daemon/telegram/{scraper => mailer}/__init__.py (68%) rename daemon/telegram/{scraper/bot.py => mailer/listener.py} (88%) rename daemon/{telegram/run.py => tg.py} (68%) diff --git a/.gitignore b/.gitignore index 4201a17..53027d9 100644 --- a/.gitignore +++ b/.gitignore @@ -140,5 +140,6 @@ data.json *.patch # configuration file +daemon/*.env daemon/telegram/config.py daemon/discord/config.py diff --git a/daemon/atom/__init__.py b/daemon/atom/__init__.py new file mode 100644 index 0000000..2fe4e31 --- /dev/null +++ b/daemon/atom/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2022 
Muhammad Rizki +# Copyright (C) 2022 Ammar Faizi +# + +from .scraper import Scraper diff --git a/daemon/telegram/scraper/scraper.py b/daemon/atom/scraper.py similarity index 79% rename from daemon/telegram/scraper/scraper.py rename to daemon/atom/scraper.py index 2d5942b..8508ae9 100644 --- a/daemon/telegram/scraper/scraper.py +++ b/daemon/atom/scraper.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Copyright (C) 2022 Muhammad Rizki +# Copyright (C) 2022 Muhammad Rizki # Copyright (C) 2022 Ammar Faizi # @@ -11,7 +11,7 @@ import httpx import email -class Scraper(): +class Scraper: async def get_new_threads_urls(self, atom_url): ret = await self.__get_atom_content(atom_url) return await self.__get_new_threads_from_atom(ret) @@ -19,10 +19,10 @@ class Scraper(): async def __get_atom_content(self, atom_url): async with httpx.AsyncClient() as client: - res = await client.get(atom_url) + res = await client.get(atom_url, timeout=20) if res.status_code == 200: return res.text - raise Exception(f"[get_atom_content]: Returned {res.status_code} HTTP code") + raise Exception(f"[__get_atom_content]: Returned {res.status_code} HTTP code") async def __get_new_threads_from_atom(self, atom): @@ -54,10 +54,10 @@ class Scraper(): async def get_email_from_url(self, url): async with httpx.AsyncClient() as client: - res = await client.get(url) + res = await client.get(url, timeout=20) if res.status_code == 200: return email.message_from_string( res.text, policy=email.policy.default ) - raise Exception(f"[get_atom_content]: Returned {res.status_code} HTTP code") + raise Exception(f"[get_email_from_url]: Returned {res.status_code} HTTP code") diff --git a/daemon/telegram/scraper/utils.py b/daemon/atom/utils.py similarity index 72% rename from daemon/telegram/scraper/utils.py rename to daemon/atom/utils.py index c428a33..d73d6bd 100644 --- a/daemon/telegram/scraper/utils.py +++ b/daemon/atom/utils.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Copyright (C) 
2022 Muhammad Rizki +# Copyright (C) 2022 Muhammad Rizki # Copyright (C) 2022 Ammar Faizi # @@ -8,13 +8,19 @@ from pyrogram.types import Chat, InlineKeyboardMarkup, InlineKeyboardButton from email.message import Message from typing import Dict from slugify import slugify +import html import hashlib import uuid import os import re import shutil import httpx -import html +import asyncio + + +class Mutexes: + def __init__(self): + self.lock = asyncio.Lock() def get_email_msg_id(mail): @@ -113,25 +119,37 @@ def consruct_to_n_cc(to: list, cc: list): return ret -def gen_temp(name: str): +def gen_temp(name: str, platform: str): + platform = platform.lower() + plt_ls = ["telegram", "discord"] + + if platform not in plt_ls: + t = f"Platform {platform} is not found, " + t += f"only {', '.join(plt_ls)} is available" + raise ValueError(f"Platform {platform} is not found") + md5 = hashlib.md5(name.encode()).hexdigest() - ret = os.getenv("STORAGE_DIR", "storage") + "/" + md5 + store_dir = os.getenv("STORAGE_DIR", "storage") + platform = platform.replace("discord", "dscord") + path = f"{platform}/{store_dir}/{md5}" try: - os.mkdir(ret) + os.mkdir(path) except FileExistsError: pass - return ret + return path -def extract_body(thread: Message): +def extract_body(thread: Message, platform: str): if not thread.is_multipart(): - p = thread.get_payload(decode=True) - return f"{p.decode(errors='replace')}\n".lstrip(), [] + p = thread.get_payload(decode=True).decode(errors='replace') + if platform == "discord": + p = quote_reply(p) + return f"{p}\n".lstrip(), [] ret = "" files = [] - temp = gen_temp(str(uuid.uuid4())) + temp = gen_temp(str(uuid.uuid4()), platform) for p in thread.get_payload(): fname = p.get_filename() payload = p.get_payload(decode=True) @@ -164,35 +182,42 @@ def __is_patch(subject, content): return True -def create_template(thread: Message, to=None, cc=None): +def create_template(thread: Message, platform: str, to=None, cc=None): if not to: to = extract_list("to", 
thread) if not cc: cc = extract_list("cc", thread) + if platform == "telegram": + substr = 4000 + border = f"\n{'-'*72}" + else: + substr = 1900 + border = f"\n{'-'*80}" subject = thread.get('subject') ret = f"From: {thread.get('from')}\n" ret += consruct_to_n_cc(to, cc) ret += f"Date: {thread.get('date')}\n" ret += f"Subject: {subject}\n\n" - content, files = extract_body(thread) + content, files = extract_body(thread, platform) is_patch = __is_patch(subject, content) if is_patch: ret += content else: ret += content.strip().replace("\t", " ") - if len(ret) >= 4000: - ret = ret[:4000] + "..." - ret = fix_utf8_char(ret) - ret += f"\n{'-'*72}" + if len(ret) >= substr: + ret = ret[:substr] + "..." + + ret = fix_utf8_char(ret, platform == "telegram") + ret += border return ret, files, is_patch -def prepare_send_patch(mail, text, url): - tmp = gen_temp(url) +def prepare_patch(mail: "Message", text: str, url: str, platform: str): + tmp = gen_temp(url, platform) fnm = str(mail.get("subject")) sch = re.search(PATCH_PATTERN, fnm, re.IGNORECASE) @@ -210,17 +235,31 @@ def prepare_send_patch(mail, text, url): with open(file, "wb") as f: f.write(bytes(text, encoding="utf8")) - caption = "#patch #ml\n" + fix_utf8_char(cap) + caption = "#patch #ml" + if platform == "telegram": + caption += fix_utf8_char("\n" + cap, True) return tmp, file, caption, url -def clean_up_after_send_patch(tmp): +def remove_patch(tmp): shutil.rmtree(tmp) -def fix_utf8_char(text: str): - text = text.rstrip().replace("�"," ") - return html.escape(html.escape(text)) +def fix_utf8_char(text: str, html_escape: bool = True): + t = text.rstrip().replace("�"," ") + if html_escape: + t = html.escape(html.escape(text)) + return t + + +def quote_reply(text: str): + a = "" + for b in text.split("\n"): + b = b.replace(">\n", "> ") + if b.startswith(">"): + a += "> " + a += f"{b}\n" + return a EMAIL_MSG_ID_PATTERN = r"<([^\<\>]+)>" @@ -240,6 +279,8 @@ async def is_atom_url(text: str): return mime == 
"application/atom+xml" except: return False + + def remove_command(text: str): txt = text.split(" ") txt = text.replace(txt[0] + " ","") diff --git a/daemon/telegram/db.sql b/daemon/db.sql similarity index 100% rename from daemon/telegram/db.sql rename to daemon/db.sql diff --git a/daemon/telegram/.env.example b/daemon/telegram.env.example similarity index 100% rename from daemon/telegram/.env.example rename to daemon/telegram.env.example diff --git a/daemon/telegram/scraper/__init__.py b/daemon/telegram/mailer/__init__.py similarity index 68% rename from daemon/telegram/scraper/__init__.py rename to daemon/telegram/mailer/__init__.py index 4294302..20f9034 100644 --- a/daemon/telegram/scraper/__init__.py +++ b/daemon/telegram/mailer/__init__.py @@ -4,6 +4,4 @@ # Copyright (C) 2022 Ammar Faizi # -from .scraper import Scraper -from .bot import BotMutexes -from .bot import Bot +from .listener import Listener diff --git a/daemon/telegram/scraper/bot.py b/daemon/telegram/mailer/listener.py similarity index 88% rename from daemon/telegram/scraper/bot.py rename to daemon/telegram/mailer/listener.py index a7087ad..5e9acd2 100644 --- a/daemon/telegram/scraper/bot.py +++ b/daemon/telegram/mailer/listener.py @@ -6,26 +6,21 @@ from pyrogram.types import Message from apscheduler.schedulers.asyncio import AsyncIOScheduler -from packages import DaemonClient -from scraper import Scraper -from . 
import utils +from telegram.packages import DaemonClient +from atom import Scraper +from atom import utils import asyncio import shutil import re import traceback -class BotMutexes(): - def __init__(self): - self.send_to_tg = asyncio.Lock() - - -class Bot(): +class Listener: def __init__(self, client: DaemonClient, sched: AsyncIOScheduler, - scraper: Scraper, mutexes: BotMutexes): + mutexes: utils.Mutexes): self.client = client self.sched = sched - self.scraper = scraper + self.scraper = Scraper() self.mutexes = mutexes self.db = client.db self.isRunnerFixed = False @@ -72,7 +67,7 @@ class Bot(): async def __handle_mail(self, url, mail): chats = self.db.get_broadcast_chats() for chat in chats: - async with self.mutexes.send_to_tg: + async with self.mutexes.lock: should_wait = await self.__send_mail(url, mail, chat[1]) @@ -80,7 +75,7 @@ class Bot(): await asyncio.sleep(1) - # @__must_hold(self.mutexes.send_to_tg) + # @__must_hold(self.mutexes.lock) async def __send_mail(self, url, mail, tg_chat_id): email_msg_id = utils.get_email_msg_id(mail) if not email_msg_id: @@ -99,7 +94,7 @@ class Bot(): # return False - text, files, is_patch = utils.create_template(mail) + text, files, is_patch = utils.create_template(mail, "telegram") reply_to = self.get_reply(mail, tg_chat_id) url = str(re.sub(r"/raw$", "", url)) diff --git a/daemon/telegram/packages/client.py b/daemon/telegram/packages/client.py index 820c3e2..686e5ef 100644 --- a/daemon/telegram/packages/client.py +++ b/daemon/telegram/packages/client.py @@ -8,8 +8,8 @@ from pyrogram.enums import ParseMode from pyrogram.types import Message, InlineKeyboardMarkup, InlineKeyboardButton from typing import Union from email.message import Message -from scraper import utils -from database import DB +from atom import utils +from telegram.database import DB from .decorator import handle_flood @@ -56,7 +56,9 @@ class DaemonClient(Client): parse_mode: ParseMode = ParseMode.HTML ) -> Message: print("[send_patch_email]") - tmp, doc, 
caption, url = utils.prepare_send_patch(mail, text, url) + tmp, doc, caption, url = utils.prepare_patch( + mail, text, url, "telegram" + ) m = await self.send_document( chat_id=chat_id, document=doc, @@ -71,5 +73,5 @@ class DaemonClient(Client): ]) ) - utils.clean_up_after_send_patch(tmp) + utils.remove_patch(tmp) return m diff --git a/daemon/telegram/packages/plugins/callbacks/del_atom.py b/daemon/telegram/packages/plugins/callbacks/del_atom.py index 1510d60..b750e1c 100644 --- a/daemon/telegram/packages/plugins/callbacks/del_atom.py +++ b/daemon/telegram/packages/plugins/callbacks/del_atom.py @@ -3,10 +3,10 @@ # Copyright (C) 2022 Muhammad Rizki # -from packages import DaemonClient -from scraper import utils +from telegram.packages import DaemonClient +from atom import utils from pyrogram.types import CallbackQuery -import config +from telegram import config @DaemonClient.on_callback_query(config.admin_only, group=1) diff --git a/daemon/telegram/packages/plugins/callbacks/del_chat.py b/daemon/telegram/packages/plugins/callbacks/del_chat.py index 26c6dd8..90b557e 100644 --- a/daemon/telegram/packages/plugins/callbacks/del_chat.py +++ b/daemon/telegram/packages/plugins/callbacks/del_chat.py @@ -3,10 +3,10 @@ # Copyright (C) 2022 Muhammad Rizki # -from packages import DaemonClient -from scraper import utils +from telegram.packages import DaemonClient +from atom import utils from pyrogram.types import CallbackQuery -import config +from telegram import config @DaemonClient.on_callback_query(config.admin_only, group=2) diff --git a/daemon/telegram/packages/plugins/commands/debugger.py b/daemon/telegram/packages/plugins/commands/debugger.py index ae2d31d..7f6f367 100644 --- a/daemon/telegram/packages/plugins/commands/debugger.py +++ b/daemon/telegram/packages/plugins/commands/debugger.py @@ -7,7 +7,7 @@ from pyrogram import Client, filters, enums from pyrogram.types import Message from textwrap import indent import io, import_expression, contextlib, traceback -import 
config +from telegram import config @Client.on_message( diff --git a/daemon/telegram/packages/plugins/commands/manage_atom.py b/daemon/telegram/packages/plugins/commands/manage_atom.py index 4ba422a..99df7f7 100644 --- a/daemon/telegram/packages/plugins/commands/manage_atom.py +++ b/daemon/telegram/packages/plugins/commands/manage_atom.py @@ -5,9 +5,9 @@ from pyrogram.types import Message from pyrogram import filters -from packages import DaemonClient -from scraper import utils -import config +from telegram.packages import DaemonClient +from atom import utils +from telegram import config @DaemonClient.on_message( diff --git a/daemon/telegram/packages/plugins/commands/manage_broadcast.py b/daemon/telegram/packages/plugins/commands/manage_broadcast.py index 6d75c36..0aa70de 100644 --- a/daemon/telegram/packages/plugins/commands/manage_broadcast.py +++ b/daemon/telegram/packages/plugins/commands/manage_broadcast.py @@ -5,9 +5,9 @@ from pyrogram.types import Message from pyrogram import filters, enums -from packages import DaemonClient -from scraper import utils -import config +from telegram.packages import DaemonClient +from atom import utils +from telegram import config @DaemonClient.on_message( diff --git a/daemon/telegram/packages/plugins/commands/scrape.py b/daemon/telegram/packages/plugins/commands/scrape.py index 45b1581..4cdbf1c 100644 --- a/daemon/telegram/packages/plugins/commands/scrape.py +++ b/daemon/telegram/packages/plugins/commands/scrape.py @@ -6,10 +6,10 @@ from pyrogram.types import Message from pyrogram import filters -from packages import DaemonClient -from scraper import Scraper -from scraper import utils -import config +from telegram.packages import DaemonClient +from atom import Scraper +from atom import utils +from telegram import config import shutil import re import asyncio @@ -37,7 +37,7 @@ async def scrap_email(c: DaemonClient, m: Message): s = Scraper() mail = await s.get_email_from_url(url) - text, files, is_patch = 
utils.create_template(mail) + text, files, is_patch = utils.create_template(mail, "telegram") if is_patch: m = await c.send_patch_email( diff --git a/daemon/telegram/run.py b/daemon/tg.py similarity index 68% rename from daemon/telegram/run.py rename to daemon/tg.py index 5360395..c3e85ab 100644 --- a/daemon/telegram/run.py +++ b/daemon/tg.py @@ -1,24 +1,23 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Copyright (C) 2022 Muhammad Rizki +# Copyright (C) 2022 Muhammad Rizki # Copyright (C) 2022 Ammar Faizi # from apscheduler.schedulers.asyncio import AsyncIOScheduler -from scraper import BotMutexes +from atom.utils import Mutexes from dotenv import load_dotenv from mysql import connector -from packages import DaemonClient -from scraper import Scraper -from scraper import Bot +from telegram.packages import DaemonClient +from telegram.mailer import Listener import os def main(): - load_dotenv() + load_dotenv("telegram.env") client = DaemonClient( - "storage/EmailScraper", + "telegram/storage/EmailScraper", api_id=int(os.getenv("API_ID")), api_hash=os.getenv("API_HASH"), bot_token=os.getenv("BOT_TOKEN"), @@ -28,9 +27,7 @@ def main(): password=os.getenv("DB_PASS"), database=os.getenv("DB_NAME") ), - plugins=dict( - root="packages.plugins" - ), + plugins=dict(root="telegram.packages.plugins") ) sched = AsyncIOScheduler( @@ -40,14 +37,13 @@ def main(): } ) - bot = Bot( + mailer = Listener( client=client, sched=sched, - scraper=Scraper(), - mutexes=BotMutexes() + mutexes=Mutexes() ) sched.start() - bot.run() + mailer.run() client.run() -- Muhammad Rizki