public inbox for [email protected]
 help / color / mirror / Atom feed
From: Alviro Iskandar Setiawan <[email protected]>
To: Ammar Faizi <[email protected]>
Cc: Alviro Iskandar Setiawan <[email protected]>,
	GNU/Weeb Mailing List <[email protected]>
Subject: [PATCH] generic scraper: add chat scraper
Date: Fri, 13 Aug 2021 21:37:31 +0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Changes:
 - Create tgvisd::Scrapers::ChatScraper class.
 - Move `db_` object from tgvisd::Scraper to
   tgvisd::Scrapers::ChatScraper.

Each scraper instance should have its own class inside the namespace
tgvisd::Scrapers. Each of them should also have its own database
connection instance if needed.

Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
 CMakeLists.txt                      |  2 +
 src/tgvisd/Main.hpp                 |  5 +++
 src/tgvisd/Scraper.cpp              | 48 ++++++++++----------
 src/tgvisd/Scraper.hpp              | 10 ++++-
 src/tgvisd/Scrapers/ChatScraper.cpp | 69 +++++++++++++++++++++++++++++
 src/tgvisd/Scrapers/ChatScraper.hpp | 47 ++++++++++++++++++++
 6 files changed, 156 insertions(+), 25 deletions(-)
 create mode 100644 src/tgvisd/Scrapers/ChatScraper.cpp
 create mode 100644 src/tgvisd/Scrapers/ChatScraper.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f97fc83..4d32c4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(TGVISD_TD_SOURCE
 set(TGVISD_CORE_SOURCE
   src/tgvisd/DBFunc/DBStatement.cpp
   src/tgvisd/DBFunc/DBStatement.hpp
+  src/tgvisd/Scrapers/ChatScraper.cpp
+  src/tgvisd/Scrapers/ChatScraper.hpp
   src/tgvisd/common.hpp
   src/tgvisd/DB.cpp
   src/tgvisd/DB.hpp
diff --git a/src/tgvisd/Main.hpp b/src/tgvisd/Main.hpp
index 6067986..60c1cba 100644
--- a/src/tgvisd/Main.hpp
+++ b/src/tgvisd/Main.hpp
@@ -38,6 +38,11 @@ public:
 		return isReady_;
 	}
 
+	inline tgvisd::Td::Td *getTd(void)
+	{
+		return &td_;
+	}
+
 private:
 	tgvisd::Td::Td td_;
 	volatile bool isReady_ = false;
diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index d6221ab..c8e5c92 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -35,28 +35,35 @@ Scraper::Scraper(Main *main, std::thread *threadPtr):
 
 Scraper::~Scraper(void)
 {
-	if (db_) {
-		delete db_;
-		db_ = nullptr;
+	if (chatScraper_) {
+		delete chatScraper_;
+		chatScraper_ = nullptr;
 	}
 }
 
 
-static void run_scraper(Scraper *s, Main *main, DB *db)
+void Scraper::runScrapers(void)
 {
-	auto st = db->prepare("SELECT eeee;");
-	st->execute();
-	if (auto row = st->fetch()) {
-		int ft;
-		char buffer[64];
-		auto stmt = st->getStmt();
-		size_t len = sizeof(buffer);
-
-		ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
-		mysql_fetch_chk(ft, stmt);
-		pr_notice("buf = %s (len = %zu)", buffer, len);
-	}
-	sleep(1);
+	// auto st = db->prepare("SELECT eeee;");
+	// st->execute();
+	// if (auto row = st->fetch()) {
+	// 	int ft;
+	// 	char buffer[64];
+	// 	auto stmt = st->getStmt();
+	// 	size_t len = sizeof(buffer);
+
+	// 	ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
+	// 	mysql_fetch_chk(ft, stmt);
+	// 	pr_notice("buf = %s (len = %zu)", buffer, len);
+	// }
+	// sleep(1);
+
+	chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+
+	std::thread chatScraper([this]{
+		this->chatScraper_->run();
+	});
+	chatScraper.join();
 }
 
 
@@ -68,12 +75,7 @@ void Scraper::run(void)
 	}
 
 	try {
-		db_ = DB::create_conn_from_env();
-		db_->connect();
-
-		while (!main_->getStop())
-			run_scraper(this, main_, db_);
-
+		runScrapers();
 	} catch (std::runtime_error &e) {
 		pr_err("std::runtime_error: %s", e.what());
 		main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index 75bc6bf..a7308d0 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -10,11 +10,11 @@
 #ifndef TGVISD__SCRAPER_HPP
 #define TGVISD__SCRAPER_HPP
 
-#include <tgvisd/DB.hpp>
 #include <tgvisd/Td/Td.hpp>
 #include <tgvisd/common.hpp>
 
 #include <tgvisd/Main.hpp>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
 
 namespace tgvisd {
 
@@ -25,9 +25,15 @@ public:
 	~Scraper(void);
 	void run(void);
 
+	inline Main *getMain(void)
+	{
+		return main_;
+	}
+
 private:
-	DB *db_ = nullptr;
+	void runScrapers(void);
 	Main *main_ = nullptr;
+	tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
 };
 
 } /* namespace tgvisd */
diff --git a/src/tgvisd/Scrapers/ChatScraper.cpp b/src/tgvisd/Scrapers/ChatScraper.cpp
new file mode 100644
index 0000000..1fe5ce8
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.cpp
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021  Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#include <limits>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
+
+namespace tgvisd::Scrapers {
+
+
+ChatScraper::ChatScraper(tgvisd::Scraper *scraper):
+	scraper_(scraper)
+{
+	pr_debug("Initializing chat scraper...");
+	db_ = DB::create_conn_from_env();
+	db_->connect();
+	td_ = scraper_->getMain()->getTd();
+}
+
+
+ChatScraper::~ChatScraper(void)
+{
+	if (db_) {
+		delete db_;
+		db_ = nullptr;
+	}
+}
+
+
+void ChatScraper::run(void)
+{
+	/*
+	 * This function retrieves the list of chat_ids.
+	 */
+	pr_debug("ChatScraper: Getting chatList...");
+	auto chats = getChat(
+		nullptr,
+		std::numeric_limits<std::int64_t>::max(),
+		0,
+		300
+	);
+	pr_debug("ChatScraper: Got %d chat ID(s)", chats->total_count_);
+}
+
+
+td_api::object_ptr<td_api::chats> ChatScraper::getChat(
+		td_api::object_ptr<td_api::ChatList> &&chat_list,
+		int64_t offset_order,
+		int64_t offset_chat_id,
+		int32_t limit
+	)
+{
+	return td_->send_query_sync<td_api::getChats, td_api::chats>(
+		td_api::make_object<td_api::getChats>(
+			std::move(chat_list),
+			offset_order,
+			offset_chat_id,
+			limit
+		)
+	);
+}
+
+
+} /* namespace tgvisd::Scrapers */
diff --git a/src/tgvisd/Scrapers/ChatScraper.hpp b/src/tgvisd/Scrapers/ChatScraper.hpp
new file mode 100644
index 0000000..c851b6c
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.hpp
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021  Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP
+#define TGVISD__SCRAPERS__CHATSCRAPER_HPP
+
+#include <tgvisd/DB.hpp>
+#include <tgvisd/Td/Td.hpp>
+#include <tgvisd/common.hpp>
+
+namespace tgvisd {
+
+class Scraper;
+
+} /* namespace tgvisd */
+
+namespace tgvisd::Scrapers {
+
+class ChatScraper {
+public:
+	ChatScraper(tgvisd::Scraper *scraper);
+	~ChatScraper(void);
+	void run(void);
+
+	td_api::object_ptr<td_api::chats> getChat(
+		td_api::object_ptr<td_api::ChatList> &&chat_list,
+		int64_t offset_order,
+		int64_t offset_chat_id,
+		int32_t limit
+	);
+private:
+	tgvisd::DB *db_ = nullptr;
+	tgvisd::Td::Td *td_ = nullptr;
+	tgvisd::Scraper *scraper_ = nullptr;
+};
+
+} /* namespace tgvisd::Scrapers */
+
+#include <tgvisd/Scraper.hpp>
+
+#endif /* #ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP */
-- 
2.30.2

-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

  reply	other threads:[~2021-08-13 14:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
2021-08-13 14:37 ` Alviro Iskandar Setiawan [this message]
2021-08-13 16:45   ` Ammar Faizi
2021-08-14  6:40     ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
2021-08-14  6:40       ` Alviro Iskandar Setiawan
2021-08-14  7:01         ` Ammar Faizi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox