* [PATCH] generic scraper: add chat scraper
2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
@ 2021-08-13 14:37 ` Alviro Iskandar Setiawan
2021-08-13 16:45 ` Ammar Faizi
0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-13 14:37 UTC (permalink / raw)
To: Ammar Faizi; +Cc: Alviro Iskandar Setiawan, GNU/Weeb Mailing List
Changes:
- Create tgvisd::Scrapers::ChatScraper class.
- Move `db_` object from tgvisd::Scraper to
tgvisd::Scrapers::ChatScraper.
Each scraper instance should have its own class inside the namespace
tgvisd::Scrapers. Each of them should also have its own database
connection instance if needed.
Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
CMakeLists.txt | 2 +
src/tgvisd/Main.hpp | 5 +++
src/tgvisd/Scraper.cpp | 48 ++++++++++----------
src/tgvisd/Scraper.hpp | 10 ++++-
src/tgvisd/Scrapers/ChatScraper.cpp | 69 +++++++++++++++++++++++++++++
src/tgvisd/Scrapers/ChatScraper.hpp | 47 ++++++++++++++++++++
6 files changed, 156 insertions(+), 25 deletions(-)
create mode 100644 src/tgvisd/Scrapers/ChatScraper.cpp
create mode 100644 src/tgvisd/Scrapers/ChatScraper.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f97fc83..4d32c4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(TGVISD_TD_SOURCE
set(TGVISD_CORE_SOURCE
src/tgvisd/DBFunc/DBStatement.cpp
src/tgvisd/DBFunc/DBStatement.hpp
+ src/tgvisd/Scrapers/ChatScraper.cpp
+ src/tgvisd/Scrapers/ChatScraper.hpp
src/tgvisd/common.hpp
src/tgvisd/DB.cpp
src/tgvisd/DB.hpp
diff --git a/src/tgvisd/Main.hpp b/src/tgvisd/Main.hpp
index 6067986..60c1cba 100644
--- a/src/tgvisd/Main.hpp
+++ b/src/tgvisd/Main.hpp
@@ -38,6 +38,11 @@ public:
return isReady_;
}
+ inline tgvisd::Td::Td *getTd(void)
+ {
+ return &td_;
+ }
+
private:
tgvisd::Td::Td td_;
volatile bool isReady_ = false;
diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index d6221ab..c8e5c92 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -35,28 +35,35 @@ Scraper::Scraper(Main *main, std::thread *threadPtr):
Scraper::~Scraper(void)
{
- if (db_) {
- delete db_;
- db_ = nullptr;
+ if (chatScraper_) {
+ delete chatScraper_;
+ chatScraper_ = nullptr;
}
}
-static void run_scraper(Scraper *s, Main *main, DB *db)
+void Scraper::runScrapers(void)
{
- auto st = db->prepare("SELECT eeee;");
- st->execute();
- if (auto row = st->fetch()) {
- int ft;
- char buffer[64];
- auto stmt = st->getStmt();
- size_t len = sizeof(buffer);
-
- ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
- mysql_fetch_chk(ft, stmt);
- pr_notice("buf = %s (len = %zu)", buffer, len);
- }
- sleep(1);
+ // auto st = db->prepare("SELECT eeee;");
+ // st->execute();
+ // if (auto row = st->fetch()) {
+ // int ft;
+ // char buffer[64];
+ // auto stmt = st->getStmt();
+ // size_t len = sizeof(buffer);
+
+ // ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
+ // mysql_fetch_chk(ft, stmt);
+ // pr_notice("buf = %s (len = %zu)", buffer, len);
+ // }
+ // sleep(1);
+
+ chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+
+ std::thread chatScraper([this]{
+ this->chatScraper_->run();
+ });
+ chatScraper.join();
}
@@ -68,12 +75,7 @@ void Scraper::run(void)
}
try {
- db_ = DB::create_conn_from_env();
- db_->connect();
-
- while (!main_->getStop())
- run_scraper(this, main_, db_);
-
+ runScrapers();
} catch (std::runtime_error &e) {
pr_err("std::runtime_error: %s", e.what());
main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index 75bc6bf..a7308d0 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -10,11 +10,11 @@
#ifndef TGVISD__SCRAPER_HPP
#define TGVISD__SCRAPER_HPP
-#include <tgvisd/DB.hpp>
#include <tgvisd/Td/Td.hpp>
#include <tgvisd/common.hpp>
#include <tgvisd/Main.hpp>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
namespace tgvisd {
@@ -25,9 +25,15 @@ public:
~Scraper(void);
void run(void);
+ inline Main *getMain(void)
+ {
+ return main_;
+ }
+
private:
- DB *db_ = nullptr;
+ void runScrapers(void);
Main *main_ = nullptr;
+ tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
};
} /* namespace tgvisd */
diff --git a/src/tgvisd/Scrapers/ChatScraper.cpp b/src/tgvisd/Scrapers/ChatScraper.cpp
new file mode 100644
index 0000000..1fe5ce8
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.cpp
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021 Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#include <limits>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
+
+namespace tgvisd::Scrapers {
+
+
+ChatScraper::ChatScraper(tgvisd::Scraper *scraper):
+ scraper_(scraper)
+{
+ pr_debug("Initializing chat scraper...");
+ db_ = DB::create_conn_from_env();
+ db_->connect();
+ td_ = scraper_->getMain()->getTd();
+}
+
+
+ChatScraper::~ChatScraper(void)
+{
+ if (db_) {
+ delete db_;
+ db_ = nullptr;
+ }
+}
+
+
+void ChatScraper::run(void)
+{
+ /*
+ * This function retrieves the list of chat_ids.
+ */
+ pr_debug("ChatScraper: Getting chatList...");
+ auto chats = getChat(
+ nullptr,
+ std::numeric_limits<std::int64_t>::max(),
+ 0,
+ 300
+ );
+ pr_debug("ChatScraper: Got %d chat ID(s)", chats->total_count_);
+}
+
+
+td_api::object_ptr<td_api::chats> ChatScraper::getChat(
+ td_api::object_ptr<td_api::ChatList> &&chat_list,
+ int64_t offset_order,
+ int64_t offset_chat_id,
+ int32_t limit
+ )
+{
+ return td_->send_query_sync<td_api::getChats, td_api::chats>(
+ td_api::make_object<td_api::getChats>(
+ std::move(chat_list),
+ offset_order,
+ offset_chat_id,
+ limit
+ )
+ );
+}
+
+
+} /* namespace tgvisd::Scrapers */
diff --git a/src/tgvisd/Scrapers/ChatScraper.hpp b/src/tgvisd/Scrapers/ChatScraper.hpp
new file mode 100644
index 0000000..c851b6c
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.hpp
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021 Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP
+#define TGVISD__SCRAPERS__CHATSCRAPER_HPP
+
+#include <tgvisd/DB.hpp>
+#include <tgvisd/Td/Td.hpp>
+#include <tgvisd/common.hpp>
+
+namespace tgvisd {
+
+class Scraper;
+
+} /* namespace tgvisd */
+
+namespace tgvisd::Scrapers {
+
+class ChatScraper {
+public:
+ ChatScraper(tgvisd::Scraper *scraper);
+ ~ChatScraper(void);
+ void run(void);
+
+ td_api::object_ptr<td_api::chats> getChat(
+ td_api::object_ptr<td_api::ChatList> &&chat_list,
+ int64_t offset_order,
+ int64_t offset_chat_id,
+ int32_t limit
+ );
+private:
+ tgvisd::DB *db_ = nullptr;
+ tgvisd::Td::Td *td_ = nullptr;
+ tgvisd::Scraper *scraper_ = nullptr;
+};
+
+} /* namespace tgvisd::Scrapers */
+
+#include <tgvisd/Scraper.hpp>
+
+#endif /* #ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP */
--
2.30.2
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply related [flat|nested] 6+ messages in thread