* [PATCH] generic scraper: add chat scraper
@ 2021-08-13 14:37 Alviro Iskandar Setiawan
2021-08-13 14:37 ` Alviro Iskandar Setiawan
0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-13 14:37 UTC (permalink / raw)
To: Ammar Faizi; +Cc: GNU/Weeb Mailing List
Hi sir @ammarfaizi2, this is a small patch for the chat scraper and database
of the Telegram bot. Please review my work; if you think it's good, please merge it.
---
Alviro Iskandar Setiawan
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] generic scraper: add chat scraper
2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
@ 2021-08-13 14:37 ` Alviro Iskandar Setiawan
2021-08-13 16:45 ` Ammar Faizi
0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-13 14:37 UTC (permalink / raw)
To: Ammar Faizi; +Cc: Alviro Iskandar Setiawan, GNU/Weeb Mailing List
Changes:
- Create tgvisd::Scrapers::ChatScraper class.
- Move `db_` object from tgvisd::Scraper to
tgvisd::Scrapers::ChatScraper.
Each scraper instance should have its own class inside the namespace
tgvisd::Scrapers. Each of them should also have its own database
connection instance if needed.
Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
CMakeLists.txt | 2 +
src/tgvisd/Main.hpp | 5 +++
src/tgvisd/Scraper.cpp | 48 ++++++++++----------
src/tgvisd/Scraper.hpp | 10 ++++-
src/tgvisd/Scrapers/ChatScraper.cpp | 69 +++++++++++++++++++++++++++++
src/tgvisd/Scrapers/ChatScraper.hpp | 47 ++++++++++++++++++++
6 files changed, 156 insertions(+), 25 deletions(-)
create mode 100644 src/tgvisd/Scrapers/ChatScraper.cpp
create mode 100644 src/tgvisd/Scrapers/ChatScraper.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f97fc83..4d32c4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(TGVISD_TD_SOURCE
set(TGVISD_CORE_SOURCE
src/tgvisd/DBFunc/DBStatement.cpp
src/tgvisd/DBFunc/DBStatement.hpp
+ src/tgvisd/Scrapers/ChatScraper.cpp
+ src/tgvisd/Scrapers/ChatScraper.hpp
src/tgvisd/common.hpp
src/tgvisd/DB.cpp
src/tgvisd/DB.hpp
diff --git a/src/tgvisd/Main.hpp b/src/tgvisd/Main.hpp
index 6067986..60c1cba 100644
--- a/src/tgvisd/Main.hpp
+++ b/src/tgvisd/Main.hpp
@@ -38,6 +38,11 @@ public:
return isReady_;
}
+ inline tgvisd::Td::Td *getTd(void)
+ {
+ return &td_;
+ }
+
private:
tgvisd::Td::Td td_;
volatile bool isReady_ = false;
diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index d6221ab..c8e5c92 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -35,28 +35,35 @@ Scraper::Scraper(Main *main, std::thread *threadPtr):
Scraper::~Scraper(void)
{
- if (db_) {
- delete db_;
- db_ = nullptr;
+ if (chatScraper_) {
+ delete chatScraper_;
+ chatScraper_ = nullptr;
}
}
-static void run_scraper(Scraper *s, Main *main, DB *db)
+void Scraper::runScrapers(void)
{
- auto st = db->prepare("SELECT eeee;");
- st->execute();
- if (auto row = st->fetch()) {
- int ft;
- char buffer[64];
- auto stmt = st->getStmt();
- size_t len = sizeof(buffer);
-
- ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
- mysql_fetch_chk(ft, stmt);
- pr_notice("buf = %s (len = %zu)", buffer, len);
- }
- sleep(1);
+ // auto st = db->prepare("SELECT eeee;");
+ // st->execute();
+ // if (auto row = st->fetch()) {
+ // int ft;
+ // char buffer[64];
+ // auto stmt = st->getStmt();
+ // size_t len = sizeof(buffer);
+
+ // ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
+ // mysql_fetch_chk(ft, stmt);
+ // pr_notice("buf = %s (len = %zu)", buffer, len);
+ // }
+ // sleep(1);
+
+ chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+
+ std::thread chatScraper([this]{
+ this->chatScraper_->run();
+ });
+ chatScraper.join();
}
@@ -68,12 +75,7 @@ void Scraper::run(void)
}
try {
- db_ = DB::create_conn_from_env();
- db_->connect();
-
- while (!main_->getStop())
- run_scraper(this, main_, db_);
-
+ runScrapers();
} catch (std::runtime_error &e) {
pr_err("std::runtime_error: %s", e.what());
main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index 75bc6bf..a7308d0 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -10,11 +10,11 @@
#ifndef TGVISD__SCRAPER_HPP
#define TGVISD__SCRAPER_HPP
-#include <tgvisd/DB.hpp>
#include <tgvisd/Td/Td.hpp>
#include <tgvisd/common.hpp>
#include <tgvisd/Main.hpp>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
namespace tgvisd {
@@ -25,9 +25,15 @@ public:
~Scraper(void);
void run(void);
+ inline Main *getMain(void)
+ {
+ return main_;
+ }
+
private:
- DB *db_ = nullptr;
+ void runScrapers(void);
Main *main_ = nullptr;
+ tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
};
} /* namespace tgvisd */
diff --git a/src/tgvisd/Scrapers/ChatScraper.cpp b/src/tgvisd/Scrapers/ChatScraper.cpp
new file mode 100644
index 0000000..1fe5ce8
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.cpp
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021 Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#include <limits>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
+
+namespace tgvisd::Scrapers {
+
+
+ChatScraper::ChatScraper(tgvisd::Scraper *scraper):
+ scraper_(scraper)
+{
+ pr_debug("Initializing chat scraper...");
+ db_ = DB::create_conn_from_env();
+ db_->connect();
+ td_ = scraper_->getMain()->getTd();
+}
+
+
+ChatScraper::~ChatScraper(void)
+{
+ if (db_) {
+ delete db_;
+ db_ = nullptr;
+ }
+}
+
+
+void ChatScraper::run(void)
+{
+ /*
+ * This function retrieves the list of chat_ids.
+ */
+ pr_debug("ChatScraper: Getting chatList...");
+ auto chats = getChat(
+ nullptr,
+ std::numeric_limits<std::int64_t>::max(),
+ 0,
+ 300
+ );
+ pr_debug("ChatScraper: Got %d chat ID(s)", chats->total_count_);
+}
+
+
+td_api::object_ptr<td_api::chats> ChatScraper::getChat(
+ td_api::object_ptr<td_api::ChatList> &&chat_list,
+ int64_t offset_order,
+ int64_t offset_chat_id,
+ int32_t limit
+ )
+{
+ return td_->send_query_sync<td_api::getChats, td_api::chats>(
+ td_api::make_object<td_api::getChats>(
+ std::move(chat_list),
+ offset_order,
+ offset_chat_id,
+ limit
+ )
+ );
+}
+
+
+} /* namespace tgvisd::Scrapers */
diff --git a/src/tgvisd/Scrapers/ChatScraper.hpp b/src/tgvisd/Scrapers/ChatScraper.hpp
new file mode 100644
index 0000000..c851b6c
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.hpp
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021 Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP
+#define TGVISD__SCRAPERS__CHATSCRAPER_HPP
+
+#include <tgvisd/DB.hpp>
+#include <tgvisd/Td/Td.hpp>
+#include <tgvisd/common.hpp>
+
+namespace tgvisd {
+
+class Scraper;
+
+} /* namespace tgvisd */
+
+namespace tgvisd::Scrapers {
+
+class ChatScraper {
+public:
+ ChatScraper(tgvisd::Scraper *scraper);
+ ~ChatScraper(void);
+ void run(void);
+
+ td_api::object_ptr<td_api::chats> getChat(
+ td_api::object_ptr<td_api::ChatList> &&chat_list,
+ int64_t offset_order,
+ int64_t offset_chat_id,
+ int32_t limit
+ );
+private:
+ tgvisd::DB *db_ = nullptr;
+ tgvisd::Td::Td *td_ = nullptr;
+ tgvisd::Scraper *scraper_ = nullptr;
+};
+
+} /* namespace tgvisd::Scrapers */
+
+#include <tgvisd/Scraper.hpp>
+
+#endif /* #ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP */
--
2.30.2
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] generic scraper: add chat scraper
2021-08-13 14:37 ` Alviro Iskandar Setiawan
@ 2021-08-13 16:45 ` Ammar Faizi
2021-08-14 6:40 ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
0 siblings, 1 reply; 6+ messages in thread
From: Ammar Faizi @ 2021-08-13 16:45 UTC (permalink / raw)
To: Alviro Iskandar Setiawan; +Cc: GNU/Weeb Mailing List
Hi Alviro,
Thank you for your contribution to GNU/Weeb project, here is my code
review for your patch. Please address my request and I will merge your
patch to the main repo. I only see one problem with your patch.
> -static void run_scraper(Scraper *s, Main *main, DB *db)
> +void Scraper::runScrapers(void)
> {
> - auto st = db->prepare("SELECT eeee;");
> - st->execute();
> - if (auto row = st->fetch()) {
> - int ft;
> - char buffer[64];
> - auto stmt = st->getStmt();
> - size_t len = sizeof(buffer);
> -
> - ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
> - mysql_fetch_chk(ft, stmt);
> - pr_notice("buf = %s (len = %zu)", buffer, len);
> - }
> - sleep(1);
> + // auto st = db->prepare("SELECT eeee;");
> + // st->execute();
> + // if (auto row = st->fetch()) {
> + // int ft;
> + // char buffer[64];
> + // auto stmt = st->getStmt();
> + // size_t len = sizeof(buffer);
> +
> + // ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
> + // mysql_fetch_chk(ft, stmt);
> + // pr_notice("buf = %s (len = %zu)", buffer, len);
> + // }
> + // sleep(1);
> +
> + chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
> +
> + std::thread chatScraper([this]{
> + this->chatScraper_->run();
> + });
> + chatScraper.join();
> }
Don't plug runScrapers() inside the class Scraper; this can be inlined
inside Scraper::run(). So please use a static function outside the class.
Just that, the rest is fine to me.
Regards,
Ammar
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] Scraper: don't make runScrapers() be class method
2021-08-13 16:45 ` Ammar Faizi
@ 2021-08-14 6:40 ` Alviro Iskandar Setiawan
2021-08-14 6:40 ` Alviro Iskandar Setiawan
0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-14 6:40 UTC (permalink / raw)
To: Ammar Faizi; +Cc: GNU/Weeb Mailing List
Hi sir @ammarfaizi2, this is my revised patch for the runScrapers()
function; please review it again.
--
Alviro Iskandar Setiawan
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] Scraper: don't make runScrapers() be class method
2021-08-14 6:40 ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
@ 2021-08-14 6:40 ` Alviro Iskandar Setiawan
2021-08-14 7:01 ` Ammar Faizi
0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-14 6:40 UTC (permalink / raw)
To: Ammar Faizi; +Cc: Alviro Iskandar Setiawan, GNU/Weeb Mailing List
Don't plug runScrapers() inside the class Scraper, this can be inlined
inside Scraper::run().
Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Link: https://gwml.gnuweeb.org/pipermail/gwml/20210813/000068.html
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
src/tgvisd/Scraper.cpp | 10 +++++-----
src/tgvisd/Scraper.hpp | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index c8e5c92..b5dfedd 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -42,7 +42,7 @@ Scraper::~Scraper(void)
}
-void Scraper::runScrapers(void)
+static void runScrapers(Scraper *sc)
{
// auto st = db->prepare("SELECT eeee;");
// st->execute();
@@ -58,10 +58,10 @@ void Scraper::runScrapers(void)
// }
// sleep(1);
- chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+ sc->chatScraper_ = new tgvisd::Scrapers::ChatScraper(sc);
- std::thread chatScraper([this]{
- this->chatScraper_->run();
+ std::thread chatScraper([sc]{
+ sc->chatScraper_->run();
});
chatScraper.join();
}
@@ -75,7 +75,7 @@ void Scraper::run(void)
}
try {
- runScrapers();
+ runScrapers(this);
} catch (std::runtime_error &e) {
pr_err("std::runtime_error: %s", e.what());
main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index a7308d0..1d019e2 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -30,10 +30,10 @@ public:
return main_;
}
+ tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
+
private:
- void runScrapers(void);
Main *main_ = nullptr;
- tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
};
} /* namespace tgvisd */
--
2.30.2
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] Scraper: don't make runScrapers() be class method
2021-08-14 6:40 ` Alviro Iskandar Setiawan
@ 2021-08-14 7:01 ` Ammar Faizi
0 siblings, 0 replies; 6+ messages in thread
From: Ammar Faizi @ 2021-08-14 7:01 UTC (permalink / raw)
To: Alviro Iskandar Setiawan; +Cc: GNU/Weeb Mailing List
Applied, thanks.
* patch_from_alviro:
Scraper: don't make runScrapers() be class method
generic scraper: add chat scraper
--
Ammar
--
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-08-14 7:01 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
2021-08-13 14:37 ` Alviro Iskandar Setiawan
2021-08-13 16:45 ` Ammar Faizi
2021-08-14 6:40 ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
2021-08-14 6:40 ` Alviro Iskandar Setiawan
2021-08-14 7:01 ` Ammar Faizi
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.