GNU/Weeb Mailing List <[email protected]>
 help / color / mirror / Atom feed
* [PATCH] generic scraper: add chat scraper
@ 2021-08-13 14:37 Alviro Iskandar Setiawan
  2021-08-13 14:37 ` Alviro Iskandar Setiawan
  0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-13 14:37 UTC (permalink / raw)
  To: Ammar Faizi; +Cc: GNU/Weeb Mailing List

Hi sir @ammarfaizi2, this is a small patch for the Telegram bot's chat scraper
and database. Please review my work; if you think it's good, please merge it.

---
Alviro Iskandar Setiawan


-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] generic scraper: add chat scraper
  2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
@ 2021-08-13 14:37 ` Alviro Iskandar Setiawan
  2021-08-13 16:45   ` Ammar Faizi
  0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-13 14:37 UTC (permalink / raw)
  To: Ammar Faizi; +Cc: Alviro Iskandar Setiawan, GNU/Weeb Mailing List

Changes:
 - Create tgvisd::Scrapers::ChatScraper class.
 - Move `db_` object from tgvisd::Scraper to
   tgvisd::Scrapers::ChatScraper.

Each scraper instance should have its own class inside the namespace
tgvisd::Scrapers. Each of them should also have its own database
connection instance if needed.

Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
 CMakeLists.txt                      |  2 +
 src/tgvisd/Main.hpp                 |  5 +++
 src/tgvisd/Scraper.cpp              | 48 ++++++++++----------
 src/tgvisd/Scraper.hpp              | 10 ++++-
 src/tgvisd/Scrapers/ChatScraper.cpp | 69 +++++++++++++++++++++++++++++
 src/tgvisd/Scrapers/ChatScraper.hpp | 47 ++++++++++++++++++++
 6 files changed, 156 insertions(+), 25 deletions(-)
 create mode 100644 src/tgvisd/Scrapers/ChatScraper.cpp
 create mode 100644 src/tgvisd/Scrapers/ChatScraper.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f97fc83..4d32c4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,8 @@ set(TGVISD_TD_SOURCE
 set(TGVISD_CORE_SOURCE
   src/tgvisd/DBFunc/DBStatement.cpp
   src/tgvisd/DBFunc/DBStatement.hpp
+  src/tgvisd/Scrapers/ChatScraper.cpp
+  src/tgvisd/Scrapers/ChatScraper.hpp
   src/tgvisd/common.hpp
   src/tgvisd/DB.cpp
   src/tgvisd/DB.hpp
diff --git a/src/tgvisd/Main.hpp b/src/tgvisd/Main.hpp
index 6067986..60c1cba 100644
--- a/src/tgvisd/Main.hpp
+++ b/src/tgvisd/Main.hpp
@@ -38,6 +38,11 @@ public:
 		return isReady_;
 	}
 
+	inline tgvisd::Td::Td *getTd(void)
+	{
+		return &td_;
+	}
+
 private:
 	tgvisd::Td::Td td_;
 	volatile bool isReady_ = false;
diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index d6221ab..c8e5c92 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -35,28 +35,35 @@ Scraper::Scraper(Main *main, std::thread *threadPtr):
 
 Scraper::~Scraper(void)
 {
-	if (db_) {
-		delete db_;
-		db_ = nullptr;
+	if (chatScraper_) {
+		delete chatScraper_;
+		chatScraper_ = nullptr;
 	}
 }
 
 
-static void run_scraper(Scraper *s, Main *main, DB *db)
+void Scraper::runScrapers(void)
 {
-	auto st = db->prepare("SELECT eeee;");
-	st->execute();
-	if (auto row = st->fetch()) {
-		int ft;
-		char buffer[64];
-		auto stmt = st->getStmt();
-		size_t len = sizeof(buffer);
-
-		ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
-		mysql_fetch_chk(ft, stmt);
-		pr_notice("buf = %s (len = %zu)", buffer, len);
-	}
-	sleep(1);
+	// auto st = db->prepare("SELECT eeee;");
+	// st->execute();
+	// if (auto row = st->fetch()) {
+	// 	int ft;
+	// 	char buffer[64];
+	// 	auto stmt = st->getStmt();
+	// 	size_t len = sizeof(buffer);
+
+	// 	ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
+	// 	mysql_fetch_chk(ft, stmt);
+	// 	pr_notice("buf = %s (len = %zu)", buffer, len);
+	// }
+	// sleep(1);
+
+	chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+
+	std::thread chatScraper([this]{
+		this->chatScraper_->run();
+	});
+	chatScraper.join();
 }
 
 
@@ -68,12 +75,7 @@ void Scraper::run(void)
 	}
 
 	try {
-		db_ = DB::create_conn_from_env();
-		db_->connect();
-
-		while (!main_->getStop())
-			run_scraper(this, main_, db_);
-
+		runScrapers();
 	} catch (std::runtime_error &e) {
 		pr_err("std::runtime_error: %s", e.what());
 		main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index 75bc6bf..a7308d0 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -10,11 +10,11 @@
 #ifndef TGVISD__SCRAPER_HPP
 #define TGVISD__SCRAPER_HPP
 
-#include <tgvisd/DB.hpp>
 #include <tgvisd/Td/Td.hpp>
 #include <tgvisd/common.hpp>
 
 #include <tgvisd/Main.hpp>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
 
 namespace tgvisd {
 
@@ -25,9 +25,15 @@ public:
 	~Scraper(void);
 	void run(void);
 
+	inline Main *getMain(void)
+	{
+		return main_;
+	}
+
 private:
-	DB *db_ = nullptr;
+	void runScrapers(void);
 	Main *main_ = nullptr;
+	tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
 };
 
 } /* namespace tgvisd */
diff --git a/src/tgvisd/Scrapers/ChatScraper.cpp b/src/tgvisd/Scrapers/ChatScraper.cpp
new file mode 100644
index 0000000..1fe5ce8
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.cpp
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021  Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#include <limits>
+#include <tgvisd/Scrapers/ChatScraper.hpp>
+
+namespace tgvisd::Scrapers {
+
+
+ChatScraper::ChatScraper(tgvisd::Scraper *scraper):
+	scraper_(scraper)
+{
+	pr_debug("Initializing chat scraper...");
+	db_ = DB::create_conn_from_env();
+	db_->connect();
+	td_ = scraper_->getMain()->getTd();
+}
+
+
+ChatScraper::~ChatScraper(void)
+{
+	if (db_) {
+		delete db_;
+		db_ = nullptr;
+	}
+}
+
+
+void ChatScraper::run(void)
+{
+	/*
+	 * This function retrieves the list of chat_ids.
+	 */
+	pr_debug("ChatScraper: Getting chatList...");
+	auto chats = getChat(
+		nullptr,
+		std::numeric_limits<std::int64_t>::max(),
+		0,
+		300
+	);
+	pr_debug("ChatScraper: Got %d chat ID(s)", chats->total_count_);
+}
+
+
+td_api::object_ptr<td_api::chats> ChatScraper::getChat(
+		td_api::object_ptr<td_api::ChatList> &&chat_list,
+		int64_t offset_order,
+		int64_t offset_chat_id,
+		int32_t limit
+	)
+{
+	return td_->send_query_sync<td_api::getChats, td_api::chats>(
+		td_api::make_object<td_api::getChats>(
+			std::move(chat_list),
+			offset_order,
+			offset_chat_id,
+			limit
+		)
+	);
+}
+
+
+} /* namespace tgvisd::Scrapers */
diff --git a/src/tgvisd/Scrapers/ChatScraper.hpp b/src/tgvisd/Scrapers/ChatScraper.hpp
new file mode 100644
index 0000000..c851b6c
--- /dev/null
+++ b/src/tgvisd/Scrapers/ChatScraper.hpp
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author Alviro Iskandar Setiawan <[email protected]>
+ * @license GPL-2.0
+ * @package tgvisd
+ *
+ * Copyright (C) 2021  Alviro Iskandar Setiawan <[email protected]>
+ */
+
+#ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP
+#define TGVISD__SCRAPERS__CHATSCRAPER_HPP
+
+#include <tgvisd/DB.hpp>
+#include <tgvisd/Td/Td.hpp>
+#include <tgvisd/common.hpp>
+
+namespace tgvisd {
+
+class Scraper;
+
+} /* namespace tgvisd */
+
+namespace tgvisd::Scrapers {
+
+class ChatScraper {
+public:
+	ChatScraper(tgvisd::Scraper *scraper);
+	~ChatScraper(void);
+	void run(void);
+
+	td_api::object_ptr<td_api::chats> getChat(
+		td_api::object_ptr<td_api::ChatList> &&chat_list,
+		int64_t offset_order,
+		int64_t offset_chat_id,
+		int32_t limit
+	);
+private:
+	tgvisd::DB *db_ = nullptr;
+	tgvisd::Td::Td *td_ = nullptr;
+	tgvisd::Scraper *scraper_ = nullptr;
+};
+
+} /* namespace tgvisd::Scrapers */
+
+#include <tgvisd/Scraper.hpp>
+
+#endif /* #ifndef TGVISD__SCRAPERS__CHATSCRAPER_HPP */
-- 
2.30.2

-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] generic scraper: add chat scraper
  2021-08-13 14:37 ` Alviro Iskandar Setiawan
@ 2021-08-13 16:45   ` Ammar Faizi
  2021-08-14  6:40     ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
  0 siblings, 1 reply; 6+ messages in thread
From: Ammar Faizi @ 2021-08-13 16:45 UTC (permalink / raw)
  To: Alviro Iskandar Setiawan; +Cc: GNU/Weeb Mailing List

Hi Alviro,

Thank you for your contribution to the GNU/Weeb project. Here is my code
review for your patch. Please address my request and I will merge your
patch into the main repo. I only see one problem with your patch.

> -static void run_scraper(Scraper *s, Main *main, DB *db)
> +void Scraper::runScrapers(void)
>  {
> -	auto st = db->prepare("SELECT eeee;");
> -	st->execute();
> -	if (auto row = st->fetch()) {
> -		int ft;
> -		char buffer[64];
> -		auto stmt = st->getStmt();
> -		size_t len = sizeof(buffer);
> -
> -		ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
> -		mysql_fetch_chk(ft, stmt);
> -		pr_notice("buf = %s (len = %zu)", buffer, len);
> -	}
> -	sleep(1);
> +	// auto st = db->prepare("SELECT eeee;");
> +	// st->execute();
> +	// if (auto row = st->fetch()) {
> +	// 	int ft;
> +	// 	char buffer[64];
> +	// 	auto stmt = st->getStmt();
> +	// 	size_t len = sizeof(buffer);
> +
> +	// 	ft = mysqlx_get_bytes(row, 0, 0, buffer, &len);
> +	// 	mysql_fetch_chk(ft, stmt);
> +	// 	pr_notice("buf = %s (len = %zu)", buffer, len);
> +	// }
> +	// sleep(1);
> +
> +	chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
> +
> +	std::thread chatScraper([this]{
> +		this->chatScraper_->run();
> +	});
> +	chatScraper.join();
>  }

Don't plug runScrapers() inside the class Scraper; it can be inlined
inside Scraper::run(). So please make it a static (file-local) function
outside the class.

Just that, the rest is fine to me.

Regards,
Ammar
-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Scraper: don't make runScrapers() be class method
  2021-08-13 16:45   ` Ammar Faizi
@ 2021-08-14  6:40     ` Alviro Iskandar Setiawan
  2021-08-14  6:40       ` Alviro Iskandar Setiawan
  0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-14  6:40 UTC (permalink / raw)
  To: Ammar Faizi; +Cc: GNU/Weeb Mailing List

Hi sir @ammarfaizi2, this is my revised patch for the runScrapers()
function; please review it again.

--
Alviro Iskandar Setiawan


-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] Scraper: don't make runScrapers() be class method
  2021-08-14  6:40     ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
@ 2021-08-14  6:40       ` Alviro Iskandar Setiawan
  2021-08-14  7:01         ` Ammar Faizi
  0 siblings, 1 reply; 6+ messages in thread
From: Alviro Iskandar Setiawan @ 2021-08-14  6:40 UTC (permalink / raw)
  To: Ammar Faizi; +Cc: Alviro Iskandar Setiawan, GNU/Weeb Mailing List

Don't plug runScrapers() inside the class Scraper; it can be inlined
inside Scraper::run().

Cc: Ammar Faizi <[email protected]>
Cc: GNU/Weeb Mailing List <[email protected]>
Link: https://gwml.gnuweeb.org/pipermail/gwml/20210813/000068.html
Signed-off-by: Alviro Iskandar Setiawan <[email protected]>
---
 src/tgvisd/Scraper.cpp | 10 +++++-----
 src/tgvisd/Scraper.hpp |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tgvisd/Scraper.cpp b/src/tgvisd/Scraper.cpp
index c8e5c92..b5dfedd 100644
--- a/src/tgvisd/Scraper.cpp
+++ b/src/tgvisd/Scraper.cpp
@@ -42,7 +42,7 @@ Scraper::~Scraper(void)
 }
 
 
-void Scraper::runScrapers(void)
+static void runScrapers(Scraper *sc)
 {
 	// auto st = db->prepare("SELECT eeee;");
 	// st->execute();
@@ -58,10 +58,10 @@ void Scraper::runScrapers(void)
 	// }
 	// sleep(1);
 
-	chatScraper_ = new tgvisd::Scrapers::ChatScraper(this);
+	sc->chatScraper_ = new tgvisd::Scrapers::ChatScraper(sc);
 
-	std::thread chatScraper([this]{
-		this->chatScraper_->run();
+	std::thread chatScraper([sc]{
+		sc->chatScraper_->run();
 	});
 	chatScraper.join();
 }
@@ -75,7 +75,7 @@ void Scraper::run(void)
 	}
 
 	try {
-		runScrapers();
+		runScrapers(this);
 	} catch (std::runtime_error &e) {
 		pr_err("std::runtime_error: %s", e.what());
 		main_->doStop();
diff --git a/src/tgvisd/Scraper.hpp b/src/tgvisd/Scraper.hpp
index a7308d0..1d019e2 100644
--- a/src/tgvisd/Scraper.hpp
+++ b/src/tgvisd/Scraper.hpp
@@ -30,10 +30,10 @@ public:
 		return main_;
 	}
 
+	tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
+
 private:
-	void runScrapers(void);
 	Main *main_ = nullptr;
-	tgvisd::Scrapers::ChatScraper *chatScraper_ = nullptr;
 };
 
 } /* namespace tgvisd */
-- 
2.30.2

-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Scraper: don't make runScrapers() be class method
  2021-08-14  6:40       ` Alviro Iskandar Setiawan
@ 2021-08-14  7:01         ` Ammar Faizi
  0 siblings, 0 replies; 6+ messages in thread
From: Ammar Faizi @ 2021-08-14  7:01 UTC (permalink / raw)
  To: Alviro Iskandar Setiawan; +Cc: GNU/Weeb Mailing List

Applied, thanks.

* patch_from_alviro:
  Scraper: don't make runScrapers() be class method
  generic scraper: add chat scraper

-- 
Ammar
-- 
GWML mailing list
[email protected]
https://gwml.gnuweeb.org/listinfo/gwml

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-08-14  7:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-13 14:37 [PATCH] generic scraper: add chat scraper Alviro Iskandar Setiawan
2021-08-13 14:37 ` Alviro Iskandar Setiawan
2021-08-13 16:45   ` Ammar Faizi
2021-08-14  6:40     ` [PATCH] Scraper: don't make runScrapers() be class method Alviro Iskandar Setiawan
2021-08-14  6:40       ` Alviro Iskandar Setiawan
2021-08-14  7:01         ` Ammar Faizi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox