GNU/Weeb Mailing List <[email protected]>
 help / color / mirror / Atom feed
From: Taufiq Pohan <[email protected]>
To: Ammar Faizi <[email protected]>
Cc: Taufiq Pohan <[email protected]>,
	Aldy Prastyo <[email protected]>,
	VNLX Kernel Department <[email protected]>,
	GNU/Weeb Mailing List <[email protected]>
Subject: [PATCH v1 3/4] index: Integrate vn-stats.json with the scraper
Date: Sun, 27 Nov 2022 02:32:49 +0700	[thread overview]
Message-ID: <TY0PR06MB542704C0EB6F26EA22BB59F4E2119@TY0PR06MB5427.apcprd06.prod.outlook.com> (raw)
In-Reply-To: <[email protected]>

vn-stats.json stores the ID of the last VN scraped from VNDB. Integrate
this state into our scraper so that scraping resumes after that ID.

Signed-off-by: Ammar Faizi <[email protected]>
Co-authored-by: Ammar Faizi <[email protected]>
Signed-off-by: Aldy Prastyo <[email protected]>
Co-authored-by: Aldy Prastyo <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
 index.js | 82 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/index.js b/index.js
index 9ceee76..513ffad 100644
--- a/index.js
+++ b/index.js
@@ -7,21 +7,26 @@ import fs from 'fs';
 
 config();
 
-mongoose.connect(process.env.MONGODB_URI, {
-	useNewUrlParser: true,
-	useUnifiedTopology: true
-});
-
-const init_db = () =>
-	mongoose.connection
-		.on('error', (error) => console.error(error))
-		.once('open', () => console.log('Database Connected'));
-
 async function get_vn_by_code(code)
 {
 	return await vndb.query(`get vn details,basic,stats (id = ${code})`);
 }
 
+async function get_number_of_vndb_vns()
+{
+	let res = await vndb.query("dbstats");
+
+	if (!("vn" in res))
+		throw Error("Error, vndb malformed response");
+
+	return res.vn;
+}
+
+async function get_number_of_our_vns()
+{
+	return await model.countDocuments();
+}
+
 async function insert_to_db(result)
 {
 	const body = {
@@ -65,32 +70,61 @@ function save_last_id(id)
 
 function get_last_id()
 {
-	if (fs.existsSync('./vn-stats.json')) {
-		const jsonVal = require('./vn-stats.json');
-		return jsonVal['last_vn_id'];
-	}
+	if (!fs.existsSync('./vn-stats.json'))
+		return 1;
 
-	return 1;
+	const jsonVal = fs.readFileSync('./vn-stats.json');
+	let ret = JSON.parse(jsonVal);
+	if (!("last_vn_id" in ret) || isNaN(ret.last_vn_id))
+		return 1;
+
+	return ret.last_vn_id
+}
+
+function sleep(ms)
+{
+	return new Promise((resolve) => {
+		setTimeout(resolve, ms);
+	});
 }
 
-async function main()
+async function start_scrape()
 {
-	init_db();
+	let i = get_last_id() + 1;

-	let code = 40029;
-	let i;
+	while (true) {
+		let nr_vns_ours = await get_number_of_our_vns();
+		let nr_vns_vndb = await get_number_of_vndb_vns();
+		/* Both helpers are async: compare awaited counts, not Promises. */
+		if (nr_vns_vndb == nr_vns_ours)
+			break;

-	i = code - 5;
-	while (i++) {
 		console.log(`Scraping VN ${i}...`);
 		let ret = await scrape_vn_and_save_to_db(i);
 		if (!ret)
 			break;
+
 		console.log(`Successfully scraped VN ${i}`);
+		save_last_id(i);
+		i++;
+		await sleep(1000);
 	}
-	console.log(`Last VN ID is ${code}`);
-	save_last_id(i);
 	process.exit();
 }
 
-main();
\ No newline at end of file
+function main()
+{
+	mongoose.connect(process.env.MONGODB_URI, {
+		useNewUrlParser: true,
+		useUnifiedTopology: true
+	});
+
+	mongoose.connection
+		.on('error', (error) => console.error(error))
+		.once('open', async function () {
+			console.log('Database Connected');
+			await start_scrape();
+		});
+}
+
+main();
-- 
Taufiq Pohan


  parent reply	other threads:[~2022-11-26 19:33 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <[email protected]>
2022-11-26 19:32 ` [PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
2022-11-26 19:32 ` Taufiq Pohan [this message]
2022-11-26 19:32 ` [PATCH v1 4/4] .gitignore: Add *.patch file to .gitingore Taufiq Pohan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=TY0PR06MB542704C0EB6F26EA22BB59F4E2119@TY0PR06MB5427.apcprd06.prod.outlook.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox