public inbox for [email protected]
 help / color / mirror / Atom feed
* [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module
       [not found] <[email protected]>
@ 2022-11-26 19:37 ` Taufiq Pohan
  2022-11-26 20:11   ` Ammar Faizi
  2022-11-26 19:37 ` [RESEND PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 5+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:37 UTC (permalink / raw)
  To: Ammar Faizi
  Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
	GNU/Weeb Mailing List

For initial project.

Signed-off-by: Taufiq Pohan <[email protected]>
---
 .gitignore   |  3 +++
 index.js     |  4 ++++
 package.json | 14 ++++++++++++++
 3 files changed, 21 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 index.js
 create mode 100644 package.json

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b855cbc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.env
+node_modules/
+yarn.lock
\ No newline at end of file
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..a3dadfb
--- /dev/null
+++ b/index.js
@@ -0,0 +1,4 @@
+import VNDB from "vndb-api";
+
+const vndb = new VNDB('atri_api');
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..7ef6a42
--- /dev/null
+++ b/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "vndb_scraper",
+  "version": "1.0.0",
+  "description": "VNDB Scrapper for ATRI",
+  "main": "index.js",
+  "repository": "[email protected]:vnlx2/vndb_scraper.git",
+  "author": "Taufiq Pohan <[email protected]>",
+  "license": "GPLv2",
+  "dependencies": {
+    "dotenv": "^16.0.3",
+    "mongose": "^0.0.2-security",
+    "vndb-api": "^1.0.3"
+  }
+}
-- 
Taufiq Pohan


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RESEND PATCH v1 2/4] Initial VNDB scraper and storage management
       [not found] <[email protected]>
  2022-11-26 19:37 ` [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
@ 2022-11-26 19:37 ` Taufiq Pohan
  2022-11-26 19:37 ` [RESEND PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Taufiq Pohan
  2022-11-26 19:37 ` [RESEND PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore Taufiq Pohan
  3 siblings, 0 replies; 5+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:37 UTC (permalink / raw)
  To: Ammar Faizi
  Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
	GNU/Weeb Mailing List

Signed-off-by: Ammar Faizi <[email protected]>
Co-authored-by: Ammar Faizi <[email protected]>
Signed-off-by: Aldy Prastyo <[email protected]>
Co-authored-by: Aldy Prastyo <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
 .gitignore   |  7 ++--
 VNDBModel.js | 34 +++++++++++++++++++
 index.js     | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 package.json |  3 ++
 4 files changed, 135 insertions(+), 5 deletions(-)
 create mode 100644 VNDBModel.js

diff --git a/.gitignore b/.gitignore
index b855cbc..29ea801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.env
-node_modules/
-yarn.lock
\ No newline at end of file
+/.env
+/node_modules/
+/yarn.lock
+/vn-stats.json
\ No newline at end of file
diff --git a/VNDBModel.js b/VNDBModel.js
new file mode 100644
index 0000000..4ba11a0
--- /dev/null
+++ b/VNDBModel.js
@@ -0,0 +1,34 @@
+import mongoose from "mongoose";
+
+// Schema
+
+const VisualNovel = mongoose.Schema({
+    code: {
+        type: String,
+        required: true
+    },
+    title: {
+        type: String,
+        required: true
+    },
+    alias: {
+        type: String
+    },
+    length: {
+        type: Number
+    },
+    rating: {
+        type: Number,
+    },
+    description: {
+        type: String,
+        required: true
+    },
+    image: {
+        type: String
+    }
+}, {
+    timestamps: true
+});
+
+export default mongoose.model('vndb', VisualNovel);
\ No newline at end of file
diff --git a/index.js b/index.js
index a3dadfb..9ceee76 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,96 @@
-import VNDB from "vndb-api";
-
+import VNDB from 'vndb-api';
 const vndb = new VNDB('atri_api');
+import mongoose from "mongoose";
+import { config } from "dotenv";
+import model from './VNDBModel.js';
+import fs from 'fs';
+
+config();
+
+mongoose.connect(process.env.MONGODB_URI, {
+	useNewUrlParser: true,
+	useUnifiedTopology: true
+});
+
+const init_db = () =>
+	mongoose.connection
+		.on('error', (error) => console.error(error))
+		.once('open', () => console.log('Database Connected'));
+
+async function get_vn_by_code(code)
+{
+	return await vndb.query(`get vn details,basic,stats (id = ${code})`);
+}
+
+async function insert_to_db(result)
+{
+	const body = {
+		code: result.id,
+		title: result.title,
+		alias: result.alias,
+		length: result.length,
+		rating: result.rating,
+		description: result.image,
+		image: result.image
+	};
+	const response = await model(body);
+	await response.save();
+}
+
+async function scrape_vn_and_save_to_db(code)
+{
+	const result = await get_vn_by_code(code);
+	if (!result) {
+		console.log("Internal error");
+		return false;
+	}
+
+	if (result.items.length == 0) {
+		console.log(`VN ${code} is not found`);
+		return false;
+	}
+
+	insert_to_db(result.items[0]);
+	return true;
+}
+
+function save_last_id(id)
+{
+	const jsonVal = {
+		last_vn_id: id
+	};
+	fs.writeFileSync('vn-stats.json', JSON.stringify(jsonVal)+"\n");
+	return true;
+}
+
+function get_last_id()
+{
+	if (fs.existsSync('./vn-stats.json')) {
+		const jsonVal = require('./vn-stats.json');
+		return jsonVal['last_vn_id'];
+	}
+
+	return 1;
+}
+
+async function main()
+{
+	init_db();
+
+	let code = 40029;
+	let i;
+
+	i = code - 5;
+	while (i++) {
+		console.log(`Scraping VN ${i}...`);
+		let ret = await scrape_vn_and_save_to_db(i);
+		if (!ret)
+			break;
+		console.log(`Successfully scraped VN ${i}`);
+	}
+	console.log(`Last VN ID is ${code}`);
+	save_last_id(i);
+	process.exit();
+}
 
+main();
\ No newline at end of file
diff --git a/package.json b/package.json
index 7ef6a42..e5e9912 100644
--- a/package.json
+++ b/package.json
@@ -2,12 +2,15 @@
   "name": "vndb_scraper",
   "version": "1.0.0",
   "description": "VNDB Scrapper for ATRI",
+  "type": "module",
   "main": "index.js",
   "repository": "[email protected]:vnlx2/vndb_scraper.git",
   "author": "Taufiq Pohan <[email protected]>",
   "license": "GPLv2",
   "dependencies": {
     "dotenv": "^16.0.3",
+    "fs": "^0.0.1-security",
+    "mongoose": "^6.7.3",
     "mongose": "^0.0.2-security",
     "vndb-api": "^1.0.3"
   }
-- 
Taufiq Pohan


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RESEND PATCH v1 3/4] index: Integrate vn-stats.json with the scraper
       [not found] <[email protected]>
  2022-11-26 19:37 ` [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
  2022-11-26 19:37 ` [RESEND PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
@ 2022-11-26 19:37 ` Taufiq Pohan
  2022-11-26 19:37 ` [RESEND PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore Taufiq Pohan
  3 siblings, 0 replies; 5+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:37 UTC (permalink / raw)
  To: Ammar Faizi
  Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
	GNU/Weeb Mailing List

vn-stats.json stores the ID of the last VN scraped from VNDB. Integrate
this state into our scraper.

Signed-off-by: Ammar Faizi <[email protected]>
Co-authored-by: Ammar Faizi <[email protected]>
Signed-off-by: Aldy Prastyo <[email protected]>
Co-authored-by: Aldy Prastyo <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
 index.js | 82 +++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/index.js b/index.js
index 9ceee76..513ffad 100644
--- a/index.js
+++ b/index.js
@@ -7,21 +7,26 @@ import fs from 'fs';
 
 config();
 
-mongoose.connect(process.env.MONGODB_URI, {
-	useNewUrlParser: true,
-	useUnifiedTopology: true
-});
-
-const init_db = () =>
-	mongoose.connection
-		.on('error', (error) => console.error(error))
-		.once('open', () => console.log('Database Connected'));
-
 async function get_vn_by_code(code)
 {
 	return await vndb.query(`get vn details,basic,stats (id = ${code})`);
 }
 
+async function get_number_of_vndb_vns()
+{
+	let res = await vndb.query("dbstats");
+
+	if (!("vn" in res))
+		throw Error("Error, vndb malformed response");
+
+	return res.vn;
+}
+
+async function get_number_of_our_vns()
+{
+	return await model.countDocuments();
+}
+
 async function insert_to_db(result)
 {
 	const body = {
@@ -65,32 +70,61 @@ function save_last_id(id)
 
 function get_last_id()
 {
-	if (fs.existsSync('./vn-stats.json')) {
-		const jsonVal = require('./vn-stats.json');
-		return jsonVal['last_vn_id'];
-	}
+	if (!fs.existsSync('./vn-stats.json'))
+		return 1;
 
-	return 1;
+	const jsonVal = fs.readFileSync('./vn-stats.json');
+	let ret = JSON.parse(jsonVal);
+	if (!("last_vn_id" in ret) || isNaN(ret.last_vn_id))
+		return 1;
+
+	return ret.last_vn_id
+}
+
+function sleep(ms)
+{
+	return new Promise((resolve) => {
+		setTimeout(resolve, ms);
+	});
 }
 
-async function main()
+async function start_scrape()
 {
-	init_db();
+	let i = get_last_id() + 1;
 
-	let code = 40029;
-	let i;
+	while (true) {
+		let nr_vns_ours = get_number_of_our_vns();
+		let nr_vns_vndb = get_number_of_vndb_vns();
+
+		if (nr_vns_vndb == nr_vns_ours)
+			break;
 
-	i = code - 5;
-	while (i++) {
 		console.log(`Scraping VN ${i}...`);
 		let ret = await scrape_vn_and_save_to_db(i);
 		if (!ret)
 			break;
+
 		console.log(`Successfully scraped VN ${i}`);
+		save_last_id(i);
+		i++;
+		await sleep(1000);
 	}
-	console.log(`Last VN ID is ${code}`);
-	save_last_id(i);
 	process.exit();
 }
 
-main();
\ No newline at end of file
+function main()
+{
+	mongoose.connect(process.env.MONGODB_URI, {
+		useNewUrlParser: true,
+		useUnifiedTopology: true
+	});
+
+	mongoose.connection
+		.on('error', (error) => console.error(error))
+		.once('open', async function () {
+			console.log('Database Connected');
+			await start_scrape();
+		});
+}
+
+main();
-- 
Taufiq Pohan


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RESEND PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore
       [not found] <[email protected]>
                   ` (2 preceding siblings ...)
  2022-11-26 19:37 ` [RESEND PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Taufiq Pohan
@ 2022-11-26 19:37 ` Taufiq Pohan
  3 siblings, 0 replies; 5+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:37 UTC (permalink / raw)
  To: Ammar Faizi
  Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
	GNU/Weeb Mailing List

From: Ammar Faizi <[email protected]>

We usually generate patch files in the same directory as the
project. Ignore the generated patch files.

Signed-off-by: Ammar Faizi <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 29ea801..477063a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /.env
 /node_modules/
 /yarn.lock
-/vn-stats.json
\ No newline at end of file
+/vn-stats.json
+*.patch
\ No newline at end of file
-- 
Taufiq Pohan


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module
  2022-11-26 19:37 ` [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
@ 2022-11-26 20:11   ` Ammar Faizi
  0 siblings, 0 replies; 5+ messages in thread
From: Ammar Faizi @ 2022-11-26 20:11 UTC (permalink / raw)
  To: Taufiq Pohan
  Cc: Ammar Faizi, Aldy Prastyo, VNLX Kernel Department,
	GNU/Weeb Mailing List

On Sun, 27 Nov 2022 02:37:21 +0700, Taufiq Pohan wrote:
> For initial project.
> 
> 

Applied, thanks!

[1/4] Add vndb-api, mongose, and dotenv module
      commit: 5c5380be2e4970d21729e1014a92aea25895d541
[2/4] Initial VNDB scraper and storage management
      commit: 8487845a100abf116a3ea0b2a3002f6813613ceb
[3/4] index: Integrate vn-stats.json with the scraper
      commit: 4784bb8e02e4bc7e650885b57128d6584cdaca14
[4/4] .gitignore: Add *.patch file to .gitignore
      commit: 6f43eec419f11a4850c36ec8a046a0425e38b1a2

Best regards,
-- 
Ammar Faizi <[email protected]>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-11-26 20:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <[email protected]>
2022-11-26 19:37 ` [RESEND PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
2022-11-26 20:11   ` Ammar Faizi
2022-11-26 19:37 ` [RESEND PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
2022-11-26 19:37 ` [RESEND PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Taufiq Pohan
2022-11-26 19:37 ` [RESEND PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore Taufiq Pohan

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.