* [PATCH v1 1/4] Add vndb-api, mongose, and dotenv module
[not found] <[email protected]>
@ 2022-11-26 19:32 ` Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
` (2 subsequent siblings)
3 siblings, 0 replies; 4+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:32 UTC (permalink / raw)
To: Ammar Faizi
Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
GNU/Weeb Mailing List
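Set up the initial project: add .gitignore, a stub index.js, and a package.json that declares the vndb-api, mongose, and dotenv dependencies.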
Signed-off-by: Taufiq Pohan <[email protected]>
---
.gitignore | 3 +++
index.js | 4 ++++
package.json | 14 ++++++++++++++
3 files changed, 21 insertions(+)
create mode 100644 .gitignore
create mode 100644 index.js
create mode 100644 package.json
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b855cbc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.env
+node_modules/
+yarn.lock
\ No newline at end of file
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..a3dadfb
--- /dev/null
+++ b/index.js
@@ -0,0 +1,4 @@
+import VNDB from "vndb-api";
+
+const vndb = new VNDB('atri_api');
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..7ef6a42
--- /dev/null
+++ b/package.json
@@ -0,0 +1,14 @@
+{
+ "name": "vndb_scraper",
+ "version": "1.0.0",
+ "description": "VNDB Scrapper for ATRI",
+ "main": "index.js",
+ "repository": "[email protected]:vnlx2/vndb_scraper.git",
+ "author": "Taufiq Pohan <[email protected]>",
+ "license": "GPLv2",
+ "dependencies": {
+ "dotenv": "^16.0.3",
+ "mongose": "^0.0.2-security",
+ "vndb-api": "^1.0.3"
+ }
+}
--
Taufiq Pohan
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v1 2/4] Initial VNDB scraper and storage management
[not found] <[email protected]>
2022-11-26 19:32 ` [PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
@ 2022-11-26 19:32 ` Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore Taufiq Pohan
3 siblings, 0 replies; 4+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:32 UTC (permalink / raw)
To: Ammar Faizi
Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
GNU/Weeb Mailing List
Signed-off-by: Ammar Faizi <[email protected]>
Co-authored-by: Ammar Faizi <[email protected]>
Signed-off-by: Aldy Prastyo <[email protected]>
Co-authored-by: Aldy Prastyo <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
.gitignore | 7 ++--
VNDBModel.js | 34 +++++++++++++++++++
index.js | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++--
package.json | 3 ++
4 files changed, 135 insertions(+), 5 deletions(-)
create mode 100644 VNDBModel.js
diff --git a/.gitignore b/.gitignore
index b855cbc..29ea801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.env
-node_modules/
-yarn.lock
\ No newline at end of file
+/.env
+/node_modules/
+/yarn.lock
+/vn-stats.json
\ No newline at end of file
diff --git a/VNDBModel.js b/VNDBModel.js
new file mode 100644
index 0000000..4ba11a0
--- /dev/null
+++ b/VNDBModel.js
@@ -0,0 +1,34 @@
+import mongoose from "mongoose";
+
+// Schema
+
+const VisualNovel = mongoose.Schema({
+ code: {
+ type: String,
+ required: true
+ },
+ title: {
+ type: String,
+ required: true
+ },
+ alias: {
+ type: String
+ },
+ length: {
+ type: Number
+ },
+ rating: {
+ type: Number,
+ },
+ description: {
+ type: String,
+ required: true
+ },
+ image: {
+ type: String
+ }
+}, {
+ timestamps: true
+});
+
+export default mongoose.model('vndb', VisualNovel);
\ No newline at end of file
diff --git a/index.js b/index.js
index a3dadfb..9ceee76 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,96 @@
-import VNDB from "vndb-api";
-
+import VNDB from 'vndb-api';
const vndb = new VNDB('atri_api');
+import mongoose from "mongoose";
+import { config } from "dotenv";
+import model from './VNDBModel.js';
+import fs from 'fs';
+
+config();
+
+mongoose.connect(process.env.MONGODB_URI, {
+ useNewUrlParser: true,
+ useUnifiedTopology: true
+});
+
+const init_db = () =>
+ mongoose.connection
+ .on('error', (error) => console.error(error))
+ .once('open', () => console.log('Database Connected'));
+
+async function get_vn_by_code(code)
+{
+ return await vndb.query(`get vn details,basic,stats (id = ${code})`);
+}
+
+async function insert_to_db(result)
+{
+ const body = {
+ code: result.id,
+ title: result.title,
+ alias: result.alias,
+ length: result.length,
+ rating: result.rating,
+ description: result.image,
+ image: result.image
+ };
+ const response = await model(body);
+ await response.save();
+}
+
+async function scrape_vn_and_save_to_db(code)
+{
+ const result = await get_vn_by_code(code);
+ if (!result) {
+ console.log("Internal error");
+ return false;
+ }
+
+ if (result.items.length == 0) {
+ console.log(`VN ${code} is not found`);
+ return false;
+ }
+
+ insert_to_db(result.items[0]);
+ return true;
+}
+
+function save_last_id(id)
+{
+ const jsonVal = {
+ last_vn_id: id
+ };
+ fs.writeFileSync('vn-stats.json', JSON.stringify(jsonVal)+"\n");
+ return true;
+}
+
+function get_last_id()
+{
+ if (fs.existsSync('./vn-stats.json')) {
+ const jsonVal = require('./vn-stats.json');
+ return jsonVal['last_vn_id'];
+ }
+
+ return 1;
+}
+
+async function main()
+{
+ init_db();
+
+ let code = 40029;
+ let i;
+
+ i = code - 5;
+ while (i++) {
+ console.log(`Scraping VN ${i}...`);
+ let ret = await scrape_vn_and_save_to_db(i);
+ if (!ret)
+ break;
+ console.log(`Successfully scraped VN ${i}`);
+ }
+ console.log(`Last VN ID is ${code}`);
+ save_last_id(i);
+ process.exit();
+}
+main();
\ No newline at end of file
diff --git a/package.json b/package.json
index 7ef6a42..e5e9912 100644
--- a/package.json
+++ b/package.json
@@ -2,12 +2,15 @@
"name": "vndb_scraper",
"version": "1.0.0",
"description": "VNDB Scrapper for ATRI",
+ "type": "module",
"main": "index.js",
"repository": "[email protected]:vnlx2/vndb_scraper.git",
"author": "Taufiq Pohan <[email protected]>",
"license": "GPLv2",
"dependencies": {
"dotenv": "^16.0.3",
+ "fs": "^0.0.1-security",
+ "mongoose": "^6.7.3",
"mongose": "^0.0.2-security",
"vndb-api": "^1.0.3"
}
--
Taufiq Pohan
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v1 3/4] index: Integrate vn-stats.json with the scraper
[not found] <[email protected]>
2022-11-26 19:32 ` [PATCH v1 1/4] Add vndb-api, mongose, and dotenv module Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 2/4] Initial VNDB scraper and storage management Taufiq Pohan
@ 2022-11-26 19:32 ` Taufiq Pohan
2022-11-26 19:32 ` [PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore Taufiq Pohan
3 siblings, 0 replies; 4+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:32 UTC (permalink / raw)
To: Ammar Faizi
Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
GNU/Weeb Mailing List
vn-stats.json stores the ID of the last VN scraped from VNDB. Integrate
this state into our scraper.
Signed-off-by: Ammar Faizi <[email protected]>
Co-authored-by: Ammar Faizi <[email protected]>
Signed-off-by: Aldy Prastyo <[email protected]>
Co-authored-by: Aldy Prastyo <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
index.js | 82 +++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 58 insertions(+), 24 deletions(-)
diff --git a/index.js b/index.js
index 9ceee76..513ffad 100644
--- a/index.js
+++ b/index.js
@@ -7,21 +7,26 @@ import fs from 'fs';
config();
-mongoose.connect(process.env.MONGODB_URI, {
- useNewUrlParser: true,
- useUnifiedTopology: true
-});
-
-const init_db = () =>
- mongoose.connection
- .on('error', (error) => console.error(error))
- .once('open', () => console.log('Database Connected'));
-
async function get_vn_by_code(code)
{
return await vndb.query(`get vn details,basic,stats (id = ${code})`);
}
+async function get_number_of_vndb_vns()
+{
+ let res = await vndb.query("dbstats");
+
+ if (!("vn" in res))
+ throw Error("Error, vndb malformed response");
+
+ return res.vn;
+}
+
+async function get_number_of_our_vns()
+{
+ return await model.countDocuments();
+}
+
async function insert_to_db(result)
{
const body = {
@@ -65,32 +70,61 @@ function save_last_id(id)
function get_last_id()
{
- if (fs.existsSync('./vn-stats.json')) {
- const jsonVal = require('./vn-stats.json');
- return jsonVal['last_vn_id'];
- }
+ if (!fs.existsSync('./vn-stats.json'))
+ return 1;
- return 1;
+ const jsonVal = fs.readFileSync('./vn-stats.json');
+ let ret = JSON.parse(jsonVal);
+ if (!("last_vn_id" in ret) || isNaN(ret.last_vn_id))
+ return 1;
+
+ return ret.last_vn_id
+}
+
+function sleep(ms)
+{
+ return new Promise((resolve) => {
+ setTimeout(resolve, ms);
+ });
}
-async function main()
+async function start_scrape()
{
- init_db();
+ let i = get_last_id() + 1;
- let code = 40029;
- let i;
+ while (true) {
+ let nr_vns_ours = get_number_of_our_vns();
+ let nr_vns_vndb = get_number_of_vndb_vns();
+
+ if (nr_vns_vndb == nr_vns_ours)
+ break;
- i = code - 5;
- while (i++) {
console.log(`Scraping VN ${i}...`);
let ret = await scrape_vn_and_save_to_db(i);
if (!ret)
break;
+
console.log(`Successfully scraped VN ${i}`);
+ save_last_id(i);
+ i++;
+ await sleep(1000);
}
- console.log(`Last VN ID is ${code}`);
- save_last_id(i);
process.exit();
}
-main();
\ No newline at end of file
+function main()
+{
+ mongoose.connect(process.env.MONGODB_URI, {
+ useNewUrlParser: true,
+ useUnifiedTopology: true
+ });
+
+ mongoose.connection
+ .on('error', (error) => console.error(error))
+ .once('open', async function () {
+ console.log('Database Connected');
+ await start_scrape();
+ });
+}
+
+main();
--
Taufiq Pohan
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v1 4/4] .gitignore: Add *.patch file to .gitignore
[not found] <[email protected]>
` (2 preceding siblings ...)
2022-11-26 19:32 ` [PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Taufiq Pohan
@ 2022-11-26 19:32 ` Taufiq Pohan
3 siblings, 0 replies; 4+ messages in thread
From: Taufiq Pohan @ 2022-11-26 19:32 UTC (permalink / raw)
To: Ammar Faizi
Cc: Taufiq Pohan, Aldy Prastyo, VNLX Kernel Department,
GNU/Weeb Mailing List
From: Ammar Faizi <[email protected]>
We usually generate patch files in the same directory as the
project. Ignore the generated patch files.
Signed-off-by: Ammar Faizi <[email protected]>
Signed-off-by: Taufiq Pohan <[email protected]>
---
.gitignore | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index 29ea801..477063a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
/.env
/node_modules/
/yarn.lock
-/vn-stats.json
\ No newline at end of file
+/vn-stats.json
+*.patch
\ No newline at end of file
--
Taufiq Pohan
^ permalink raw reply related [flat|nested] 4+ messages in thread