From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.0 required=5.0 tests=DKIM_SIGNED,DKIM_VALID, DKIM_VALID_AU,DKIM_VALID_EF,FORGED_HOTMAIL_RCVD2,FREEMAIL_FROM, RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS autolearn=ham autolearn_force=no version=3.4.6 Received: from APC01-SG2-obe.outbound.protection.outlook.com (mail-sgaapc01olkn2107.outbound.protection.outlook.com [40.92.53.107]) by gnuweeb.org (Postfix) with ESMTPS id 65BFA8178E for ; Sat, 26 Nov 2022 19:33:16 +0000 (UTC) Authentication-Results: gnuweeb.org; dkim=pass (2048-bit key; unprotected) header.d=hotmail.com header.i=@hotmail.com header.a=rsa-sha256 header.s=selector1 header.b=a3SMaEDY; dkim-atps=neutral ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=cg4BQMJX7bycJjaCiCx5aYbT9icyPV6N3mEFEAGg4rfGbEx0Jt133phEo6U+uvODRA8NYJa8BO+nxy/CXPf9SzoofKrMwl1cjmKKojLW0k3dbQ7LVrFr++ufXPlFI08cNovzFDB2lycqm949prvgo38v5YAKWGykg8Wvr6G5jSF2uZO4ZPSM1gyT2y+5DLZaClXClNZfvmBJ6fbqDKqYjAJIqtJT0Jpn12FTxFONjcAdZ2nzfbkDLRv+Ls7Z4L2Z1FU7aLELzkXJG555ydrINxiTLYMIg6phaW9ADdzVMTo8cwdu++bwRYl0zMD18iDPtvTgUtDMvA9B/uSqIBWpzQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=HSwQgGhn+/5K4Y28nLerBtUJoXpbB7yIN0SO/9ju3D0=; b=atOlOXjlvr6+Qeu45+qxS2kI9M60/hD50Z2OexiTLQwxeDoBF1UAX4Vts/gLc1/78ST9WgfgLY+9bdcw1Bh5Qy2QgIVHdy121nPeiSrZWfvKMsIgDyK0NeMJ7uvwmYJHu+bI0ACOzN4CIQxwcinjHRKLN4IZveINukiOnz/XsGw/bLc9b0xxYvAWN96yYfM6G/nw00Li8jzLaAmVclE/iY1yhJvsA76ewFrSqNEbJYsqnDjcVGseEDvoHL6MV+XeS0boiHFpy6JY2tBNVQk1R2sZSKL9+G5Ev/1n6IONVxH1ivYT4UyXVCYUUpqC4b/ks13S6rHJ4FvZ/iOmX0K21A== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=hotmail.com; s=selector1; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=HSwQgGhn+/5K4Y28nLerBtUJoXpbB7yIN0SO/9ju3D0=; b=a3SMaEDYyr/9Ic3+5QITkg5U4Ix6cBdW0fpI/7C/td04rhzj5LzMVdwkmuQKBwSfhcnz5gyIASU9JdrQHQ8HmwzClyE1JzHZCQ/AQJJaWBpNhsAbHiPjN5EVIhP1zai8RWB4OxA5yBKfc628YH9NTEbKBGRUkSYvq1coZ3AVnZydHECCQcU6YTw0rPWToNnNOnqFWU79gvU0wRHhT8dYV13Q4ZSajH6uHaLVJDMcTq8feQr+CV5lZaGNimRWh+evvx1ybV26JnA1mCXEBFFTi7widmA0ARMFtPdHU4i9Md+u9aHBhGbuk2tXHh/7ycMGcgEj3gSO/UUGBXOIPDSkKQ== Received: from TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) by TYZPR06MB4127.apcprd06.prod.outlook.com (2603:1096:400:6::6) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5857.21; Sat, 26 Nov 2022 19:33:13 +0000 Received: from TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00]) by TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00%9]) with mapi id 15.20.5857.021; Sat, 26 Nov 2022 19:33:13 +0000 From: Taufiq Pohan To: Ammar Faizi Cc: Taufiq Pohan , Aldy Prastyo , VNLX Kernel Department , GNU/Weeb Mailing List Subject: [PATCH v1 2/4] Initial VNDB scraper and storage management Date: Sun, 27 Nov 2022 02:32:48 +0700 Message-ID: X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221126193250.282678-1-m.taufiq30s@hotmail.com> References: <20221126193250.282678-1-m.taufiq30s@hotmail.com> Content-Transfer-Encoding: 8bit Content-Type: text/plain X-TMN: [XbgXghl1UYuBAXZ3VBIzXho8ZAPQR7Y3] X-ClientProxiedBy: SG2PR01CA0196.apcprd01.prod.exchangelabs.com (2603:1096:4:189::23) To TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) X-Microsoft-Original-Message-ID: <20221126193250.282678-3-m.taufiq30s@hotmail.com> MIME-Version: 1.0 X-MS-Exchange-MessageSentRepresentingType: 1 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: TY0PR06MB5427:EE_|TYZPR06MB4127:EE_ X-MS-Office365-Filtering-Correlation-Id: 
93d38321-b3f7-4d30-def2-08dacfe50f1c X-MS-Exchange-SLBlob-MailProps: hQngCdii+CawJxYp56Q0hX6FiciZicWoIgeC77ra5EYQEsLp3kS4hzgM5BsrZHpAXzOGZ98ixO+2MDe6ZRKZF44vnFzqzi3u0Z7tSeVeE979BPcjtNnMPpmTLRQ3xJmHnbQ3HrPGQE2O+ZzXO7H9tTRApY23nUTe1JgsANdeHQaVfSsSnmGFKiRdBz0cN16fRJbgWOuh+z2XdS/f1BmF/ojBCN1LxpnXqaFbbxV7Y5Mrg+DfSCCq/dH/Epuqj/QEPsMfPgTd8wHpQ5qnl8V/wW71UCaJ6wqQ2kDKvb7F1D1LXsDq2KL0fZ79cXFAMboTRcuo73ks4JazQXk1ysLBNBkvbar49mEQtK5p9s4M7uT9tBsk1Vg6Id0GRg7xeSqLFHPDDgcZGTZMAbukCdeLQdGUrC/Jqveqoy7H7lgvp2n3ZUIObGFKKsX/UcR8qmOZ+QM1RT2EirvQ9azgphkJtN8lqU4vdTL4W6qUAanb0wN3lZIEVd5EwLChz7mpL9x1kJPKI4bAfLz2nIHrrRwPYOEQofGB0nDWefgXMcTV0AlhvTF5oLDrqcH7JHBy3DbsdUKNa7Xxv879Xa9czwdttzgjTod0HLxTAmi3rDnRG16ZPAOSlPZ1SDLktbRf0ZjflANzpiDXPAjSk5npgwu0ETs0wDcbAHsI8FHYqvqTNTMQpqlA1LjuOw== X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: Y8TClmtCXbCmSSlw4NWePjHACa+EnU/wTTz/1m/i6m1zrJRVJkZ+Nj9O96VmTdmGASsqy+959ptNnaV60RDWGEBtTrod79sfAGw4uSaCpabve0HPjYOJM2Ptxk309uDw6DLjzrl0N86ewl5RZyQbio0Sd3eXIaLVKBpz9mmQcpGXRsS/xOoIleC3KVNF2Jhmp4dbHscgJlWGNCjRWss/a5fta5dKJ2Qcc2N0+A+SlQb5VwKJbCK4C4gy1Xoxdig0kXgdgqTkcZg8i5h8yWj+ZV/0lYyQNEcJM0FSil4A+zLxD2yMnLB6pg45GuAg3Javr0A/GAfWhm0ZkvOVw07Rg+vsxpHbXM/nwLbIeszOFD2zF5aPIrb+peuFoxSM5+0TZff/1KPHfRJuv3LsRR/7HShOeseXOHziDmKJA4OjAzZ+Eq68enBdC21CX0bDRVLgiDOGujuwLtbQmVXTVX8LLZhYRGQ5X7fmbMhLPepG0Dq0RQ2QI6RX2LyUZHcYKla4ViMyLIMih/EIe+GmzT/2fGW68EuxQ8jfQaL2EUPx41RwLyedLJNa5COYIXeel5ue2VvvG9i+MfiI4wC1eUEjlpgE/y5Cm4rAbM5vdPWZFZ9fr2PQ3oico0RsRzGfbNDzrBJ8/W85+EpqMIWhrJDWwA== X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 1 X-MS-Exchange-AntiSpam-MessageData-0: =?us-ascii?Q?RB7sXrfItTPgtkQQxUGqNqlM5jZ2ND9r/w2EVrdVwZy/KlrYHbxTciqkCB6k?= =?us-ascii?Q?mzciwjJUiZV4vgjssaFQdoM39svU10NE34SuWbqYKpXWsCtR4rEkzanvnij8?= =?us-ascii?Q?aAp3JQ39PKMd+0mYmWL/N0eI9pgxhuYh6x6N7JKzwovXI2SFSq2GqGeJzaoN?= =?us-ascii?Q?TkA5d858WtWyW0mxB2rpXcVx5/eDGUdKLMLp7TSRpv1Lrel/CtMG0AOwBHMc?= =?us-ascii?Q?VKXhkzjw8aCCqigkumKfiknKoDBy/O+CwEEi3t10bG+fsWPaKCpmRU/OXZP2?= 
=?us-ascii?Q?Cc2bUQD6X49JGNBPasDhj4CQApxkiKFZZd2a1oRK4znCycJzY06gfI7yibFo?= =?us-ascii?Q?Caj8ztpgIc4kIh+nTEmCgjzePTOFRDkZpC+pY7bR+vCk48HcGfOlIrhf4L/9?= =?us-ascii?Q?1ZH6L9BsGtpRZkhdRbh5yfnjdlzNGXwLceD5v3CvAVa3stTE0bxyeOa/Wfr8?= =?us-ascii?Q?ISHtRgtZhffKMkhOKYuxXmMa5ID9iVQLD/N14hH8IA39c3pXZCQ1FbIX4G7Y?= =?us-ascii?Q?V61OVYL4XkjGvyWCmwN/55sj6tnELtrpkQhoM7ocNY9qaVZCPNLp9W2hTjIh?= =?us-ascii?Q?bazQIr2CQQR00+27R2y62w/yOIUTdh0XpnfEmHjae1+fNkblbXnZc0/90keM?= =?us-ascii?Q?7G5UBTD+py2fTdxngTKkDw9IwAPpPopUT5y5zNwvNSCCJM196ZgBIgoptYIQ?= =?us-ascii?Q?ygYeGHPYNLMiOxSmqJXnnEDPzTTylp+25rhX3s7qS38gYM56fy44MJYJh7ob?= =?us-ascii?Q?i29AiGpzR3R9oJfL+hWXK26MQh7ZBvqIPjrHFXc4buIlsVLxWAcyaAQD5NSd?= =?us-ascii?Q?+Q+WJI4i2HxSLL7WamIaWrE55rtT1XDYAHxqmOx13T5b9js3ZoQGrw07bszL?= =?us-ascii?Q?riGSQvcI1MBG5sbevaFyWv/+iI21eW3DCbhCfwEWgtye0qW5IT6RJBo7I/OP?= =?us-ascii?Q?VxHXBwNDbIup8+QelSMc2zjjTOqZKtnwPVnK/8cnMls43qPcAqHvaHaVYujt?= =?us-ascii?Q?eNwKvon2IEIQTNf+vivQMEqJQFWaJI4LHIGsR+upF1B3PusgZiRjZEPtLFq/?= =?us-ascii?Q?MqmhdRFXBnEDFAwbaIMcI2ieMgG1KEYjRXusFlCSLc2512l1RpzYonJ5q8Vv?= =?us-ascii?Q?4KGz4aU0A6FQzCIF0NaOazgzj4YVP+CJXwE1fd6Mf3mJO/0bjp5Yn6TeS0/x?= =?us-ascii?Q?tvs6ZLpxsws7tJVkKkB2HyzDXW7NZFUM5bk7txcSmaDmjntuI3c0qhKxF793?= =?us-ascii?Q?qK0w8NlWfaaeNObyZ7mO1z0J6klnMbcXP8RLMsu/TQ=3D=3D?= X-OriginatorOrg: sct-15-20-4755-11-msonline-outlook-3208f.templateTenant X-MS-Exchange-CrossTenant-Network-Message-Id: 93d38321-b3f7-4d30-def2-08dacfe50f1c X-MS-Exchange-CrossTenant-AuthSource: TY0PR06MB5427.apcprd06.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 26 Nov 2022 19:33:13.6232 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 84df9e7f-e9f6-40af-b435-aaaaaaaaaaaa X-MS-Exchange-CrossTenant-RMS-PersistedConsumerOrg: 00000000-0000-0000-0000-000000000000 X-MS-Exchange-Transport-CrossTenantHeadersStamped: TYZPR06MB4127 List-Id: Signed-off-by: Ammar Faizi Co-authored-by: Ammar Faizi Signed-off-by: Aldy Prastyo 
Co-authored-by: Aldy Prastyo
Signed-off-by: Taufiq Pohan
---
 .gitignore   |   7 ++--
 VNDBModel.js |  34 +++++++++++++
 index.js     | 107 ++++++++++++++++++++++++++++++++++++++--
 package.json |   3 ++-
 4 files changed, 145 insertions(+), 6 deletions(-)
 create mode 100644 VNDBModel.js

diff --git a/.gitignore b/.gitignore
index b855cbc..29ea801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.env
-node_modules/
-yarn.lock
\ No newline at end of file
+/.env
+/node_modules/
+/yarn.lock
+/vn-stats.json
\ No newline at end of file
diff --git a/VNDBModel.js b/VNDBModel.js
new file mode 100644
index 0000000..4ba11a0
--- /dev/null
+++ b/VNDBModel.js
@@ -0,0 +1,34 @@
+import mongoose from "mongoose";
+
+// Schema
+
+const VisualNovel = mongoose.Schema({
+    code: {
+        type: String,
+        required: true
+    },
+    title: {
+        type: String,
+        required: true
+    },
+    alias: {
+        type: String
+    },
+    length: {
+        type: Number
+    },
+    rating: {
+        type: Number,
+    },
+    description: {
+        type: String,
+        required: true
+    },
+    image: {
+        type: String
+    }
+}, {
+    timestamps: true
+});
+
+export default mongoose.model('vndb', VisualNovel);
\ No newline at end of file
diff --git a/index.js b/index.js
index a3dadfb..9ceee76 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,107 @@
-import VNDB from "vndb-api";
-
+import VNDB from 'vndb-api';
 const vndb = new VNDB('atri_api');
 
+import mongoose from "mongoose";
+import { config } from "dotenv";
+import model from './VNDBModel.js';
+import fs from 'fs';
+
+config();
+
+mongoose.connect(process.env.MONGODB_URI, {
+    useNewUrlParser: true,
+    useUnifiedTopology: true
+});
+
+// Attach error/open logging handlers to the default mongoose connection.
+const init_db = () =>
+    mongoose.connection
+        .on('error', (error) => console.error(error))
+        .once('open', () => console.log('Database Connected'));
+
+// Query VNDB for the visual novel whose id equals `code`.
+async function get_vn_by_code(code)
+{
+    return await vndb.query(`get vn details,basic,stats (id = ${code})`);
+}
+
+// Save one VNDB item via VNDBModel (VNDB's field is "aliases", the schema's "alias").
+async function insert_to_db(result)
+{
+    const body = {
+        code: result.id,
+        title: result.title,
+        alias: result.aliases,
+        length: result.length,
+        rating: result.rating,
+        description: result.description,
+        image: result.image
+    };
+    const response = new model(body);
+    await response.save();
+}
+
+// Fetch one VN and store it; returns false when nothing was stored.
+async function scrape_vn_and_save_to_db(code)
+{
+    const result = await get_vn_by_code(code);
+    if (!result) {
+        console.log("Internal error");
+        return false;
+    }
+
+    if (result.items.length === 0) {
+        console.log(`VN ${code} is not found`);
+        return false;
+    }
+
+    // Await the write so process.exit() in main() cannot cut it short.
+    await insert_to_db(result.items[0]);
+    return true;
+}
+
+// Persist `id` as last_vn_id in vn-stats.json.
+function save_last_id(id)
+{
+    const jsonVal = {
+        last_vn_id: id
+    };
+    fs.writeFileSync('vn-stats.json', JSON.stringify(jsonVal)+"\n");
+    return true;
+}
+
+// Read last_vn_id back from vn-stats.json; defaults to 1 if the file is absent.
+function get_last_id()
+{
+    if (fs.existsSync('./vn-stats.json')) {
+        // require() is undefined in ES modules ("type": "module"), so parse manually.
+        const jsonVal = JSON.parse(fs.readFileSync('./vn-stats.json', 'utf8'));
+        return jsonVal['last_vn_id'];
+    }
+
+    return 1;
+}
+
+// Scrape consecutive VN ids until one fails, record the stop point, then exit.
+async function main()
+{
+    init_db();
+
+    let code = 40029;
+    let i;
+
+    i = code - 5;
+    while (i++) {
+        console.log(`Scraping VN ${i}...`);
+        let ret = await scrape_vn_and_save_to_db(i);
+        if (!ret)
+            break;
+        console.log(`Successfully scraped VN ${i}`);
+    }
+    // NOTE(review): logs the hard-coded start id; the id saved below is i.
+    console.log(`Last VN ID is ${code}`);
+    save_last_id(i);
+    process.exit();
+}
+
+main();
\ No newline at end of file
diff --git a/package.json b/package.json
index 7ef6a42..e5e9912 100644
--- a/package.json
+++ b/package.json
@@ -2,12 +2,13 @@
   "name": "vndb_scraper",
   "version": "1.0.0",
   "description": "VNDB Scrapper for ATRI",
+  "type": "module",
   "main": "index.js",
   "repository": "git@github.com:vnlx2/vndb_scraper.git",
   "author": "Taufiq Pohan ",
   "license": "GPLv2",
   "dependencies": {
     "dotenv": "^16.0.3",
-    "mongose": "^0.0.2-security",
+    "mongoose": "^6.7.3",
     "vndb-api": "^1.0.3"
   }
-- 
Taufiq Pohan