From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.0 required=5.0 tests=DKIM_SIGNED,DKIM_VALID, DKIM_VALID_AU,DKIM_VALID_EF,FORGED_HOTMAIL_RCVD2,FREEMAIL_FROM, RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS autolearn=ham autolearn_force=no version=3.4.6 Received: from APC01-SG2-obe.outbound.protection.outlook.com (mail-sgaapc01olkn2070.outbound.protection.outlook.com [40.92.53.70]) by gnuweeb.org (Postfix) with ESMTPS id 650F981793 for ; Sat, 26 Nov 2022 19:37:40 +0000 (UTC) Authentication-Results: gnuweeb.org; dkim=pass (2048-bit key; unprotected) header.d=hotmail.com header.i=@hotmail.com header.a=rsa-sha256 header.s=selector1 header.b=bGj+VgCc; dkim-atps=neutral ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=RL1Mkw1OZkMoifV8E0b/3bjvecfr0/UUOu2cs8zfiPpC0WN5Gm6wWUKJohMwmB/+yUkDG8QMcruGK6kiSoxuNd5MDw0SBbmKocETQwZiTWXRIMuOMSNymH3zhl+z0+a3xmgNDrwkZD6SCsXX8Ofcnb9ooWUErHWuJdP3nlM0odC02T62XAHyPqDO+fmrY3MP3FduX4mit4kRY0Ql9EFvZJ4Wm8mgnQm/i6KvX7jN4x+1xmXaKcwgbVNvpOUKtM2pUz8+RPRVNZq9jX9Qt6zOMGEKw8hHiG6D7c7XXH9RwR7Ndw1my0VnI0YCBUkdhtKUURxM21agoc2rcrsozJuMEQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=HSwQgGhn+/5K4Y28nLerBtUJoXpbB7yIN0SO/9ju3D0=; b=Rea4H/qx8J2t1MgLRCE7+4ACM+p67MFzjuj8/IsjSCRa26UxZZJeAc9qS5ZKw6HfBY7//a42FdA2npzXxEVbz2Bme0RjKo9sl6Xm1T/2e08cFodIBDwz0+OjLMeQCE7czjwfgJuPUXy8DHDn3J/VznVO8R/+9vfsfdDALkj/6niEyz4C/tWFnnZTtuvkPx7YPMKOOr+zko2ODCAwu6hidn/34009Khv7AdPzWcpZlztYQeKi42dzzedwm48MVosrEYMKQupxR5YOvSqJ6Ta5jOHcs9y/xk8f2mO2WPz+GOaIXEX0JG9k8o7dmp7GpFY32RXawyuJNO5afZAZhbWR5A== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=hotmail.com; s=selector1; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=HSwQgGhn+/5K4Y28nLerBtUJoXpbB7yIN0SO/9ju3D0=; b=bGj+VgCcfDEd2WrYjN855YwOIFfVnvE0vrtYAFx5TXz1orzCtu1yI1YtgHII0OTDF1bfXb2q2hY1a3ySxhJ+vqM8TvNnCl9cdowQvx/2nSRgKbiJUwyu1GU2SQZQ1+M45lWyyUu9XmxBb9EiX8IqjLBzkruPfGJ+G+N3ADbzGiRTZNssPm/McPwH2SXwNEHdrBoOdrFPiEY+oY4zoAI2hd81g5/1GDadi+LgpauWj4c44JqceMOYTJRjLooSFiS026dNiDqj2GhginhQxHF4BqDrRpIrXU+LSdvsYWTwHZBMiQI8l9tZCDm0s53IHOok3SpYk3hxbS1lQa6abdOHHA== Received: from TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) by TYZPR06MB4127.apcprd06.prod.outlook.com (2603:1096:400:6::6) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5857.21; Sat, 26 Nov 2022 19:37:37 +0000 Received: from TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00]) by TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00%9]) with mapi id 15.20.5857.021; Sat, 26 Nov 2022 19:37:37 +0000 From: Taufiq Pohan To: Ammar Faizi Cc: Taufiq Pohan , Aldy Prastyo , VNLX Kernel Department , GNU/Weeb Mailing List Subject: [RESEND PATCH v1 2/4] Initial VNDB scraper and storage management Date: Sun, 27 Nov 2022 02:37:22 +0700 Message-ID: X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221126193724.289154-1-m.taufiq30s@hotmail.com> References: <20221126193724.289154-1-m.taufiq30s@hotmail.com> Content-Transfer-Encoding: 8bit Content-Type: text/plain X-TMN: [TccYYX/4C7fuEkaWelSh/jc/TpM123lC] X-ClientProxiedBy: SI1PR02CA0014.apcprd02.prod.outlook.com (2603:1096:4:1f7::9) To TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) X-Microsoft-Original-Message-ID: <20221126193724.289154-3-m.taufiq30s@hotmail.com> MIME-Version: 1.0 X-MS-Exchange-MessageSentRepresentingType: 1 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: TY0PR06MB5427:EE_|TYZPR06MB4127:EE_ X-MS-Office365-Filtering-Correlation-Id: 
3dd56300-e57d-4763-9e58-08dacfe5ac1b X-MS-Exchange-SLBlob-MailProps: EgT5Wr3QDKwYUw59ZxKZ0arsxuW3OwS9DwCe1lWhL+W+5tdDKNgpib5E+AiQ2nWpKXzekS99bAz2aAWgZ5C6gzv2uM7a2zfWRVN6gKLDED1wVN9xmwk3xT5YrObH/EIojw8bFO7AOZ5fohPZ9yTS3COGv1RozKu8unRrbyig+sosEs4yn66wIlp53QaTpnOZJHlWj4yMjCKlaOqPrFcC8QjyCrbcjpt7Myg5MwpTdEiNb3nSDwYq2RPkpseO+cGFA2bq74cusF0V97BfXpYcABpM2vxoAI7P0FMDwWl0C9fzuH+z/5TtVy8arlB56+L76nT7nj5uZPzznvem7XOI61DD/byqSaJNw37GXKrAMc1QyrM82subSuxig8Sx3RZ8WcSBHKT3v6qWG2aWEe2v8QzlCBHqG2vmA/flx/ndmuVR6KVabZmUyf93XdCCogV42mBAD4YWntbJ58Z8wWguC0TCeWgsr8vypb6bQ2UB/kQwXPGAmOgJy+IO0Z9v3jCItjGjkZQm7fm+XDxZBeVq6l3BwEE2S6YEi/nNVLG2Y6VF7NKOJZAxfYS+YAr99FEEKED4FYigXAZ3/F4n1d5gT4jXvF3u9bVnX+uGFwwLIpc2QRY5QXVHJA8GQVTooeOBg+UCc6RUy2q8pdjwu1RdRjHRHPFBL2m9ptXeg49s/GWFP0i4GcJSHTj2hAwBFxhD5UdMVUVdqfgj21rO9yl3DAfxqS+Xz3uzxhCGIKr2pAA= X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: UNkVxwtp2cpipH0W4WoJLvsjyO5mlQWl69HvWjO9TW+/mBbazsBogtbBELcrV8xM/EsQR5WIzFan9sxHqi6MF1aCcfzbXUW/kdUPlK/cqJSce1/+OxMnjvwuk75oju1SxRsrGolSDyDoDnIZZ0gbKM5GGTHLzyUjAFvX4Cm9r29dzbszZ5cjk1SRKJztr69VU/3SDb0lBydTvzHeDmpSrjaqrvfBZy/6YS1n6gMSPfejnhyATiRLGhW+CiLBt8s0T26CG/CDDiJw+pY5KCVA5W8xfuhCyvcY8MoaENL5zw70HKcbtUiYb0ITf0Yh+8fZMGX239oXXyoYntqB7hN04HaTODInMRG8jGyAgEuOVI6VX29W0AYZwsoQ8HT+rOG5uQBNRn/BMCYylpEN7wQTmIhNbGUqaYwL9ppHwrLX3sJB+OZg0FLu/EwGQ+SvIjOBpFsTlbEochQXsBLnagzlbHLApR9OceVbWrel0Wgzk6TttudTf/vEQFw11lTviPJurGCTkHJ7ml+lVn4YB64XxLALsvzAs9LI+27hoHbWJM4mEMYXYsyy9aDFXWXD1r+tHEnhZlhRwM3QvqywqiZ4bjEaqUyAvfrrVj2g4e3q/1N268M0C7T3TJp/VuS3H1hJJ+wiQJ9MZVXV8OnSKQ7A/g== X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 1 X-MS-Exchange-AntiSpam-MessageData-0: =?us-ascii?Q?EKY6sT02KSullMxijmayj9iCPulmEwI0FiW/RzRmri02iY9MGx8VjMJZJUW4?= =?us-ascii?Q?UHc4FkuRVmFML70Q0mvQGkHXBCUQzW657iLpND4tAkheRzQgVrVHPwPkVVzG?= =?us-ascii?Q?q+Mw43UfXYgH1jB8TxcsuWn1UlP76yDPlWgUstAuebs1hULcxtStAekkJvBj?= =?us-ascii?Q?1NErteqNImFnFzS3jdNLb8g5nblDjfSdM81iW8gKZ2ccUj8iv93C+gnGmjQz?= 
=?us-ascii?Q?nz/YFuRLYGxqMCtfdYw8UmWWC2g54dMA2IPhxw7+Odzz6Oimb0PyLXO8UmdG?= =?us-ascii?Q?+jP26FuYwtHX9pYkvop9Nyhc5wzsBPfUW+Zb0GF5XmPSQTkxvdSoz2ee8akb?= =?us-ascii?Q?9IATs9jei34iH9c+9z3PqgrLn7CqnmiRVh37QzcCETty19g1wQp0ZumQF+gW?= =?us-ascii?Q?YRhNxvrJUg/h2OuGVLilZ5FsTwtEYW3p9B1tJjil9GpYkRRVKR/J9C6XZqZ+?= =?us-ascii?Q?KscTlKe5Lwn8GfOVVvC7ORkIXW7VpEiFPSFSBske2G9/gOisCFUzvvyACRtu?= =?us-ascii?Q?o3hY2Upfy45biHvOsEwx5Cb9T2sNWdFYKaKIcKrNrbGWs26rFuSvYCXjYGOO?= =?us-ascii?Q?YgbmCHyMhBP0O0k9wKtgWGtircONlvgrKRIxgutIWGQ+BcA32cEctoRohk3r?= =?us-ascii?Q?LOX7/lmkiFw325Og86Q4LInccWTf8Kdnwr8ctTkR4wW1HeEcn/DN0NHQJuRi?= =?us-ascii?Q?abynaD+UqS1JAoZV9MO9zlDV15FqL3EFBmaGIgVUleGLo4LnHHOm1zaChedH?= =?us-ascii?Q?jlSSHwytUXJzPa5Lvtt/38cSO+MfZybt/svuJhmSOp3LqLJsyC0iRWJK34Lm?= =?us-ascii?Q?JALfFrf+De37bL5ErCg5D7Y8OYyAheVfypgsAmNjd9PFicDE1v/OI60jk7JA?= =?us-ascii?Q?fYEKtkIjK6wiVe1QgsvRgEWOPJM0hmeDE3wPkJZEDkLZTERmw3xqSOA+/Zzs?= =?us-ascii?Q?JT5eiWNCqe0zd6FT9l0eZN7ImRIgzLXRKcCm3hX6K0ump++Kb1jXeOnimYTf?= =?us-ascii?Q?CsLYvXXPpKTvvcm05hnyZ44EhW89p6svSM2zy3mlFzUHrOctsRN6/umAGYM7?= =?us-ascii?Q?IQ5wnIoh+qKy7z1CvZTJuHIea6tnIFe4AsiE4Sa3rTssaS9xEBocDimw+/zh?= =?us-ascii?Q?FjV8Y+3c78MDT/KJ6u8cFQ5A64ABi0LCUvRFXhxyLRjyf8N1iTeK+FEiMPGA?= =?us-ascii?Q?NXr1vGdQHxU+zA92zXXL31IJ7YO4PUQzcnNjEbe4PVqK7DV7+sE8d8H3Ri3g?= =?us-ascii?Q?ou0+BMFSZaI16iAa+byQP9R914tad1hBc9ll9IVgXw=3D=3D?= X-OriginatorOrg: sct-15-20-4755-11-msonline-outlook-3208f.templateTenant X-MS-Exchange-CrossTenant-Network-Message-Id: 3dd56300-e57d-4763-9e58-08dacfe5ac1b X-MS-Exchange-CrossTenant-AuthSource: TY0PR06MB5427.apcprd06.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 26 Nov 2022 19:37:37.0016 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 84df9e7f-e9f6-40af-b435-aaaaaaaaaaaa X-MS-Exchange-CrossTenant-RMS-PersistedConsumerOrg: 00000000-0000-0000-0000-000000000000 X-MS-Exchange-Transport-CrossTenantHeadersStamped: TYZPR06MB4127 List-Id: 
Signed-off-by: Ammar Faizi
Co-authored-by: Ammar Faizi
Signed-off-by: Aldy Prastyo
Co-authored-by: Aldy Prastyo
Signed-off-by: Taufiq Pohan
---
 .gitignore   |   7 ++--
 VNDBModel.js |  34 +++++++++++++++++
 index.js     | 105 +++++++++++++++++++++++++++++++++++++++++++++++++--
 package.json |   3 ++
 4 files changed, 144 insertions(+), 5 deletions(-)
 create mode 100644 VNDBModel.js

diff --git a/.gitignore b/.gitignore
index b855cbc..29ea801 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-.env
-node_modules/
-yarn.lock
\ No newline at end of file
+/.env
+/node_modules/
+/yarn.lock
+/vn-stats.json
\ No newline at end of file
diff --git a/VNDBModel.js b/VNDBModel.js
new file mode 100644
index 0000000..4ba11a0
--- /dev/null
+++ b/VNDBModel.js
@@ -0,0 +1,34 @@
+import mongoose from "mongoose";
+
+// Schema
+
+const VisualNovel = mongoose.Schema({
+	code: {
+		type: String,
+		required: true
+	},
+	title: {
+		type: String,
+		required: true
+	},
+	alias: {
+		type: String
+	},
+	length: {
+		type: Number
+	},
+	rating: {
+		type: Number,
+	},
+	description: {
+		type: String,
+		required: true
+	},
+	image: {
+		type: String
+	}
+}, {
+	timestamps: true
+});
+
+export default mongoose.model('vndb', VisualNovel);
\ No newline at end of file
diff --git a/index.js b/index.js
index a3dadfb..9ceee76 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,105 @@
-import VNDB from "vndb-api";
-
+import VNDB from 'vndb-api';
 const vndb = new VNDB('atri_api');
+import mongoose from "mongoose";
+import { config } from "dotenv";
+import model from './VNDBModel.js';
+import fs from 'fs';
+
+config();
+
+mongoose.connect(process.env.MONGODB_URI, {
+	useNewUrlParser: true,
+	useUnifiedTopology: true
+});
+
+// Attach error/open log listeners to the default mongoose connection.
+const init_db = () =>
+	mongoose.connection
+		.on('error', (error) => console.error(error))
+		.once('open', () => console.log('Database Connected'));
+
+// Run the "get vn details,basic,stats" query for a single VN id.
+async function get_vn_by_code(code)
+{
+	return await vndb.query(`get vn details,basic,stats (id = ${code})`);
+}
+
+// Map one VNDB result item onto the model fields and save it.
+async function insert_to_db(result)
+{
+	const body = {
+		code: result.id,
+		title: result.title,
+		alias: result.alias,
+		length: result.length,
+		rating: result.rating,
+		description: result.description,
+		image: result.image
+	};
+	const response = new model(body);
+	await response.save();
+}
+
+// Fetch one VN and store it; resolves to false when the query yields nothing.
+async function scrape_vn_and_save_to_db(code)
+{
+	const result = await get_vn_by_code(code);
+	if (!result) {
+		console.log("Internal error");
+		return false;
+	}
+
+	if (result.items.length === 0) {
+		console.log(`VN ${code} is not found`);
+		return false;
+	}
+
+	// Wait for the save so it finishes before main() calls process.exit().
+	await insert_to_db(result.items[0]);
+	return true;
+}
+
+// Write the given id to vn-stats.json as { last_vn_id: id }.
+function save_last_id(id)
+{
+	const jsonVal = {
+		last_vn_id: id
+	};
+	fs.writeFileSync('vn-stats.json', JSON.stringify(jsonVal)+"\n");
+	return true;
+}
+
+// Read last_vn_id back from vn-stats.json; returns 1 when the file is absent.
+function get_last_id()
+{
+	if (fs.existsSync('./vn-stats.json')) {
+		// require() is not defined in an ES module ("type": "module").
+		const jsonVal = JSON.parse(fs.readFileSync('./vn-stats.json', 'utf8'));
+		return jsonVal['last_vn_id'];
+	}
+
+	return 1;
+}
+
+// Scrape consecutive VN ids until one fails, then save the id where it stopped.
+async function main()
+{
+	init_db();
+
+	let code = 40029;
+	let i;
+
+	i = code - 5;
+	while (i++) {
+		console.log(`Scraping VN ${i}...`);
+		let ret = await scrape_vn_and_save_to_db(i);
+		if (!ret)
+			break;
+		console.log(`Successfully scraped VN ${i}`);
+	}
+	console.log(`Last VN ID is ${code}`);
+	save_last_id(i);
+	process.exit();
+}
 
+main();
\ No newline at end of file
diff --git a/package.json b/package.json
index 7ef6a42..e5e9912 100644
--- a/package.json
+++ b/package.json
@@ -2,12 +2,15 @@
   "name": "vndb_scraper",
   "version": "1.0.0",
   "description": "VNDB Scrapper for ATRI",
+  "type": "module",
   "main": "index.js",
   "repository": "git@github.com:vnlx2/vndb_scraper.git",
   "author": "Taufiq Pohan ",
   "license": "GPLv2",
   "dependencies": {
     "dotenv": "^16.0.3",
+    "fs": "^0.0.1-security",
+    "mongoose": "^6.7.3",
     "mongose": "^0.0.2-security",
     "vndb-api": "^1.0.3"
   }
-- 
Taufiq Pohan