From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.0 required=5.0 tests=DKIM_SIGNED,DKIM_VALID, DKIM_VALID_AU,DKIM_VALID_EF,FORGED_HOTMAIL_RCVD2,FREEMAIL_FROM, RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS autolearn=ham autolearn_force=no version=3.4.6 Received: from APC01-SG2-obe.outbound.protection.outlook.com (mail-sgaapc01olkn2107.outbound.protection.outlook.com [40.92.53.107]) by gnuweeb.org (Postfix) with ESMTPS id 0D4A68176A for ; Sat, 26 Nov 2022 19:33:17 +0000 (UTC) Authentication-Results: gnuweeb.org; dkim=pass (2048-bit key; unprotected) header.d=hotmail.com header.i=@hotmail.com header.a=rsa-sha256 header.s=selector1 header.b=KVI2/lVN; dkim-atps=neutral ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=grWTqSPHuezdfqsVKnjJaARji4gjFqAA1WXLirNxRWD7ohivSAdfxhBmGuuRzULtuS8yzrbTmSEJFJnSsA3eWGGfphGwrDG4bEflDC/STmI4WTOxVAdIjSrCAsXH8zkSNA1BToL16igAECn4p7meqvDS/1BBwKDm4ZxYDnkFS/7cKVH7Vo0kvE6v9pwvjHLfAvr//Oo9x017ADBn5FkAUr3ZpfGBL1kG247D58EkEaPSNubrB50IBp0HpSZU5TGaF5xugnsHgYiHjB4obG5lfc43r0teXn0tSz1p3TLty9TkE+G91uuJm4rztp2dLPF8wKXxqDMOAlKyJZo/o54M4A== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=OiMMP0qCzJSAayEqXuVQgtXty2CEjoCr5mPbPTUiJxY=; b=MWkeLoEnSpxx6wX7DJjbnWoT+vDj9LYu1E9Xc5O1R8/5l0tTTSVAVElhvXlM8X/ODK+nzujMI8lMwBdJbO6Ewwnj4R94OQ//P1oo7UJ9a5uGQYjtmuq7bwXc6zIWPdM+SZzSsKSmSaBaL4j3iGnK89L4hDOKfMmsaNZcrNn1hxU0ItuN+tbSWeCg/XV/3OB4rBxikyEy7C11dsDPIN4opWichz624vcdOWP37f2+Kv9lrxXTajKFCOGP5VGDdowSvBva4U0Drxebazt7xf4sfeGwVKW5T4ImLiEV43Xc8E97VKaH71zYKFNXG4VCk/K47cCylNTgHGhtQq6bPmXgTA== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=hotmail.com; s=selector1; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=OiMMP0qCzJSAayEqXuVQgtXty2CEjoCr5mPbPTUiJxY=; b=KVI2/lVNUybYy5iXQKxgoLD19Wa+CQI1bK0PCJx1I86Qftrd0TAk3uRIri4m1jM/kTebNy41UF/kmdsjslc2RfPnsN8NQA9yMGgPo8/jLqOyB2yXwATaMZMKuaVI7ETxVAaqRxnuZO0Aj11uBDsoEIWoNayfo0Eth0Gh0CRwYRdMhhaAOlEFz/oaiWZdDPnkCvLvNMUcdOQwxraOMlpGebK3s2Yk/L2RCy8JSKhXF8jI42n+zIO93fiu/zAoApzKmkop1dDL+4waGq4QlSLVHSCsT19YaT9IoGj6C5ugzVpd+CeR5wauoglgEN2TmrhzohtCd1UXNFoz4kUPojzHgw== Received: from TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) by TYZPR06MB4127.apcprd06.prod.outlook.com (2603:1096:400:6::6) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5857.21; Sat, 26 Nov 2022 19:33:14 +0000 Received: from TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00]) by TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00%9]) with mapi id 15.20.5857.021; Sat, 26 Nov 2022 19:33:14 +0000 From: Taufiq Pohan To: Ammar Faizi Cc: Taufiq Pohan , Aldy Prastyo , VNLX Kernel Department , GNU/Weeb Mailing List Subject: [PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Date: Sun, 27 Nov 2022 02:32:49 +0700 Message-ID: X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221126193250.282678-1-m.taufiq30s@hotmail.com> References: <20221126193250.282678-1-m.taufiq30s@hotmail.com> Content-Transfer-Encoding: 8bit Content-Type: text/plain X-TMN: [Iz3IrxnPKROINJSdM4OqEfBCS/s/dS/i] X-ClientProxiedBy: SG2PR01CA0196.apcprd01.prod.exchangelabs.com (2603:1096:4:189::23) To TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) X-Microsoft-Original-Message-ID: <20221126193250.282678-4-m.taufiq30s@hotmail.com> MIME-Version: 1.0 X-MS-Exchange-MessageSentRepresentingType: 1 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: TY0PR06MB5427:EE_|TYZPR06MB4127:EE_ X-MS-Office365-Filtering-Correlation-Id: 
3379399b-bb30-45f7-d976-08dacfe50fcf X-MS-Exchange-SLBlob-MailProps: 02NmSoc12DcIjUPJ2moxjeAUBNEB42ilvp0+MzhAQZlzvo6IfnMF70xWzVmHO4YuHagsdArT++1TCDs43LqnmHf0OLcF53TvNPsuS+ROUrOuuLf3U/ObM5ckiat7SIi/cVaYn77ioVzmZ7K9U89adBi0S3uOpaOJj1C2mggL7YDJ6eIQkc4mJ6e4kXMXe1SgBIToRkX2GhomRU4bHALleL2F2byi7KLGTKyOumGz1eji6fEZJJ3AFy2wE/A3AZmsgWroByw/Nv38dUdWl2ALdkf66d5DVBIEwjB3njGqahoPJXSF5m+71PSjugo+ckrgEdk9jS/VGIsH2aslL5uB3sX8KaXyS5+hTLPGruL3Hu11Y9ekUO9cTOJeZgh6SYAjnsylSh3enZLR2ZQoNCAzz5Y9Ah/012XQcIPAo2qspBSCp56yMnpS6yxgQGXCs5O6VR6glOVlxGvfUJUHViJl3VQdOQ+cVoaYvOHk7A4yXdmnu7zwiE031QtGkiIaRCVMM6IapE9CtShyTTym5EIa0MoZS+RMH76WMWQAZe58W1yVyRuwN/5NLUbjjtxLZtQnNd6c262zcCtP/23frRJYMJkK38G7O9uRAJbrT0recqfjcF7CuigoujAFMycbFotjp2ORWnniTNb5Ql2qt2rWeS4d9h4Fu9t12njqDCTlHmiDBh8eNYl7MM+zozFXuVqH7IA1nRZqXxKyR77uVmTHhEyEDVlOJomnFy3jMIJ53ZBDlHXv6weN/N4gs8tPFg14 X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: l8J9sC8otd0O5WMx1gV1W1QTMm6UG54pTeCmyDy1UHnzSC1BjZZLtor1ksjfKg4crKzpm+dz31hoe20fdSRtrcpzHRZmHIImKKV75f7w7jT4EUUX5Foexqbfm5m8D/0w1R5IYEZnM2nZAjCXugChI3/LzNbRhTTk5QH4lPxlplkZfwxARPSKwVXHNM1TH6OYqXSGFQF9Cj5OT6BXoyDT4DQBS8sVuwAOcX/e2u6ZDGdxpd1SUINpUJ6wAwuzihMMtlWzrwSmn+2xbzW18EeE14BIj2kbM+I8zQKts6dNRTa+h4nZJ5c24LKsgsCODB2PJQJU0BKz9+TMoyoAFZD+BJt0qCaGN8O/Hqbvs6RcbuJQ6wQY8g/5h8OhULjOqpV/bYl7luBHGjJ/FL4ua1soE8XkT7RXVSQgz0wQR09sZ3USh6aJtIW3uLbK0PvIW121Dzk6QrJR4TEXpHbwIgu5F9ev4FjwN9t7Jy6wJwGqYHKWVcSSyzRo4sUDV8WyDgPOrPpsqTJo2KFhdXVTyqe9JvUlU6ukBcbNMlFh+La6JTd341JjHfWlsYvhfd6VoLV3dSCdYn+6oIgKFQhhYTJnvEaLYnt8hnT6mx5OEsHHfPKp0QhecGH1RIVT4ukFOP7mbkYig9ibLDY9Y1huPqQbBw== X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 1 X-MS-Exchange-AntiSpam-MessageData-0: =?us-ascii?Q?s+YNlV+bpLsPi9euAQn1DU/rB2eGffiG3SNVOwjgPR6gZ7UqqZragBwVEogW?= =?us-ascii?Q?tWiSlb7PgwLRyu8ez8unad13LTddrUAnYtd1mKB5UJQtGGSBs6eiO/9d3TSa?= =?us-ascii?Q?02TWpwWR/vltfvGxKQcjTXibAf6JWy2qtKwF7RrFXStiXmKOMphK/WbcKd27?= =?us-ascii?Q?zM/5vFYDgneYqurkszXB7vC89FUixRWRQ1HIeTDb8dvjdMrSVWZALb5SqVLk?= 
=?us-ascii?Q?AL+jbhb1VKPc5JknQrnHoGdxb6pDcwrpA2tz3Bu3XHOWmxqyDRcbcyxCIPS9?= =?us-ascii?Q?c+9bG40XTKX/WViwD9lk6cmRSDTpmcHNZ2v/TnRW/ZDyVbkzeY6FPpDauzxf?= =?us-ascii?Q?8lGZssAvc2QN2dGhL+kazaQwUTmvxpojcyN7sEfwDg1GzfejCRRi+bVDzzAi?= =?us-ascii?Q?BqbFudrRvink13tjlclw5w7AG4l/8J6Wcg3D3cLNgh4H/HQrSGP50Qc+QY95?= =?us-ascii?Q?EMpJUwQxyV+pDlH/Ig0wYPwqXgJL+kMqOS2Pk3CEH/mhFAkgySDCaONQUPp4?= =?us-ascii?Q?uBi3XchhgDFDwSD4H4dh6ICw2uYAFrfckTsom//3cC0g8yW1J1z6UsTs9nr7?= =?us-ascii?Q?hxueBXa+pYMLDJClwTvAvqJZmw83DxVnRofe8Ia3oMiOVjShLwiEyOny9JhH?= =?us-ascii?Q?1PdbG8RAIQlcICxtIULHh3meCSvRKgnoctBan0ONe81yWovwKQxVrx7Tsxv9?= =?us-ascii?Q?JzXlQjqTLe+Cll3Dlhz2gRtlSsGA/T2Ff65ijtl7N4Blq/BYr7CKXGvsJcvu?= =?us-ascii?Q?2pkwuNg+WqT2wxlDaBCOVTyUhweFyD4LUnTPIMyJ0HHF8TAuR3xFjcCIUFy1?= =?us-ascii?Q?jKdYabe5GrcqCnsfFfwLZJX7bm3GuqgFtetykgqfc5jgphVQMEtPQbLhdngJ?= =?us-ascii?Q?0ypDnt9RXQUts9L9U+NqQDyLPPnIw0EPuCt6KPShnEp/jrKocvqP08eNtDEf?= =?us-ascii?Q?uvr/AggzG+PK6FRWvKRg1An3IHFz7TioE/1OFVWuKbjTULgg43/n0aK686cc?= =?us-ascii?Q?SLzm+QKt/ADiCAe6cPBaM9CNBkLmD1Bd16RDRgIt8mdBrp8T3EDWfEsc3imT?= =?us-ascii?Q?NMZkuAp44B87LqVBcMMYX1pGZAUYkXpOnduDg4OoEBZaEILV8VN+ht0St4QX?= =?us-ascii?Q?LospSGvdPpQjP+4g7fCGAFftS1ozvtHsmJdqfqHJoD9jlqCX6ZnNBh8WSSOi?= =?us-ascii?Q?NS0RAdmiH77HyRDBf05vtHcUhykgBDUN6jrORxTS76fdhoaE+dYB15yhJq9c?= =?us-ascii?Q?F3xJDdWMzFpzhXP7M51lKh7HytcAgjcrziWoIXMsBQ=3D=3D?= X-OriginatorOrg: sct-15-20-4755-11-msonline-outlook-3208f.templateTenant X-MS-Exchange-CrossTenant-Network-Message-Id: 3379399b-bb30-45f7-d976-08dacfe50fcf X-MS-Exchange-CrossTenant-AuthSource: TY0PR06MB5427.apcprd06.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 26 Nov 2022 19:33:14.7805 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 84df9e7f-e9f6-40af-b435-aaaaaaaaaaaa X-MS-Exchange-CrossTenant-RMS-PersistedConsumerOrg: 00000000-0000-0000-0000-000000000000 X-MS-Exchange-Transport-CrossTenantHeadersStamped: TYZPR06MB4127 List-Id: 
vn-stats.json saves the last scraped VN ID from VNDB. Integrate this state into our scraper. Signed-off-by: Ammar Faizi Co-authored-by: Ammar Faizi Signed-off-by: Aldy Prastyo Co-authored-by: Aldy Prastyo Signed-off-by: Taufiq Pohan --- index.js | 82 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/index.js b/index.js index 9ceee76..513ffad 100644 --- a/index.js +++ b/index.js @@ -7,21 +7,26 @@ import fs from 'fs'; config(); -mongoose.connect(process.env.MONGODB_URI, { - useNewUrlParser: true, - useUnifiedTopology: true -}); - -const init_db = () => - mongoose.connection - .on('error', (error) => console.error(error)) - .once('open', () => console.log('Database Connected')); - async function get_vn_by_code(code) { return await vndb.query(`get vn details,basic,stats (id = ${code})`); } +async function get_number_of_vndb_vns() +{ + let res = await vndb.query("dbstats"); + + if (!("vn" in res)) + throw Error("Error, vndb malformed response"); + + return res.vn; +} + +async function get_number_of_our_vns() +{ + return await model.countDocuments(); +} + async function insert_to_db(result) { const body = { @@ -65,32 +70,61 @@ function save_last_id(id) function get_last_id() { - if (fs.existsSync('./vn-stats.json')) { - const jsonVal = require('./vn-stats.json'); - return jsonVal['last_vn_id']; - } + if (!fs.existsSync('./vn-stats.json')) + return 1; - return 1; + const jsonVal = fs.readFileSync('./vn-stats.json'); + let ret = JSON.parse(jsonVal); + if (!("last_vn_id" in ret) || isNaN(ret.last_vn_id)) + return 1; + + return ret.last_vn_id +} + +function sleep(ms) +{ + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); } -async function main() +async function start_scrape() { - init_db(); + let i = get_last_id() + 1; - let code = 40029; - let i; + while (true) { + let nr_vns_ours = get_number_of_our_vns(); + let nr_vns_vndb = get_number_of_vndb_vns(); + + if (nr_vns_vndb == nr_vns_ours) + 
break; - i = code - 5; - while (i++) { console.log(`Scraping VN ${i}...`); let ret = await scrape_vn_and_save_to_db(i); if (!ret) break; + console.log(`Successfully scraped VN ${i}`); + save_last_id(i); + i++; + await sleep(1000); } - console.log(`Last VN ID is ${code}`); - save_last_id(i); process.exit(); } -main(); \ No newline at end of file +function main() +{ + mongoose.connect(process.env.MONGODB_URI, { + useNewUrlParser: true, + useUnifiedTopology: true + }); + + mongoose.connection + .on('error', (error) => console.error(error)) + .once('open', async function () { + console.log('Database Connected'); + await start_scrape(); + }); +} + +main(); -- Taufiq Pohan