From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on gnuweeb.org X-Spam-Level: X-Spam-Status: No, score=-0.0 required=5.0 tests=DKIM_SIGNED,DKIM_VALID, DKIM_VALID_AU,DKIM_VALID_EF,FORGED_HOTMAIL_RCVD2,FREEMAIL_FROM, RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS autolearn=ham autolearn_force=no version=3.4.6 Received: from APC01-SG2-obe.outbound.protection.outlook.com (mail-sgaapc01olkn2070.outbound.protection.outlook.com [40.92.53.70]) by gnuweeb.org (Postfix) with ESMTPS id 9EFC08179B for ; Sat, 26 Nov 2022 19:37:41 +0000 (UTC) Authentication-Results: gnuweeb.org; dkim=pass (2048-bit key; unprotected) header.d=hotmail.com header.i=@hotmail.com header.a=rsa-sha256 header.s=selector1 header.b=p7FYobv2; dkim-atps=neutral ARC-Seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=hXPeX87YoIE0TSEWNsCNbbKX0Nom0mwqXOGzDwiSHVh0l8iMHsFXbp3E3My4b68cBYvoV2YSBmUJLLt9SGuKfqpb8/3v8qAdasI0aiWSlg+IfJu34EXkKrrEowOv0aySpH2RWV3wr0+gnugXxH5rtn2i531gwhOOTWHEUMrOUc/0AQOG0er6LorlVz0iWJnGtxzEsAKbUp+YxFNVln3OvCX3mG8OXUf4uzw+MCcKwL9h1k0gA8JPJWWTGaqqeXGp3RvhpUjuD0k9/q3syj5a+HQBGVfgJs+jBynVuC8eDtXH0nija+Y5DzKUIJExcXvGEL/pSPzdNQ89dxFAagBGxA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=OiMMP0qCzJSAayEqXuVQgtXty2CEjoCr5mPbPTUiJxY=; b=Ump9/iR9CCthtz6GrPVQ+SRT4FykrEmsqoiZkGWahwMW3o/ZPyrzrCBOquVKwFQLCqZbUGP2nb01qk6FTNwXk+RUBCg5Y9P5r0aZnyC5x6thA63c/Efrmd3tcQACmJGtHBcAIGXtYwTh8TOjM8tWoJBpqGMpNbH9IIzB8Yykpv0lHsVFex4v+a6cvQ2BNtNxgdlpLBnOC89rXFsZeq0juwsTjDyDWBk8WJLQy0cZlxJi/HkjjP/r/fk97jfjiD35j0VYUajFWaussJHbStwWgh1LPIqoAQkft/Yb0k58szA76TkrA5MkVfOWCgW4j9hX//Fa5iepKi7PS4ZKcomxdw== ARC-Authentication-Results: i=1; mx.microsoft.com 1; spf=none; dmarc=none; dkim=none; arc=none 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=hotmail.com; s=selector1; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=OiMMP0qCzJSAayEqXuVQgtXty2CEjoCr5mPbPTUiJxY=; b=p7FYobv27eLoOWkX6+vnzH2TTGRWRBSa/Rt6mRmIACc0cWb4iHzXd7dCCsj/e0bFfsy59JVmNOhrUP35V58aPd+u44Cafq80sLzSkjRv6d4qcDWKTtExRc29ZEcAms+1NqzkbxdQQbk7jSaH9ZxmcSwergbKx41RIInr20RMX77+vhhnLLpGQ9k2kYWSiSqKDyblO8vaOduAnBYLCvefqHijxnCeyHweu0rE+Yobaipafp5LIzdQSr9obfyeLrHSqMzX5hhKQWhmJUVJ1VmRsY3PHShx9qOHY6lXLNsH6feniXTtekwvnA+xB3P6VyAeKA1HxRIrTrnNJIrazr16rQ== Received: from TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) by TYZPR06MB4127.apcprd06.prod.outlook.com (2603:1096:400:6::6) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.5857.21; Sat, 26 Nov 2022 19:37:38 +0000 Received: from TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00]) by TY0PR06MB5427.apcprd06.prod.outlook.com ([fe80::3f7b:8f03:f2cb:9b00%9]) with mapi id 15.20.5857.021; Sat, 26 Nov 2022 19:37:38 +0000 From: Taufiq Pohan To: Ammar Faizi Cc: Taufiq Pohan , Aldy Prastyo , VNLX Kernel Department , GNU/Weeb Mailing List Subject: [RESEND PATCH v1 3/4] index: Integrate vn-stats.json with the scraper Date: Sun, 27 Nov 2022 02:37:23 +0700 Message-ID: X-Mailer: git-send-email 2.34.1 In-Reply-To: <20221126193724.289154-1-m.taufiq30s@hotmail.com> References: <20221126193724.289154-1-m.taufiq30s@hotmail.com> Content-Transfer-Encoding: 8bit Content-Type: text/plain X-TMN: [m8tuVU4v5JbBzH8zByw0vjFFOHjf/DgU] X-ClientProxiedBy: SI1PR02CA0014.apcprd02.prod.outlook.com (2603:1096:4:1f7::9) To TY0PR06MB5427.apcprd06.prod.outlook.com (2603:1096:400:218::11) X-Microsoft-Original-Message-ID: <20221126193724.289154-4-m.taufiq30s@hotmail.com> MIME-Version: 1.0 X-MS-Exchange-MessageSentRepresentingType: 1 X-MS-PublicTrafficType: Email X-MS-TrafficTypeDiagnostic: TY0PR06MB5427:EE_|TYZPR06MB4127:EE_ X-MS-Office365-Filtering-Correlation-Id: 
2c04d875-9fd4-4fe1-343f-08dacfe5accc X-MS-Exchange-SLBlob-MailProps: AZnQBsB9XmpBL2wTkE/FcQ5ooMzt3EW/csvXUHTVfjgS8DGF4/LcORE0LU8WwJgzWJCjDMBAlDpjSrJoom78qHnz8hwxSxiZdiYTZxnSegNZMoFAyQ3+wHaPDLdmYIHUxUP/7wcsUc1rhiGnDbK9OqOiqyYtbVg4rbvD9UbWZaqUBy3LnlnY1E/wee9LBE8MWXzPg5DnAuWpvYKTcpIO41ZH/AIy8Eqt9bsbPmcp3G6pXFuIKV4EA6Nj2pcz5uCC2NwKcftF7JI4qc23mtERgxQdNSqKf02n5k/w8tF9Bt/Zh1/RKZg4Popzt2fSQf5V8yVpeFB4zQpWE3RNs+G7fs7DNfPo57NQJ9lAs8JRdh7449zXPhPdVxNYJX5aOBl4cBq19LnRV0qznUGyiqERAsoMy2rAaJ8npqcPjs3ND/burJnHuKZfIkmbOOBf9CJalSpVOcdgUL++M7HWmasGbPXBanqEIm0mYVHvVNyRtXbpI73n+XPVUqtEddbS3CuKIOCWTayb2F0IzOJtT8AYF2VAX3aa+uVBEcGOc3hvAj2rOnCpxO1kDxNw26+znWcIkJTWLcbJlmrS1ReP+bprQME1L3RP+HPh2xNJmj5Ivq8xmip/h3oraY7CBee2IFDsAd7MH09R2Bs7rbLzP91hqoVZ4HTUoCY4oyeYwZ2Ubyo8F3xauOtjuPVRwJedMdkg8YXlWiaFvuzOMPHRcjaEeG5oxA0Co/I8p5tIw+BUFNf/3Hoa4g6Z2si7mJr08/xpD7Y8pXyKFmw= X-Microsoft-Antispam: BCL:0; X-Microsoft-Antispam-Message-Info: ffIdDF4ReJ/BSNg4LguyA/81bmV5MV6Gquw+HLdCsjPM7w/bJA5btTR+EZgOi3Locbsl0Jx2+Ua1CkHNxCdmemwbDLJ2SX+WC/OzimxauwFLMTpVBG/0bUZWD2FKZe8OY6pxV62U1r++kNnkE2QMmw9YPnrH0fQobnVbq5Y382bI3dB5EW24Hxzc4BcMUIZ1l/jqUe80JXJKTY1nTJ71Ko//NGWLJ94oeAUEuOtd4GeYHo2cgKITklERLbfW5eHcdPpABDSQcA71ISLt6rgj6f+Mwo1V+VeqG4Tj/ORzgjOE9cVhoCZ5UGHs4qshgC0GApJXsovaaj7FCwQFQYOSiw88ErXncFA2q2DhPC/85B0dHDe/eoksjmSs12/sT4dhlz67FyZnvaaRMm8KOQZsrkyRKE3UpgGpdvCz+oG+wprRCJayGhNpRDNazYhuGgnf8wJnQULq4ZGNl3JutJUGJ3BeZ4BAImoRQoq0dVn8y8ArcmwykqVjcSe33qZHgpzIbz1wYMT0QbX3Ofi24b5+YA5xEdNhXHsCYXAyO2DlLmgP45pxl2dhie1a/ztkkpWuezV5HWGiT4W1EbA5mO7Pxn4/49jvXx1AmcqnOLWsQ91VkGZJ7ibHdWGIY9XSrnNyaeO9z6hETM78GdnjPhONxQ== X-MS-Exchange-AntiSpam-MessageData-ChunkCount: 1 X-MS-Exchange-AntiSpam-MessageData-0: =?us-ascii?Q?MTLhCKD1Yd+lMNxnv9l5h30Fr9RDWKeeh6ohWbnexx3GK91TlZ2MlseyiVfN?= =?us-ascii?Q?BGeesLMiV+5/5ogZwNTwh7nIwlUohjGcc/tsIXPc83WgDIfgIf/Ldr7mVnCo?= =?us-ascii?Q?4/sfcFSCl98DJ2qSiod6nFkJaF640zJMzwY2NY2CtJ4iAdpNoTObXtg/6WJd?= =?us-ascii?Q?j5ZuR5Gz+GwLmYMRsJWxkzKvYObqGtGoWeO6eZKAC5/lKTNSRq6QV5OYLfC8?= 
=?us-ascii?Q?lDQ1DPxFlGhq+Vs1gQEuW5wcgJpmhwS0xFhsVPtzxayEztBkyL1itTSDmAMv?= =?us-ascii?Q?St78d0HJRCMZx+mEJBAu4lq5GLXRz/VBxdv+PSuXkCjZO6Qxv4ewoceiNSZG?= =?us-ascii?Q?oHcEf1biNaa2lgRc+/oS/EPGGPbZXgX2zwg/Q6ZP7RzxsJkVmS2HA/HPIAy4?= =?us-ascii?Q?30dsIF0zSQgVeSR0ru3oXizY/BS0vONkRhh053Pwxc63QVr4Gu3UJkkt0+M9?= =?us-ascii?Q?ft7jhfn+hzH825DZ9S4vwO6icKRGSroRhMMhI1QGTPr/H7uyHzpIUHqxV/Pm?= =?us-ascii?Q?lshP1vn7/e2UHLGi+FdE9HqZBBuTknn0uHFtfg1XgpMfma+EA82hTtj8Cvm5?= =?us-ascii?Q?FUVTUi8zz8QsZj/BBouG8xQLNP/9QR5Lo1O9EsJ4vtMxtHOObeL1ngpwxG8Y?= =?us-ascii?Q?p/Y3VZqSnGCGrAj+UYC0n6qHg2hKZwlXFyhnAOx6Cdb9Wz+aHYvvoh+hsh3+?= =?us-ascii?Q?KYjtfWd1x/fPSynkZBmNNb/4JLMd7SNfS5UrDwU6fCsCSUtLvwh9rhQ3SV60?= =?us-ascii?Q?3uFtabrWEaqznfCXnVYS/RN0dvCQ4nt7lBY6cZpUd5qXM9aoCqFoBAYrDHOm?= =?us-ascii?Q?5n0YSrEWVPS7Z11xdOqs1mGS+tHPZda+xLOQizkpKNnHO/ye7I3yLs4moTsg?= =?us-ascii?Q?kH5q0pz4kf00PxahxqbinGST+c48R/v0WwTJTJ7Y+k3fyJ3DazeDxtJQCLm6?= =?us-ascii?Q?CJ6w4lwLWHZm4dPY/r/zvq+Gwhdi/BL4H1gYY42QccDcyWAmcU1O3XJtl/L/?= =?us-ascii?Q?lx2QXoL0RJ98F9FtkxG8rCyYNhkfAJPQX7mmsK6S3r6qJh5e1ug5/s2LIfAS?= =?us-ascii?Q?eKE9+5YqEWEZrpGl/XaCU1hnBNrUpzIj7FjjDXgv+PtKRJOwxKJGj7N+v/TX?= =?us-ascii?Q?NHENCG9DTjDhF1bP/tMR8Gi2cW4ki4XVUOTc25cTu3UF92DiuS/I+jVSUZNQ?= =?us-ascii?Q?pZBboWiIxwv1SUQFgviUX0NoZ7uuV2/DOhrv2ISMYmj6Brarfl0zrRIDC7Lr?= =?us-ascii?Q?TjjhwZmk5hwSBGl6FSy2vX+KH5YlZfx+WnaZ2fplmg=3D=3D?= X-OriginatorOrg: sct-15-20-4755-11-msonline-outlook-3208f.templateTenant X-MS-Exchange-CrossTenant-Network-Message-Id: 2c04d875-9fd4-4fe1-343f-08dacfe5accc X-MS-Exchange-CrossTenant-AuthSource: TY0PR06MB5427.apcprd06.prod.outlook.com X-MS-Exchange-CrossTenant-AuthAs: Internal X-MS-Exchange-CrossTenant-OriginalArrivalTime: 26 Nov 2022 19:37:38.1745 (UTC) X-MS-Exchange-CrossTenant-FromEntityHeader: Hosted X-MS-Exchange-CrossTenant-Id: 84df9e7f-e9f6-40af-b435-aaaaaaaaaaaa X-MS-Exchange-CrossTenant-RMS-PersistedConsumerOrg: 00000000-0000-0000-0000-000000000000 X-MS-Exchange-Transport-CrossTenantHeadersStamped: TYZPR06MB4127 List-Id: 
vn-stats.json saves the last scraped VN ID from VNDB. Integrate this state into our scraper. Signed-off-by: Ammar Faizi Co-authored-by: Ammar Faizi Signed-off-by: Aldy Prastyo Co-authored-by: Aldy Prastyo Signed-off-by: Taufiq Pohan --- index.js | 82 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 24 deletions(-) diff --git a/index.js b/index.js index 9ceee76..513ffad 100644 --- a/index.js +++ b/index.js @@ -7,21 +7,26 @@ import fs from 'fs'; config(); -mongoose.connect(process.env.MONGODB_URI, { - useNewUrlParser: true, - useUnifiedTopology: true -}); - -const init_db = () => - mongoose.connection - .on('error', (error) => console.error(error)) - .once('open', () => console.log('Database Connected')); - async function get_vn_by_code(code) { return await vndb.query(`get vn details,basic,stats (id = ${code})`); } +async function get_number_of_vndb_vns() +{ + let res = await vndb.query("dbstats"); + + if (!("vn" in res)) + throw Error("Error, vndb malformed response"); + + return res.vn; +} + +async function get_number_of_our_vns() +{ + return await model.countDocuments(); +} + async function insert_to_db(result) { const body = { @@ -65,32 +70,61 @@ function save_last_id(id) function get_last_id() { - if (fs.existsSync('./vn-stats.json')) { - const jsonVal = require('./vn-stats.json'); - return jsonVal['last_vn_id']; - } + if (!fs.existsSync('./vn-stats.json')) + return 1; - return 1; + const jsonVal = fs.readFileSync('./vn-stats.json'); + let ret = JSON.parse(jsonVal); + if (!("last_vn_id" in ret) || isNaN(ret.last_vn_id)) + return 1; + + return ret.last_vn_id +} + +function sleep(ms) +{ + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); } -async function main() +async function start_scrape() { - init_db(); + let i = get_last_id() + 1; - let code = 40029; - let i; + while (true) { + let nr_vns_ours = get_number_of_our_vns(); + let nr_vns_vndb = get_number_of_vndb_vns(); + + if (nr_vns_vndb == nr_vns_ours) + 
break; - i = code - 5; - while (i++) { console.log(`Scraping VN ${i}...`); let ret = await scrape_vn_and_save_to_db(i); if (!ret) break; + console.log(`Successfully scraped VN ${i}`); + save_last_id(i); + i++; + await sleep(1000); } - console.log(`Last VN ID is ${code}`); - save_last_id(i); process.exit(); } -main(); \ No newline at end of file +function main() +{ + mongoose.connect(process.env.MONGODB_URI, { + useNewUrlParser: true, + useUnifiedTopology: true + }); + + mongoose.connection + .on('error', (error) => console.error(error)) + .once('open', async function () { + console.log('Database Connected'); + await start_scrape(); + }); +} + +main(); -- Taufiq Pohan