Web scraping Node js liba tress
I write a simple parser for one site. I use Libu tress to install the queue, what can I replace it with? I got an error and apparently this module is a little bit for other purposes.
Mistake:
internal/validators.js:112
throw new ERR_INVALID_ARG_TYPE(name, 'string', value);
^
TypeError [ERR_INVALID_ARG_TYPE]: The "url" argument must be of type string. Received type undefined
at validateString (internal/validators.js:112:11)
at Url.parse (url.js:155:3)
at urlParse (url.js:150:13)
at Url.resolve (url.js:664:29)
at urlResolve (url.js:660:40)
at Node.<anonymous> (C:\Dev\Webscrapping\scrapHitech.js:27:24)
at initialize.exports.each (C:\Dev\Webscrapping\node_modules\cheerio\lib\api\traversing.js:300:24)
at Timeout._onTimeout (C:\Dev\Webscrapping\scrapHitech.js:26:28)
at listOnTimeout (internal/timers.js:533:17)
at processTimers (internal/timers.js:475:7) {
code: 'ERR_INVALID_ARG_TYPE'
}
Parser:
const request = require('request');
const cheerio = require('cheerio');
const tress = require('tress');
const resolve = require('url').resolve;
const fs = require('fs');
const writeStream = fs.createWriteStream('catalogHiTech.json');
const URL = 'https://hi-tech.md/catalog/komplektuyuschie';
const result = [];
request(URL, (error, response, html) => {
//const scrapedProduct = [];
const productCategory = [];
let q = tress(function (URL, callback) {
if (!error && response.statusCode == 200) {
const $ = cheerio.load(html);
result.push({
title: $('.product-title').text(),
price: $('.product-price').text(),
href: (resolve(URL, $('.title-5').attr('href'))),
img: $('.hoverBorderWrapper').children('img').attr('src')
});
$('.products').each(function () {
q.push(resolve(URL, $(this).attr('href')))
});
$('.next_page_link').each(function () {
q.push(resolve(URL, $(this).attr('href')))
});
callback();
}
}, 10);
q.drain = function () {
console.log(result);
//fs.writeFileSync('./data.json', JSON.stringify(result, null, 4));
};
q.push(URL);
UPD: Links led to an absolute, but now it doesn't understand what the method is .push
0