// Pushes a { val, name } record onto `arr` for every element matched by "#login_form input".
// Then, for every entry of `willBeCookies` after the first, it wraps the result of
// utils.getFrom(val, "", "]") into a JSON array string, parses it, and passes
// utils.formatCookie(cookieData, "facebook") to jar.setCookie for "https://www.facebook.com".
// Finally it pushes a { val, name } record onto `arr` for every "form input" element.
// NOTE(review): the callback passed to willBeCookies.slice(1).map(...) is not closed on this
// line, so the snippet looks truncated — confirm where the callback really ends.
// NOTE(review): every .map() call here is used only for its side effects; its result is discarded.
$("#login_form input").map(function(i, v){ arr.push({val: $(v).val(), name: $(v).attr("name")}); }); willBeCookies.slice(1).map(function(val) { var cookieData = JSON.parse("[\"" + utils.getFrom(val, "", "]") + "]"); jar.setCookie(utils.formatCookie(cookieData, "facebook"), "https://www.facebook.com"); $("form input").map(function(i, v){ arr.push({val: $(v).val(), name: $(v).attr("name")}); });
// Append a descriptor (tag name, attributes, serialized HTML) of each matched
// element to `elements`.
// `.each` replaces `.map` because the callback runs only for its side effect on
// `elements`; the collection `.map` would build was thrown away.
// NOTE(review): assumes `matches` is a cheerio selection, as the
// (index, element) callback order and `element.attribs` suggest — confirm.
matches.each((index, element) => {
  elements.push({
    name: element.name,
    attributes: element.attribs,
    html: $.html(element),
  });
});
/**
 * Downloads the New York Times front page, loads it into cheerio and logs the
 * result of mapping `extractArticleData` over every `article` element.
 *
 * The function is declared `async` because its body awaits the HTTP request;
 * using `await` inside a plain function is a syntax error.
 *
 * @returns {Promise} resolves with no value once everything has been logged;
 *   a rejection from `axios` propagates to the caller.
 */
async function scraper() {
  console.log('Scraping articles...');
  const newsUrl = 'https://www.nytimes.com';
  const rawHTML = (await axios(newsUrl)).data;

  // `$` is assigned rather than declared here, so it must already exist in an
  // outer scope. NOTE(review): presumably shared with extractArticleData — confirm.
  $ = cheerio.load(rawHTML);
  console.log('Scraped');

  const articles = $('article');
  console.log('ARTICLEs :: ', articles[0]);

  const articlesData = articles.map(extractArticleData);
  console.log(articlesData);
}
/**
 * Builds a name-to-value map from the input elements found under
 * `#gaia_loginform` in the given HTML.
 *
 * @param {string} body HTML markup handed to `cheerio.load`.
 * @returns {Object} one property per named input, holding that input's
 *   `value` attribute (undefined when the attribute is absent).
 */
function createFormByBody(body) {
  const $ = cheerio.load(body);
  const form = {};

  // `.each` is used because the callback only fills `form`; nothing is mapped.
  $('#gaia_loginform input').each((index, input) => {
    const $input = $(input);
    const name = $input.attr('name');

    // An input without a name would otherwise be stored under the literal
    // key "undefined".
    if (name === undefined) {
      return;
    }

    form[name] = $input.attr('value');
  });

  return form;
}
// Requests `uri`, loads the response body into cheerio and settles the
// surrounding promise: `rej` receives the request error, `res` receives the
// parsed rows of `table<selector> tbody tr`.
request(uri, (error, response, body) => {
  if (error) {
    rej(error);
    return;
  }

  const $ = cheerio.load(body, { ignoreWhitespace: true });
  const $rows = $(`table${selector} tbody tr`);

  // Call the row parser for every row. The previous callback returned the
  // parseTableRow function itself, so `table` was a list of function
  // references instead of parsed rows.
  // NOTE(review): cheerio's (index, element) arguments are forwarded unchanged —
  // confirm this matches parseTableRow's signature.
  const table = $rows.map((i, e) => this.utlis.parseTableRow(i, e)).get();
  res(table);
});
// One record per ".BoxTable tr" row, built from the trimmed text of its
// first five cells.
$(".BoxTable tr")
  .map((rowIndex, row) => {
    const $row = $(row);
    const cellText = (cellSelector) => $row.find(cellSelector).text().trim();

    return {
      time: cellText('td:nth-of-type(1)'),
      rate: cellText('td:nth-of-type(2)'),
      deep: cellText('td:nth-of-type(3)'),
      number: cellText('td:nth-of-type(4)'),
      location: cellText('td:nth-of-type(5)'),
    };
  })
  .get()
/**
 * Joins the inner HTML of every element matching `selector` into one
 * comma-separated string, collapsing each run of whitespace to a single space.
 *
 * @param {string} html markup handed to `cheerio.load`.
 * @param {string} selector selector of the elements to read.
 * @returns {string} the normalized fragments joined with ", "; an element
 *   whose `.html()` is falsy contributes an empty fragment.
 */
function parseHTML(html, selector) {
  // Statements are terminated explicitly: without terminators the function
  // does not parse once its line breaks are lost.
  const $ = cheerio.load(html);

  return $(selector)
    .map((index, element) => ($(element).html() || '').replace(/\s+/g, ' '))
    .get()
    .join(', ');
}
/**
 * Extracts page numbers from the pagination links of a page.
 *
 * Only anchors matching `.tablepaging td a` that carry an `id` attribute are
 * considered; the number is read from their `page` attribute.
 *
 * @param {string} html markup handed to `cheerio.load`.
 * @returns {number[]} the parsed `page` values (NaN when the attribute is not
 *   a decimal number), in document order.
 */
const parsePagination = (html) => {
  const $ = cheerio.load(html);

  // The anchors are selected directly instead of being serialized with
  // `$.html()` and parsed a second time; the attributes read are the same.
  return $('.tablepaging td a')
    .toArray()
    .filter((anchor) => anchor.attribs.id !== undefined)
    // Explicit radix so a value such as "0x1f" is not read as hexadecimal.
    .map((anchor) => Number.parseInt(anchor.attribs.page, 10));
};
// One { author, title, published } record per ".book" element, read from the
// text of its h2, h3 and h4 descendants respectively.
$('.book')
  .map((position, book) => {
    const $book = $(book);
    const author = $book.find('h2').text();
    const title = $book.find('h3').text();
    const published = $book.find('h4').text();

    return { author, title, published };
  })
  .get()
// One { text, href } record per anchor: the text is trimmed through the `S`
// string wrapper, the href is the raw attribute value.
$('a')
  .map((position, anchor) => {
    const $anchor = $(anchor);
    const text = S($anchor.text()).trim().s;
    const href = $anchor.attr('href');

    return { text, href };
  })
  .get()
/**
 * Extracts promotion ids from the links matching `#promolain li a`.
 *
 * The id is whatever follows the last "=" of a link's `href`; an href without
 * "=" is returned whole.
 *
 * @param {string} html markup handed to `cheerio.load`.
 * @returns {string[]} one id per link that has an `href`, in document order.
 */
const parsePromotion = (html) => {
  const $ = cheerio.load(html);

  // The anchors are selected directly instead of being serialized with
  // `$.html()` and parsed a second time; the attributes read are the same.
  return $('#promolain li a')
    .toArray()
    .map((anchor) => anchor.attribs.href)
    // An anchor without an href used to raise a TypeError on `.split`.
    .filter((href) => typeof href === 'string')
    .map((href) => href.slice(href.lastIndexOf('=') + 1));
};
// Availability text for every "#main .findit-item" element.
$('#main .findit-item').map((index, item) => {
  const findItemText = $(item).text();
  const separator = ' - ';

  // Availability is whatever follows the last " - " of the item text. When the
  // separator is missing the whole text is kept; previously the -1 returned by
  // lastIndexOf silently dropped the first two characters.
  const separatorIndex = findItemText.lastIndexOf(separator);
  const itemAvailabilityText = separatorIndex === -1
    ? findItemText
    : findItemText.slice(separatorIndex + separator.length);

  // Items whose text contains "- Not Holdable -" get that fact appended.
  return /- Not Holdable -/.test(findItemText)
    ? `${itemAvailabilityText} -- Not Holdable`
    : itemAvailabilityText;
}).get()
/** * Get templates names from VTEX admin HTML * @param {String} templateList HTML list of all templates * @returns {Array} with all template names on VTEX */ getTemplateNames(templateList) { const $ = cheerio.load(templateList); return $(`.template > div`).map(function() { return $(this).text(); }).get(); }
/**
 * Extracts category titles from the elements matching `#subcatpromo div`.
 *
 * The title is read from the `title` attribute of each div's first child.
 *
 * @param {string} html markup handed to `cheerio.load`.
 * @returns {Array} one entry per div whose first child is an element; the
 *   entry is undefined when that child has no `title` attribute.
 */
const parseCategories = (html) => {
  const $ = cheerio.load(html);

  // The divs are selected directly instead of being serialized with
  // `$.html()` and parsed a second time; the attributes read are the same.
  return $('#subcatpromo div')
    .toArray()
    .map((div) => div.firstChild)
    // An empty div, or one whose first child is a text node, has no `attribs`
    // to read and used to raise a TypeError.
    .filter((child) => child != null && child.attribs !== undefined)
    .map((child) => child.attribs.title);
};
// Availability text for every "#main .findit-item" element.
$('#main .findit-item').map((index, item) => {
  const findItemText = $(item).text();
  const separator = ' - ';

  // Availability is whatever follows the last " - " of the item text. When the
  // separator is missing the whole text is kept; previously the -1 returned by
  // lastIndexOf silently dropped the first two characters.
  const separatorIndex = findItemText.lastIndexOf(separator);
  const itemAvailabilityText = separatorIndex === -1
    ? findItemText
    : findItemText.slice(separatorIndex + separator.length);

  // Items whose text contains "- Not Holdable -" get that fact appended.
  return /- Not Holdable -/.test(findItemText)
    ? `${itemAvailabilityText} -- Not Holdable`
    : itemAvailabilityText;
}).get()