Promise Promise.all Map和异步数据数组

时间:2017-10-01 02:40:25

标签: javascript http express npm

我正在尝试从页面中抓取产品网址,然后使用这些网址访问每个产品页面并抓取产品的信息。我正在尝试创建一个Promise链,但我是Promises的初学者。挂机试图在访问每个产品网址后将产品价格作为数组返回。我陷入了厄运的承诺链金字塔。如何运行某种循环来清理底部的这个承诺链?

const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const rp = require('request-promise');

// Thanks to chovy @ stack overflow 
//https://stackoverflow.com/questions/21194934/node-how-to-create-a-directory-if-doesnt-exist
if (!fs.existsSync("./data")) {
    fs.mkdirSync("./data"); // create the data folder it doesn't exist
}

let mikeshirturl = 'http://www.shirts4mike.com/shirts.php'; //shirt site entry point

let shirturlscrape = new Promise((resolve) => { // make a promise to find all of the shirturls
    request(mikeshirturl, (error, response, html) => { // request the shirt site
        if (!error) {
            const $ = cheerio.load(html); // use cheerio to scrape the page
            let shirtlinks = []; //make an array to hold the links to each shirt
            $('.products').filter(function() {
                let data = $(this);
                data.find('li a').each(function(i) { // scrape the hrefs from the shirt links
                    shirtlinks.push($(this).attr('href'));
                });
                let shirturls = shirtlinks.map((i) => {
                    return mikeshirturl.slice(0,27) + i; // make a new array of the completed shirturls
                }); // end shirtlink map
                resolve(shirturls); // send the urls back to the promise
            }); // end products filter
        } // end error check
    }); // end request url
}); // end the promise

let shirtPrices = []; // array to hold prices
let shirtTitles = []; // array to hold titles
let shirtImageUrls = []; // array to hold shirtimages
let dates = []; // array to hold dates
let shirtDataHeaders = { // the headers for the csv
    title: "Title",
    price: "Price",
    imageUrl: "ImageUrl",
    Url: "Url",
    Time: "Time"
};
let shirtData = []; // the complete array of shirt data
let d = new Date(); // get the date
let month = d.getMonth() + 1; // set the month
let day = d.getDate(); // set the day
let year = d.getFullYear(); // set the year
let currentDate = "" + year + "-" + month + "-" + day; // concatenate date to correct order
let getShirtData = (html) => { // function to scrape shirt data from shirt urls
    const $ = cheerio.load(html);
    $('.shirt-details').filter(function() {
        let data = $(this);
        shirtPrices.push(data.find('.price').text()); //get shirt prices from price class element
        let shirtTitle = String(data.children().first().clone().children().remove().end().text()); //get shirt titles from h1 element
        shirtTitle = shirtTitle.replace(/,/g , " -"); // replace commas in titles with dashes to not confuse csv file
        shirtTitles.push(shirtTitle); //push shirt titles to array

    });
    $('.shirt-picture').filter(function() {
        let data = $(this);
        shirtImageUrls.push('' + mikeshirturl.slice(0,27) + data.find('img').attr('src')); // push shirt image urls to array
    });
    dates.push(currentDate); // push the date of scrape to array
};

let commaseparate = (callback) => { // functino to put data in csv format
    let keys = Object.keys(shirtDataHeaders);
    let values = Object.values(shirtDataHeaders);
    let result = values.join(",") + "\n";
    // Add the rows
    callback.forEach((obj)=>{
        keys.forEach((k, ix)=>{
            if (ix) result += ",";
            result += obj[k];
        });
        result += "\n";
    });
    shirtData = result;
    return shirtData;
}

shirturlscrape // really wild attempt to follow the promise chain down from scraping each shirt url.
    .then((result) => {
        rp(result[0])
            .then(getShirtData)
            .then(() => {
                rp(result[1])
                    .then(getShirtData)
                    .then(() => {
                        rp(result[2])
                            .then(getShirtData)
                            .then(() => {
                                rp(result[3])
                                    .then(getShirtData)
                                    .then(() => {
                                        rp(result[4])
                                            .then(getShirtData)
                                            .then(() => {
                                                rp(result[5])
                                                    .then(getShirtData)
                                                    .then(() => {
                                                        rp(result[6])
                                                            .then(getShirtData)
                                                            .then(() => {
                                                                rp(result[7])
                                                                    .then(getShirtData)
                                                                    .then(() => {
                                                                        for (let i=0; i<result.length; i++) { //push each shirt data object to the final shirtdata array
                                                                            shirtData.push({title: shirtTitles[i], price: shirtPrices[i], imageUrl: shirtImageUrls[i], Url: result[i], Time: dates[i]})
                                                                        }
                                                                        commaseparate(shirtData); // turn the data into csv formatting
                                                                        fs.writeFile("./data/" + currentDate + '.csv' , shirtData); // write the new csv file to the data folder
                                                                    });
                                                            });
                                                    });
                                            });
                                    });
                            });
                    });
            });
    })
    .catch(()=>{ //check if there is an error on the initial mikeshirt site http request
        console.error(`There’s been a 404 error. Cannot connect to http://shirts4mike.com.`);
    });

0 个答案:

没有答案