Puppeteer: Save data from a for loop in database

I’m web-scraping a site and managed to extract data in the for loop.

However, I don’t know how to save it to my MongoDB database, because I’m receiving the error ReferenceError: nameElement is not defined.

How can I save the results from my for loop as an object to my database?

// Module-level list that scrapeInfiniteScrollItems appends each created KingsDB document to.
const kclResults = [];

/**
 * Reads contact details from the page and tries to store them as a single
 * KingsDB document.
 *
 * NOTE: as written this function always ends up in the catch block, because
 * the values read inside the page.evaluate callback are not visible where
 * the KingsDB document is built (see the comments below).
 *
 * @param page - object exposing `content()` and `evaluate()`; presumably a
 *   Puppeteer Page — confirm against the caller.
 * @param scrollDelay - defaults to 10000; not used anywhere in this function.
 * @returns the module-level `kclResults` array, or undefined when an error
 *   is caught and logged.
 */
async function scrapeInfiniteScrollItems(
  page,
  scrollDelay = 10000
) {
  try {
    // Snapshot of the page's HTML loaded into cheerio on the Node side.
    const html = await page.content();
    const $ = cheerio.load(html);

    // Nothing is returned from this callback, so none of the values read
    // inside it make their way back to the code after this call.
    await page.evaluate(() => {
      let elements = $("[role='listitem']")
        .find("._2DX0iPG8PDF3Si_o5PlzIj")
        .toArray();

      // `i` is assigned without a declaration keyword.
      for (i = 0; i < elements.length; i++) {
        $(elements[i]).click();
        // These constants are scoped to this loop body; they cease to exist
        // once the iteration ends and are never visible outside the callback.
        const nameElement = $("[data-log-name='PersonName']").text();
        const emailElement = $("[data-log-name='Email']").text();
        const allElements = $("[aria-label='Contact information']").text();
        const officeLocation = $("[data-log-name='OfficeLocation']").text();
        const position = $("[data-log-name='Company']").text();
        const jobTitle = $("[data-log-name='JobTitle']").text();
        const departament = $("[data-log-name='Department']").text();
        console.log(
          `email: ${emailElement} name: ${nameElement} allElements: ${allElements} n office location: ${officeLocation} n position: ${position} n jobTitle: ${jobTitle} n departament: ${departament}`
        );
      }
    });


    // None of the identifiers below are declared in this scope (they only
    // exist inside the loop above), so evaluating this object literal throws
    // "ReferenceError: nameElement is not defined".
    let kclResult = new KingsDB({
      nameElement,
      emailElement,
      allElements,
      officeLocation,
      position,
      jobTitle,
      departament,
    });

    kclResults.push(kclResult);
    console.log(kclResults);
    // The return value of save() is discarded here.
    kclResult.save();
    return kclResults;
  } catch (error) {
    // Errors (including the ReferenceError above) are only logged; the
    // function then returns undefined.
    console.log(error);
  }
}

Answer

You are declaring nameElement (and the other variables) inside the for loop, within the page.evaluate callback, and then trying to access them outside that scope, where they do not exist.

Instead, collect the scraped values into an array inside the callback, return that array from page.evaluate, and iterate over it when you’re writing to your DB. The code below should work:

// Module-level list kept from the original snippet. The function below builds
// and returns its own per-call array and does not modify this one.
const kclResults = [];

/**
 * Clicks every contact entry on the page, reads the contact details shown
 * after each click, and saves one KingsDB document per entry.
 *
 * @param page - Puppeteer Page (or any object with a compatible `evaluate()`).
 * @param scrollDelay - defaults to 10000; kept for interface compatibility,
 *   not used by this function.
 * @returns {Promise<Array|undefined>} the KingsDB documents that were saved
 *   during this call, or undefined when an error was caught and logged.
 */
async function scrapeInfiniteScrollItems(
    page,
    scrollDelay = 10000
) {
    try {
        // The callback is serialized and executed inside the browser, so it
        // must rely only on browser APIs: Node-side variables (such as a
        // cheerio `$`) are not available there. Only the returned plain
        // objects travel back to Node.
        const scrapedRows = await page.evaluate(() => {
            // Concatenated text of every element matching the selector.
            const textOf = (selector) =>
                Array.from(document.querySelectorAll(selector))
                    .map((node) => node.textContent)
                    .join("");

            const entries = Array.from(
                document.querySelectorAll(
                    "[role='listitem'] ._2DX0iPG8PDF3Si_o5PlzIj"
                )
            );
            const rows = [];

            for (const entry of entries) {
                // Clicking an entry makes the page show that contact's details.
                entry.click();
                const nameElement = textOf("[data-log-name='PersonName']");
                const emailElement = textOf("[data-log-name='Email']");
                const allElements = textOf("[aria-label='Contact information']");
                const officeLocation = textOf("[data-log-name='OfficeLocation']");
                const position = textOf("[data-log-name='Company']");
                const jobTitle = textOf("[data-log-name='JobTitle']");
                const departament = textOf("[data-log-name='Department']");
                rows.push({
                    nameElement,
                    emailElement,
                    allElements,
                    officeLocation,
                    position,
                    jobTitle,
                    departament,
                });
                // Runs in the browser, so this goes to the page's console.
                console.log(
                    `email: ${emailElement} name: ${nameElement} allElements: ${allElements} n office location: ${officeLocation} n position: ${position} n jobTitle: ${jobTitle} n departament: ${departament}`
                );
            }
            return rows;
        });

        // Named differently from the module-level `kclResults` to avoid
        // shadowing it.
        const savedResults = [];
        for (const row of scrapedRows) {
            const {
                nameElement,
                emailElement,
                allElements,
                officeLocation,
                position,
                jobTitle,
                departament,
            } = row;
            const kclResult = new KingsDB({
                nameElement,
                emailElement,
                allElements,
                officeLocation,
                position,
                jobTitle,
                departament,
            });

            // Await the write so a failed save is handled by the catch below
            // and the returned array holds documents only (never promises).
            await kclResult.save();
            savedResults.push(kclResult);
            console.log(savedResults);
        }
        return savedResults;
    } catch (error) {
        // Best-effort: failures are logged and the function returns undefined.
        console.log(error);
    }
}

PS: The function passed to page.evaluate runs in the browser context and thus doesn’t have access to your Node variables unless they are explicitly passed as arguments.