Web scraping: can’t move to the next page using page tags

This is a follow-up to my last question.

Now that I can extract the data I need, I want to move on to the next page, but using page tags does not seem to work. Here is the code I used:

import pandas as pd
import urllib3
from bs4 import BeautifulSoup
import requests, re, json
from tqdm import tqdm
# Running, de-duplicated collection of every product scraped so far.
scrap_carrefour = pd.DataFrame()  # ends up with columns item_desc, item_price

parent_url = 'https://www.carrefourkuwait.com/mafkwt/en/'
tags = [
    'Frozen-Food/c/FKWT6000000',
    'Baby-Products/c/FKWT1000000',
    'Beverages/c/FKWT1500000',
    'Bakery/c/FKWT1610000',
]

max_page = 100  # exclusive upper bound of the page counter
# Number of products shown on a page. NOTE: this name is never read below;
# the request URL hard-codes pageSize=700 instead.
page_size = 700

for tag in tags:
    for page_no in tqdm(range(1, max_page)):
        page_url = (
            f'{parent_url}{tag}?currentPage={page_no}'
            '&filter=&nextPageOffset=0&pageSize=700'
        )
        response = requests.get(page_url, headers={'User-Agent': 'Mozilla/5.0'})

        # Pull the JSON object that starts with {"prop out of the page text,
        # decode it, and walk down to the product list.
        embedded_json = re.search(r'({"prop.*})', response.text).group(1)
        page_state = json.loads(embedded_json)
        products = page_state['props']['initialState']['search']['products']

        # One row per product on this page: its name and its original price.
        scrap_page = pd.DataFrame()
        scrap_page['item_desc'] = [product['name'] for product in products]
        scrap_page['item_price'] = [product['originalPrice'] for product in products]

        # Append this page to the running result and drop repeated rows.
        scrap_carrefour = pd.concat([scrap_carrefour, scrap_page]).drop_duplicates()

What happens is that it keeps scraping the same page, even when the page number changes.

Also, I would like the code to scrape all the products, which is why I have an issue with the page size as well.

I would appreciate some help on this!

Answer

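This should do it. Instead of parsing the HTML page, the code below queries the site's JSON API directly and keeps increasing `currentPage` until a page comes back with no products.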

import requests
from pprint import pprint
 
# Category search endpoint of the site's JSON API; `{}` receives the category code.
url = 'https://www.carrefourkuwait.com/api/v5/zones/ZONE-1-kwt-051/search/categories/{}'

categories = [
    'Frozen-Food/c/FKWT6000000',
    'Baby-Products/c/FKWT1000000',
    'Beverages/c/FKWT1500000',
    'Bakery/c/FKWT1610000',
]

headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'storeid': 'mafkwt',
    'appid': 'Reactweb',
}

# Query string sent with every request; `currentPage` is advanced by the loop below.
params = {
    'filter': '',
    'sortBy': 'relevance',
    'currentPage': 0,
    'pageSize': '700',
    'areaCode': 'Zahra - Hawally',
    'lang': 'en',
    'expressPos': '051',
    'displayCurr': 'KWD',
    'foodPos': 'ZONE-1-kwt-051',
    'nonFoodPos': 'ZONE-1-kwt-051',
}

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 30

for cat in categories:
    # The endpoint only takes the trailing category code, e.g. "FKWT6000000".
    cat_num = cat.split("/")[-1]
    api_link = url.format(cat_num)
    params['currentPage'] = 0  # restart paging for every category

    while True:
        r = requests.get(
            api_link,
            params=params,
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an HTTP error instead of tripping over an error body
        # that is not the expected JSON.
        r.raise_for_status()

        # Decode the body once and reuse the result.
        products = r.json()['products']
        if not products:
            # Stop paging once a page comes back with no products.
            break

        for item in products:
            print(item['name'])

        params['currentPage'] += 1