Fetching JSON from API, adding it to sqlite3 database and automatically fetching the next page

I’m busy trying to add data that is being collected from an API to a sqlite3 database and afterwards extracting the next page, loading that page and adding that data to the database until there is no more next page (next_is_after).

Loading the data is working well (thanks to another user), but when I try to store each result as a list the formatting comes out wrong, and when I store it as a dictionary the data looks right but I'm struggling to insert it into the database.

I have no clue how to achieve the loop of loading the next page as it isn’t a “normal” page number but a code (e.g. “WzExNDEuMTYsNTYwNTkzNTFd”).

I have attached the code where I’m at below. I would again appreciate any help.

import requests
import json
import sqlite3

# Open (or create) the local SQLite database; all statements run through `cur`.
con = sqlite3.connect('takealot.db')
cur = con.cursor()

# 'sku' is the PRIMARY KEY, which lets later inserts use INSERT OR IGNORE to
# skip products that were already stored on a previous run/page.
cur.execute('''CREATE TABLE IF NOT EXISTS products
                    (sku text PRIMARY KEY, title text, slug text, reviews integer, star_rating real, listing_price real, pretty_price real)''')

baseurl = 'https://api.takealot.com/rest/v-1-10-0/searches/products,filters,facets,sort_options,breadcrumbs,slots_audience,context,seo'
# Pagination cursor: '?after=<opaque code>' selects a specific page. The code
# comes from a previous response's 'next_is_after' field.
endpoint = '?after=WzExNDEuMTYsNTYwNTkzNTFd'

def main_request(baseurl, endpoint):
    """Fetch one page of search results and return the decoded JSON body.

    Args:
        baseurl: Base URL of the search API.
        endpoint: Query string (e.g. '?after=<cursor>') appended to baseurl.

    Returns:
        The parsed JSON response as a dict.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # Always pass a timeout: requests.get() without one can block forever
    # if the server stops responding.
    res = requests.get(baseurl + endpoint, timeout=30)
    return res.json()

daten = main_request(baseurl, endpoint)

# Cursor for the next page; the API returns an opaque code here, and an
# empty value when there are no more pages.
next_is_after = daten['sections']['products']['paging']['next_is_after']

# Build one tuple per product, in the same column order as the `products`
# table, so the whole list can be handed straight to executemany().
prodlist = []
for data in daten['sections']['products']['results']:
    core = data['product_views']['core']
    buybox = data['product_views']['buybox_summary']
    prodlist.append((
        core['id'],          # sku (primary key)
        core['title'],
        core['slug'],
        core['reviews'],
        core['star_rating'],
        buybox['listing_price'],
        buybox['pretty_price'],
    ))

# FIX: executemany() expects a sequence of parameter rows (one tuple per
# product). The original code passed a flat list of the last product's
# fields and never used `prodlist` at all.
cur.executemany("INSERT OR IGNORE INTO products VALUES (?, ?, ?, ?, ?, ?, ?)", prodlist)
con.commit()

for row in cur.execute('''SELECT * FROM products'''):
    print(row)

Answer

You just have to add a while loop around it like:

import requests
import json
import sqlite3

# Open (or create) the local SQLite database; all statements run through `cur`.
con = sqlite3.connect('takealot.db')
cur = con.cursor()

# 'sku' is the PRIMARY KEY so repeated pages/runs can INSERT OR IGNORE
# without creating duplicate rows.
cur.execute('''CREATE TABLE IF NOT EXISTS products
                    (sku text PRIMARY KEY, title text, slug text, reviews integer, star_rating real, listing_price real, pretty_price real)''')

baseurl = 'https://api.takealot.com/rest/v-1-10-0/searches/products,filters,facets,sort_options,breadcrumbs,slots_audience,context,seo'
# The pagination cursor is appended after '?after='; an empty cursor
# requests the first page.
endpoint = '?after='

def main_request(baseurl, endpoint):
    """Fetch one page of search results and return the decoded JSON body.

    Args:
        baseurl: Base URL of the search API.
        endpoint: Query string (e.g. '?after=<cursor>') appended to baseurl.

    Returns:
        The parsed JSON response as a dict.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # Always pass a timeout: requests.get() without one can block forever
    # if the server stops responding.
    res = requests.get(baseurl + endpoint, timeout=30)
    return res.json()

# Page through the API: every response carries a 'next_is_after' cursor that
# identifies the following page. Keep requesting until that cursor is empty.
#
# FIX: the original initialised the cursor with `""""`, which is a
# SyntaxError (unterminated triple-quoted string). Even the evidently
# intended `""` would have made `while next_is_after != "":` false on the
# very first check, so the loop body would never run. A `while True` loop
# that breaks after processing the page handles both the first request
# (empty cursor = first page) and termination correctly.
next_is_after = ""
while True:
    daten = main_request(baseurl, endpoint + next_is_after)
    next_is_after = daten['sections']['products']['paging']['next_is_after']

    # One tuple per product, matching the column order of the table.
    prodlist = []
    for data in daten['sections']['products']['results']:
        core = data['product_views']['core']
        buybox = data['product_views']['buybox_summary']
        prodlist.append((
            core['id'],          # sku (primary key)
            core['title'],
            core['slug'],
            core['reviews'],
            core['star_rating'],
            buybox['listing_price'],
            buybox['pretty_price'],
        ))
    cur.executemany("INSERT OR IGNORE INTO products VALUES (?, ?, ?, ?, ?, ?, ?)", prodlist)

    # NOTE(review): assumes the API signals the last page with an empty
    # 'next_is_after' (as it does for 'previous_is_before') — confirm.
    if not next_is_after:
        break

con.commit()

for row in cur.execute('''SELECT * FROM products'''):
    print(row)

Each response contains the starting point (`next_is_after`) for the following request, so you keep feeding that cursor back into the URL until no more pages remain.

This assumes that (as with `previous_is_before`) when you reach the end, it's just an empty string.