converting get_text() output from bs4 into a csv with headers

I'm building a web scraper and am a bit stuck trying to manipulate the data I get out of bs4. I'm trying to get the text of the ('div', class_='listing__content__wrapper') elements nicely organized under their 4 headers (headerList = ['streetName', 'city', 'province', 'postalCode']).

I got as far as getting it into a CSV file, but I can't get it into rows and columns.

All the help I can get is appreciated.

here is my code so far:

import requests
from bs4 import BeautifulSoup, SoupStrainer
import pandas as pd
import csv

# Browser-like User-Agent header sent with the request.
headers = {
    "User-agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}

# we can ask for the url of the page you want to scrape here, remove after tests are successful.
# url = input("Enter url to scrape: ")


# for testing
url = 'https://www.yellowpages.ca/search/si/1/gym/Toronto+ON'
# requests has no default timeout; without one a stalled server would hang
# the script forever.
page = requests.get(url, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx response instead of silently parsing an error page.
page.raise_for_status()

# tag and class of interest to parse: only these <div> elements are kept in
# the parse tree.
parse_only = SoupStrainer('div', class_='listing__content__wrapper')

soup = BeautifulSoup(page.content, 'html.parser', parse_only=parse_only)

# Every <span class="jsMapBubbleAddress"> inside the retained listing wrappers.
streetaddress = soup.find_all('span', class_='jsMapBubbleAddress')

# Append one CSV row per scraped span.
# csv.writer.writerow() expects a sequence of fields; handing it a bare string
# makes every character its own column, so the text is wrapped in a
# one-element list to keep it in a single cell.
# newline='' lets the csv module control line endings itself.
with open('test.csv', 'a', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for line in streetaddress:
        writer.writerow([line.get_text()])

# using a function works but still can't get all the data under the 4 columns/headerList


def append_to_csv(input_string):
    """Append the stripped text of *input_string* to ``test2.csv``.

    *input_string* must expose a ``get_text()`` method returning a string.
    The text is stripped of surrounding whitespace and written followed by a
    single ``|`` separator; no newline is written, so successive calls extend
    the same line of the file.
    """
    with open("test2.csv", "a") as out_file:
        cell = input_string.get_text().strip()
        out_file.write(cell + "|")


# Feed every scraped span, in order, to append_to_csv (which appends to test2.csv).
for address_span in streetaddress:
    append_to_csv(address_span)

# for listing in streetaddress:
#     print((listing.get_text()), file=open('streetaddresses.csv', 'a'), sep='|')


Answer

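I think this will do what you want. Each span's `itemprop` attribute names the field it holds, so the code collects the spans into a dict keyed by that attribute and writes the dict out as one CSV row each time it reaches a `postalCode`: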

# Column names; they match the itemprop attribute values of the address spans.
fields = ['streetAddress', 'addressLocality', 'addressRegion', 'postalCode']
# Fields collected so far for the listing currently being assembled.
gather = {}
# newline='' lets the csv module control line endings itself.
with open('test.csv', 'a', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fields)
    # The file is opened in append mode, so only emit the header when the
    # file is still empty; otherwise every run would add another header row.
    if csv_file.tell() == 0:
        writer.writeheader()
    for line in streetaddress:
        key = line.attrs.get("itemprop")
        # Skip spans with no itemprop or one that is not a known column:
        # attrs["itemprop"] would raise KeyError and DictWriter would raise
        # ValueError on an unexpected key.
        if key not in fields:
            continue
        # Seeing a field twice means the previous listing ended without a
        # postal code; write it out rather than overwriting its value.
        if key in gather:
            writer.writerow(gather)
            gather = {}
        gather[key] = line.get_text()
        # postalCode is the last field of a listing, so the row is complete.
        if key == "postalCode":
            writer.writerow(gather)
            gather = {}
    # Write a trailing listing that never reached a postal code.
    if gather:
        writer.writerow(gather)