# I want to extract the description and rating of review comments where the rating is 4 or 5. I have tried multiple options; the latest is shown below. Can someone please help?
# Question snippet: tries to collect the rating and description of every
# review listed on the page into `quotes`.
import requests
from bs4 import BeautifulSoup
from scrapy.http import HtmlResponse

# `response` is never referenced again in this snippet.
response = HtmlResponse(url='https://www.bankbazaar.com/credit-card.html')

url ='https://www.bankbazaar.com/credit-card.html'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html5lib')
quotes=[] # a list to store quotes

# Container <div> holding the reviews; each review is an <li class="review-box">.
table = soup.find('div', attrs = {'class':'reviewbox-container'})
for row in table.findAll('li', attrs = {'class':'review-box'}):
    quote = {}
    # NOTE(review): `find` returns a Tag (or None), so both dict values below
    # are tag objects rather than the rating value / description text.
    # NOTE(review): `name_` looks like it is treated as a filter on an HTML
    # attribute literally called "name_", not on "name" -- confirm against
    # the BeautifulSoup docs.
    quote['rating'] = row.find('input', name_ = 'review.reviewRating.customer.0')
    quote['descr'] = row.find('div',class_ = 'text_here review-desc-more')
    # list.append returns None, so this prints "None" on every iteration.
    print(quotes.append(quote))
Answer
First off, you are grabbing the tag elements, when it looks like you actually want the text from the descriptions. Secondly, the `name` attribute changes for each element, so you’d either need to use a regex to search for a `name` attribute that contains or starts with `review.reviewRating.customer.`, or simply grab the `<input>` tag without using the `name` attribute at all. Lastly, you don’t want to `print(quotes.append(quote))`, as `append` returns `None`, so that is all it will print. I think what you want to print is the actual `quote`:
# Scrape the reviews on the page and keep those rated 4 or 5, storing each
# one as {'rating': <value attribute string>, 'descr': <description text>}.
import re  # not used below; kept from the original snippet

import requests
from bs4 import BeautifulSoup
from scrapy.http import HtmlResponse  # not used below; kept from the original snippet

# Lowest rating to keep: the goal is reviews rated 4 or 5.
MIN_RATING = 4.0

url = 'https://www.bankbazaar.com/credit-card.html'
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status surfaces HTTP errors instead of parsing an error page.
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html5lib')
quotes = []  # a list to store quotes

table = soup.find('div', attrs={'class': 'reviewbox-container'})
# `find` returns None when the container is absent; treat that as "no reviews"
# rather than failing with AttributeError.
rows = table.find_all('li', attrs={'class': 'review-box'}) if table is not None else []

for row in rows:
    # The `name` attribute differs per review, so take the first <input> in
    # the review box instead of matching on its name.
    rating_input = row.find('input')
    descr_div = row.find('div', class_='text_here review-desc-more')
    if rating_input is None or descr_div is None:
        continue  # review box without a rating or a description

    raw_rating = rating_input.get('value', '')
    try:
        rating = float(raw_rating)
    except ValueError:
        continue  # missing or non-numeric rating value

    if rating < MIN_RATING:
        continue

    quote = {
        'rating': raw_rating,
        'descr': descr_div.text.strip(),
    }
    quotes.append(quote)
    print(quote)