I am working on a Selenium script in Python and want to download the audio served by a certain page.
The page looks like this:
The HTML code of the page:
<html>
  <head>
    <meta name="viewport" content="width=device-width">
  </head>
  <body>
    <video controls="" autoplay="" name="media">
      <source src="https://website//id=47c484fc7f8f" type="audio/mp3">
    </video>
  </body>
</html>
My code so far:
# Drive a headless Chrome through Selenium Wire, submit the text typed by the
# user, then re-open every captured request whose URL points at the audio.
import sys
import time
import wave

import pyaudio
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver
from webdriver_manager.chrome import ChromeDriverManager

options = Options()
options.add_argument("--headless")
# For Linux/Ubuntu only:
# options.add_argument("--no-sandbox")

browser = webdriver.Chrome(
    ChromeDriverManager().install(),
    chrome_options=options,
)
browser.get("website")

# Replace whatever is in the text area with the user's input.
text_area = browser.find_element_by_id("text-area")
text_area.clear()
user_text = input("text here : ")
text_area.send_keys(user_text)

# Give the page a moment before pressing the button.
time.sleep(2)
browser.find_element_by_id("btn").click()

# Selenium Wire exposes the captured traffic through `browser.requests`.
for captured in browser.requests:
    if not captured.response:
        continue
    if 'website//id' not in captured.url:
        continue
    browser.get(captured.url)
I am open to working with any language to achieve this goal.
Answer
You don't need Selenium for this; the requests library is enough. You must send a unique identifier as sessionID in your POST request so that you can pick up the generated file with the subsequent GET request.
Use the following snippet as an example; it saves the generated file under the provided sessionID name.
# Store a piece of SSML text on the TTS demo server, then download the
# synthesized audio and save it as "<sessionID>.mp3".
import requests

# Identifier that ties the POST (store the text) to the GET (fetch the audio).
sessionID = '78aa8dd0-9529-11eb-a8b3-0242ac130003'
payload = {
    'ssmlText': '<prosody pitch="default" rate="-0%">Roses are red, violets are blue</prosody>',
    'sessionID': sessionID,
}
tts_url = 'https://www.ibm.com/demos/live/tts-demo/api/tts/newSynthesize?voice=en-US_OliviaV3Voice&id=' + sessionID

try:
    # A timeout on the POST as well, so a stalled server cannot hang the
    # script forever.
    r1 = requests.post(
        "https://www.ibm.com/demos/live/tts-demo/api/tts/store",
        data=payload,
        timeout=10,
    )
    # Print the status first so it is visible even when the call failed.
    print(r1.status_code, r1.reason)
    r1.raise_for_status()

    r2 = requests.get(tts_url, timeout=10, cookies=r1.cookies)
    print(r2.status_code, r2.reason)
    # Without this check an HTTP error page would be written to disk and
    # named as if it were an MP3 file.
    r2.raise_for_status()
except requests.exceptions.Timeout:
    print("Timeout: could not get response from the server")
else:
    try:
        # Write-only binary mode is enough; the file is never read back.
        with open(sessionID + '.mp3', "wb") as f:
            f.write(r2.content)
    except OSError:
        print("IOError: could not write a file")