Not able to parse JSON API data in python pandas

I am trying to parse the following JSON text in python but I am getting error. I have the following data taken from an API. I am trying to access the nested JSON API Data “XrfSurveys”,”XrfQaqcSurveys”,”XrfCorrectedSurveys”, & “XrfQaqcCorrectedSurveys” using a Python script.

Here is the JSON file

{
"ProjectName":"Carra_Geo",
"Drillholes":[
{
"DrillholeName":"TESTING",
"EzGammaSurveys":[
],
"EzGammaInOutUpdates":[
],
"Tn14Readings":[
],
"Tn14Updates":[
],
"EzGyroSurveys":[
],
"EzGyroUpdates":[
],
"SprintIqSurveys":[
],
"SprintIqUpdates":[
],
"OmniSurveys":[
],
"EzTracSurveys":[
],
"EzTracUpdates":[
],
"XrfSurveys":[
{
"Id":"89c5b556-d638-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-08T03:17:47",
"Status":"Pending",
"Prospect":"",
"DepthFrom":0.0,
"DepthTo":1.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"CP000579b",
"SampleType":"",
"Comments":"",
"ReadingNumber":1107,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"",
"QaqcName":"",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"AmbientPressure":994,
"LiveTime1":24229.212663625793,
"LiveTime2":47613.816757492736,
"LiveTime3":"None",
"LiveTimeTotal":71843.02942111852,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
},
{
"Id":"0255be50-d943-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-31T01:56:29",
"Status":"Pending",
"Prospect":"",
"DepthFrom":7.0,
"DepthTo":8.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"CP006758",
"SampleType":"",
"Comments":"",
"ReadingNumber":1312,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"",
"QaqcName":"",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"AmbientPressure":990,
"LiveTime1":24103.274058863128,
"LiveTime2":45549.414143729475,
"LiveTime3":"None",
"LiveTimeTotal":69652.6882025926,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
}
],
"XrfQaqcSurveys":[
{
"Id":"3ff08a5e-d638-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-08T03:14:58",
"Status":"Pending",
"Prospect":"",
"DepthFrom":0.0,
"DepthTo":0.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"PHRC-17",
"SampleType":"",
"Comments":"",
"ReadingNumber":1106,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"STD",
"QaqcName":"PHRC-17",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"AmbientPressure":994,
"LiveTime1":23844.133703840424,
"LiveTime2":42678.55913274793,
"LiveTime3":"None",
"LiveTimeTotal":66522.69283658835,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
}
],
"XrfCorrectedSurveys":[
{
"Id":"89c5b556-d638-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-08T03:17:47",
"Status":"Pending",
"Prospect":"",
"DepthFrom":0.0,
"DepthTo":1.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"CP000579b",
"SampleType":"",
"Comments":"",
"ReadingNumber":1107,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"",
"QaqcName":"",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"CalibrationTable":"None",
"CalibrationTableVersion":"None",
"AppliedBy":"None",
"AppliedDate":"None",
"AppliedDateString":"None",
"AmbientPressure":994,
"LiveTime1":24229.212663625793,
"LiveTime2":47613.816757492736,
"LiveTime3":"None",
"LiveTimeTotal":71843.02942111852,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
},
{
"Id":"0255be50-d943-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-31T01:56:29",
"Status":"Pending",
"Prospect":"",
"DepthFrom":7.0,
"DepthTo":8.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"CP006758",
"SampleType":"",
"Comments":"",
"ReadingNumber":1312,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"",
"QaqcName":"",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"CalibrationTable":"None",
"CalibrationTableVersion":"None",
"AppliedBy":"None",
"AppliedDate":"None",
"AppliedDateString":"None",
"AmbientPressure":990,
"LiveTime1":24103.274058863128,
"LiveTime2":45549.414143729475,
"LiveTime3":"None",
"LiveTimeTotal":69652.6882025926,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
}
],
"XrfCorrectedUpdates":[
],
"XrfQaqcCorrectedSurveys":[
{
"Id":"3ff08a5e-d638-ea11-a1cc-000d3a807867",
"TestDate":"2020-01-08T03:14:58",
"Status":"Pending",
"Prospect":"",
"DepthFrom":0.0,
"DepthTo":0.0,
"Grid":"",
"Easting":0.0,
"Northing":0.0,
"SamplerName":"",
"SampleId":"PHRC-17",
"SampleType":"",
"Comments":"",
"ReadingNumber":1106,
"Mode":"geoChem-VMR-OAS",
"LodSigma":1.0,
"InstrumentSn":"801465",
"InstrumentModel":"Vanta VMR",
"TubeAnode":"Rh",
"QaqcType":"STD",
"QaqcName":"PHRC-17",
"UserFactorName":"0",
"Packaging":"",
"Preparation":"",
"CalibrationTable":"None",
"CalibrationTableVersion":"None",
"AppliedBy":"None",
"AppliedDate":"None",
"AppliedDateString":"None",
"AmbientPressure":994,
"LiveTime1":23844.133703840424,
"LiveTime2":42678.55913274793,
"LiveTime3":"None",
"LiveTimeTotal":66522.69283658835,
"ElapsedTime1":30000.0,
"ElapsedTime2":60000.0,
"ElapsedTime3":"None",
"ElapsedTimeTotal":90000.0,
}
],
"XrfQaqcCorrectedUpdates":[
],
"IqLoggerStructuralReadings":[
],
"IqLoggerStructuralUpdates":[
],
"Act3OrientationShots":[
]
}
]
}

Here are my python script:

import urllib.request 
import urllib.parse 
import urllib.error
import requests
import time
import pyodbc
import argparse
import pandas as pd 
parser = argparse.ArgumentParser(description="Process some integers.")
parser.add_argument("--verbose", help="display processing information")
start = time.time()
def main(verbose):
#API Data
headers = {
# Request headers
'IntegrationKey':'',
'Ocp-Apim-Subscription-Key': '',
'Authorization': ''
}
hostname= "https://api.imdexhubhe.com/api/integration/"
try:
response = requests.get(hostname,headers=headers)
# extract JSON payload of response as Python dictionary
json_payload = response.json()
# raise an Exception if we encoutnered any HTTP error codes like 404
response.raise_for_status()
except requests.exceptions.ConnectionError as e:
# handle any typo errors in url or endpoint, or just patchy internet connection
print(e)
else:
json_payload = response.json()[0]
print(json_payload)
df2 = pd.DataFrame(json_payload["Drillholes"])
df3 = pd.DataFrame(json_payload["XrfSurveys"])
df4 = pd.DataFrame(json_payload["XrfQaqcSurveys"])
df5 = pd.DataFrame(json_payload["XrfCorrectedSurveys"])
df6 = pd.DataFrame(json_payload["XrfQaqcCorrectedSurveys"])
#df7 = pd.DataFrame(json_payload["SurfaceSamples"])
#df8 = pd.DataFrame(json_payload["QASamples"])   
df2.to_csv("Drillholes.csv",index=False)
df3.to_csv("XrfSurveys.csv",index=False)
df4.to_csv("XrfQaqcSurveys.csv",index=False)
df5.to_csv("XrfCorrectedSurveys.csv",index=False)
df5.to_csv("XrfQaqcCorrectedSurveys.csv",index=False)
df6.to_csv("XrfQaqcCorrectedSurveys.csv",index=False)
#df7.to_csv("SurfaceSamples.csv",index=False)
#df8.to_csv("QASamples.csv",index=False)  
if __name__ == '__main__':
start = time.time()
args = parser.parse_args()
verbose = args.verbose
main(verbose)  # Calling Main Function
print("Processed time:", time.time() - start)  # Total Time        

Tried different combinations to access JSON data but these type of errors are coming.

Error:KeyError: ‘XrfSurveys’

Error:TypeError: list indices must be integers or slices, not str

Can someone please help me what is wrong here?

Answer

Try this:

json_payload = response.json()
json_payload["Drillholes"][0]["XrfSurveys"]
json_payload["Drillholes"][0]["XrfQaqcSurveys"]
json_payload["Drillholes"][0]["XrfCorrectedSurveys"]

and so on..

“Drillholes” is a list containing all of the values which consists of dictionaries and more lists. So calling that list by index 0(because it’s the first list) and then calling the key for the value you want should work.