JSON with Nested Array from Pandas DataFrame

Generally I’m working to query data from Snowflake, format it into JSON, and then push that JSON into an API.

I’m very close, but I’m struggling with the format that the API requires. The fields key needs to hold an array that contains the object, rather than the bare nested object. The JSON example below is for a single record. I’ve tried several of the orient options of the pandas to_dict method, including to_dict('list'), but none of them produced the required shape. Any ideas are appreciated.

Current code and output:

# Columns whose combined values define one group (one output record).
group_keys = ['text', 'date', 'channel', 'sentiment']

# Columns gathered into the nested "fields" entry of each record.
field_columns = [
    'product',
    'segment',
    '2b6276da-b135-4258-9971-cb08c070d859',
    '7b84b8fc-5494-4fcb-bac7-ca91dc8faa32',
    '5042388c-3144-4b5d-9aab-f0d03345646b',
    '27cf2f54-3686-48c9-bfe4-5a6c70e90854',
    '03de58c3-4ea0-4286-b5c4-1b8cef53646d',
    'edee1277-1668-4e89-b206-5de08e8b3dc5',
    'c9db7ba2-3c9f-40a8-852e-20ce5e8a5e8f',
    'cb8d1d94-8976-4b31-9844-e47857226c2d',
    '806335a9-e8ea-45b4-9904-54c52f1698e4',
    'b2dfd157-436f-43a2-8ca2-36b5fe1fae54',
    '511cfd95-8250-4796-97e1-9b02fb91e147',
    '69c06db4-cc43-4dbb-abcb-6d5f40bfef08',
    'ecdf55c5-bce9-4bc6-bca2-921d7c140dc2',
    '6b711ef9-b789-48b5-97f3-7183bc5d6fa7',
    'bfbc0bf1-49ca-4cb0-a76c-82999034e7cc',
    'ee64e90c-0116-4fba-992d-6f6df1b0cfef',
    '3c6edd01-bfa6-46c0-a9ea-5ffc01453f51',
]


def _fields_for_group(group):
    """Return the group's field columns converted with to_dict('records')."""
    return group[field_columns].to_dict('records')


# Step 1: group the frame by the identifying columns, keeping them as columns.
grouped = df.groupby(group_keys, as_index=False)

# Step 2: compute the nested field records for every group.
nested = grouped.apply(_fields_for_group)

# Step 3: rename the column labelled None to 'fields'
# (presumably the column holding the apply result -- confirm on your pandas version).
nested = nested.rename(columns={None: 'fields'})

# Step 4: serialise one JSON object per row.
j = nested.to_json(orient='records')

# Re-parse and dump again purely for pretty-printing with sorted keys.
data = json.dumps(json.loads(j), indent=2, sort_keys=True)
print(data)
[
  {
    "channel": "Zendesk",
    "date": 1630465892000,
    "sentiment": "predict",
    "text": "n STACK UP TOPIC SUGGESTIONnnoden",
    "fields": {
      "03de58c3-4ea0-4286-b5c4-1b8cef53646d": "005j000000FVPO3AAP",
      "27cf2f54-3686-48c9-bfe4-5a6c70e90854": 110010.0,
      "2b6276da-b135-4258-9971-cb08c070d859": null,
      "3c6edd01-bfa6-46c0-a9ea-5ffc01453f51": "Zendesk-959731",
      "5042388c-3144-4b5d-9aab-f0d03345646b": "Financial Services - Banking",
      "511cfd95-8250-4796-97e1-9b02fb91e147": "001j000000a7NKEAA2",
      "69c06db4-cc43-4dbb-abcb-6d5f40bfef08": "Berkadia",
      "6b711ef9-b789-48b5-97f3-7183bc5d6fa7": 1616070511039,
      "7b84b8fc-5494-4fcb-bac7-ca91dc8faa32": "North America",
      "806335a9-e8ea-45b4-9904-54c52f1698e4": 0,
      "b2dfd157-436f-43a2-8ca2-36b5fe1fae54": 1,
      "bfbc0bf1-49ca-4cb0-a76c-82999034e7cc": "Skills: Strategy-Driven",
      "c9db7ba2-3c9f-40a8-852e-20ce5e8a5e8f": "005j0000000jdqXAAQ",
      "cb8d1d94-8976-4b31-9844-e47857226c2d": 0,
      "ecdf55c5-bce9-4bc6-bca2-921d7c140dc2": "B2B",
      "edee1277-1668-4e89-b206-5de08e8b3dc5": 190.0,
      "ee64e90c-0116-4fba-992d-6f6df1b0cfef": "4602c31e-d3e0-464b-8c11-75391f4ecece",
      "product": null,
      "segment": "Commercial 2"
    }
  }
]

The needed format is as such:

[
  {
    "channel": "Zendesk",
    "date": 1630465892000,
    "sentiment": "predict",
    "text": "n STACK UP TOPIC SUGGESTIONnnoden",
    "fields": [
     {
      "03de58c3-4ea0-4286-b5c4-1b8cef53646d": "005j000000FVPO3AAP",
      "27cf2f54-3686-48c9-bfe4-5a6c70e90854": 110010.0,
      "2b6276da-b135-4258-9971-cb08c070d859": null,
      "3c6edd01-bfa6-46c0-a9ea-5ffc01453f51": "Zendesk-959731",
      "5042388c-3144-4b5d-9aab-f0d03345646b": "Financial Services - Banking",
      "511cfd95-8250-4796-97e1-9b02fb91e147": "001j000000a7NKEAA2",
      "69c06db4-cc43-4dbb-abcb-6d5f40bfef08": "Berkadia",
      "6b711ef9-b789-48b5-97f3-7183bc5d6fa7": 1616070511039,
      "7b84b8fc-5494-4fcb-bac7-ca91dc8faa32": "North America",
      "806335a9-e8ea-45b4-9904-54c52f1698e4": 0,
      "b2dfd157-436f-43a2-8ca2-36b5fe1fae54": 1,
      "bfbc0bf1-49ca-4cb0-a76c-82999034e7cc": "Skills: Strategy-Driven",
      "c9db7ba2-3c9f-40a8-852e-20ce5e8a5e8f": "005j0000000jdqXAAQ",
      "cb8d1d94-8976-4b31-9844-e47857226c2d": 0,
      "ecdf55c5-bce9-4bc6-bca2-921d7c140dc2": "B2B",
      "edee1277-1668-4e89-b206-5de08e8b3dc5": 190.0,
      "ee64e90c-0116-4fba-992d-6f6df1b0cfef": "4602c31e-d3e0-464b-8c11-75391f4ecece",
      "product": null,
      "segment": "Commercial 2"
    }
   ]
  }
]

Answer

Why not just wrap it in an array like this?

# Columns whose combined values define one group (one output record).
group_keys = ['text', 'date', 'channel', 'sentiment']

# Columns gathered into the nested "fields" entry of each record.
field_columns = [
    'product',
    'segment',
    '2b6276da-b135-4258-9971-cb08c070d859',
    '7b84b8fc-5494-4fcb-bac7-ca91dc8faa32',
    '5042388c-3144-4b5d-9aab-f0d03345646b',
    '27cf2f54-3686-48c9-bfe4-5a6c70e90854',
    '03de58c3-4ea0-4286-b5c4-1b8cef53646d',
    'edee1277-1668-4e89-b206-5de08e8b3dc5',
    'c9db7ba2-3c9f-40a8-852e-20ce5e8a5e8f',
    'cb8d1d94-8976-4b31-9844-e47857226c2d',
    '806335a9-e8ea-45b4-9904-54c52f1698e4',
    'b2dfd157-436f-43a2-8ca2-36b5fe1fae54',
    '511cfd95-8250-4796-97e1-9b02fb91e147',
    '69c06db4-cc43-4dbb-abcb-6d5f40bfef08',
    'ecdf55c5-bce9-4bc6-bca2-921d7c140dc2',
    '6b711ef9-b789-48b5-97f3-7183bc5d6fa7',
    'bfbc0bf1-49ca-4cb0-a76c-82999034e7cc',
    'ee64e90c-0116-4fba-992d-6f6df1b0cfef',
    '3c6edd01-bfa6-46c0-a9ea-5ffc01453f51',
]


def _wrapped_fields(group):
    """Return the group's field records wrapped in one extra outer list."""
    records = group[field_columns].to_dict('records')
    # The extra list around the records is the whole point of this variant.
    return [records]


# Group by the identifying columns, keeping them as regular columns.
grouped = df.groupby(group_keys, as_index=False)

# Compute the list-wrapped field records for every group.
nested = grouped.apply(_wrapped_fields)

# Rename the column labelled None to 'fields'
# (presumably the column holding the apply result -- confirm on your pandas version).
nested = nested.rename(columns={None: 'fields'})

# Serialise one JSON object per row.
j = nested.to_json(orient='records')

# Re-parse and dump again purely for pretty-printing with sorted keys.
data = json.dumps(json.loads(j), indent=2, sort_keys=True)
print(data)