Я пытаюсь разобрать JSON-ответ от API, и мне нужны следующие поля: 'id', 'label', 'degree', 'pep', 'sanctioned', 'countries', 'addresses', 'relationship_count'. Я подумал, что проще всего, пусть и не слишком элегантно, вызвать pd.json_normalize(entity.json(), 'data')
для ключа 'data', а затем выбрать нужные столбцы. Но я получаю не все поля из 'data' (например, отсутствует 'relationship_count'). Вот столбцы, которые я получаю:
Index(['id', 'label', 'degree', 'pep', 'sanctioned', 'psa_count', 'type',
'entity_url', 'identifiers', 'countries', 'addresses',
'source_count.7a92887d4f18fc21abe0d658b25364e7.count',
'source_count.7a92887d4f18fc21abe0d658b25364e7.label', 'matches.name',
'matches.address',
'source_count.54243e61aaa4ce9289f34558f67d2e40.count',
'source_count.54243e61aaa4ce9289f34558f67d2e40.label',
'matches.business_purpose',
'source_count.82ca2242478ec8330c861c6a3acd7ed1.count',
'source_count.82ca2242478ec8330c861c6a3acd7ed1.label',
'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.count',
'source_count.e0a238bcfc2f81ed9e5f345c0c7068f7.label'],
dtype='object')
А вот как выглядит json/словарь:
{'offset': 0,
'limit': 100,
'next': False,
'size': {'count': 5, 'qualifier': 'eq'},
'data':
[{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
'label': 'CPV MANUFACTURING, INC.',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
'identifiers': [{'value': '002302313',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
{'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
'identifiers': [],
'countries': ['USA'],
'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
'label': 'USA Paycheck Protection Program (PPP) $150k+ Loan Recipients Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
{'id': '7f--6WFMJkzZTdjiaX4hTQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
'identifiers': [{'value': '079228019',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
{'id': 'bCYPMeeymyiveMS0EW700g',
'label': 'Admiral Valve, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 0,
'type': 'company',
'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
'identifiers': [{'value': '4238068',
'type': 'usa_pa_corporate_registry_id',
'label': 'Usa Pa Corporate Registry Id'}],
'countries': ['USA'],
'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
'label': 'USA Pennsylvania Secretary of State'}},
'relationship_count': {},
'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
'Admiral <em>Valve</em>, <em>LLC</em>'],
'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
{'id': '_9oASm59LBr-iEs4gHj_lQ',
'label': 'ADMIRAL VALVE LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
'identifiers': [{'value': '901034776',
'type': 'usa_fei_number',
'label': 'Usa Fei Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
'label': 'USA Department of Labor Form 5500 Filings Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}
Мне кажется, что нужного результата можно добиться какой-то комбинацией аргументов record_path и meta, но я так и не разобрался, какой именно.
Заранее спасибо!
Возможное решение следующее:
import pandas as pd
data = {'offset': 0,
'limit': 100,
'next': False,
'size': {'count': 5, 'qualifier': 'eq'},
'data':
[{'id': 'q3oR3y2vi6l9REyJeQXKyQ',
'label': 'CPV MANUFACTURING, INC.',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/q3oR3y2vi6l9REyJeQXKyQ',
'identifiers': [{'value': '002302313',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, PA, 191041563, USA'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 1462,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, CHESTER, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES',
'851 PRESTON ST, PHILADELPHIA, <em>PA</em>, 191041563, USA']}},
{'id': 'NI85zIpOHQLiAQuQ4lqQkQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/NI85zIpOHQLiAQuQ4lqQkQ',
'identifiers': [],
'countries': ['USA'],
'addresses': ['503 Schoolhouse Rd, KENNETT SQUARE, PA, 19348'],
'source_count': {'54243e61aaa4ce9289f34558f67d2e40': {'count': 2,
'label': 'USA Paycheck Protection Program (PPP) $150k+ Loan Recipients Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 Schoolhouse Rd, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348'],
'business_purpose': ['Industrial <em>Valve</em> Manufacturing']}},
{'id': '7f--6WFMJkzZTdjiaX4hTQ',
'label': 'ADMIRAL VALVE, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/7f--6WFMJkzZTdjiaX4hTQ',
'identifiers': [{'value': '079228019',
'type': 'duns_number',
'label': 'Duns Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, USA',
'503 SCHOOL HOUSE RD, KENNETT SQUARE, PA, 193481741, UNITED STATES'],
'source_count': {'7a92887d4f18fc21abe0d658b25364e7': {'count': 6,
'label': 'USA USASpending.gov Profiles Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em>, LLC',
'ADMIRAL <em>VALVE</em>, <em>LLC</em>'],
'address': ['503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, USA',
'503 SCHOOL HOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 193481741, UNITED STATES']}},
{'id': 'bCYPMeeymyiveMS0EW700g',
'label': 'Admiral Valve, LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 0,
'type': 'company',
'entity_url': '/v1/entity/bCYPMeeymyiveMS0EW700g',
'identifiers': [{'value': '4238068',
'type': 'usa_pa_corporate_registry_id',
'label': 'Usa Pa Corporate Registry Id'}],
'countries': ['USA'],
'addresses': ['503 School House Road Kennett Square PA 19348 Chester'],
'source_count': {'82ca2242478ec8330c861c6a3acd7ed1': {'count': 2,
'label': 'USA Pennsylvania Secretary of State'}},
'relationship_count': {},
'matches': {'name': ['<em>Admiral</em> <em>Valve</em>, LLC',
'Admiral <em>Valve</em>, <em>LLC</em>'],
'address': ['503 School House Road <em>Kennett</em> <em>Square</em> <em>PA</em> 19348 Chester']}},
{'id': '_9oASm59LBr-iEs4gHj_lQ',
'label': 'ADMIRAL VALVE LLC',
'degree': 0,
'pep': False,
'sanctioned': False,
'psa_count': 4,
'type': 'company',
'entity_url': '/v1/entity/_9oASm59LBr-iEs4gHj_lQ',
'identifiers': [{'value': '901034776',
'type': 'usa_fei_number',
'label': 'Usa Fei Number'}],
'countries': ['USA'],
'addresses': ['503 SCHOOLHOUSE RD, KENNETT SQUARE, PA, 19348'],
'source_count': {'e0a238bcfc2f81ed9e5f345c0c7068f7': {'count': 5,
'label': 'USA Department of Labor Form 5500 Filings Database'}},
'relationship_count': {},
'matches': {'name': ['<em>ADMIRAL</em> <em>VALVE</em> LLC',
'ADMIRAL <em>VALVE</em> <em>LLC</em>'],
'address': ['503 SCHOOLHOUSE RD, <em>KENNETT</em> <em>SQUARE</em>, <em>PA</em>, 19348']}}]}
# max_level=0 stops json_normalize from flattening nested dicts, so an empty
# dict such as 'relationship_count': {} is kept as a column instead of
# vanishing (flattening an empty dict produces no columns at all).
df = pd.json_normalize(data['data'], max_level=0)
# Keep only the fields requested in the question.
df = df[['id', 'label', 'degree', 'pep', 'sanctioned',
         'countries', 'addresses', 'relationship_count']]
df
Возвращает
Попробуйте следующее:
data = pd.json_normalize(json_data, 'data')
# One row per identifier; an entity whose 'identifiers' list is empty explodes
# to a single NaN row and the original row index is repeated for each item.
identifiers = data['identifiers'].explode()
# Replace the NaN placeholders with empty dicts: feeding floats to
# json_normalize raises "AttributeError: 'float' object has no attribute
# 'items'" on older pandas versions.
identifiers = identifiers.map(lambda item: item if isinstance(item, dict) else {})
# Build the identifier columns on the exploded index so that the join below
# matches every identifier to its own entity even when an entity has zero or
# several identifiers.
identifiers_df = pd.DataFrame(identifiers.tolist(), index=identifiers.index)
# 'type' and 'label' exist on both sides; the identifier versions get the
# '_identifiers' suffix.
data = data.drop(columns=['identifiers']).join(identifiers_df, rsuffix='_identifiers')
У меня все работало нормально с данными json, указанными в вопросе.
Моя проблема была в том, что json_normalize не возвращает столбец, если его значение пустое (например, пустой словарь {}). Поэтому мне достаточно было вызвать pd.DataFrame(json_data['data']).
Я получаю такую ошибку:
AttributeError: 'float' object has no attribute 'items'