# Script
from itertools import islice

import sodapy

# Unauthenticated client against the CDC open-data portal.
cdc = sodapy.Socrata("data.cdc.gov", None)
# A page size (limit) of 2 forces a fresh HTTP request every two rows,
# which is what surfaces the repeated-header behavior.
row_stream = cdc.get_all("rdmq-nq56", content_type="csv", limit=2)
# Pull only the first 10 rows from the lazy stream and show them.
first_ten = list(islice(row_stream, 10))
print(first_ten)
# Returns
['week_end', 'geography', 'county', 'percent_visits_combined', 'percent_visits_covid', 'percent_visits_influenza', 'percent_visits_rsv', 'percent_visits_smoothed', 'percent_visits_smoothed_covid', 'percent_visits_smoothed_1', 'percent_visits_smoothed_rsv', 'ed_trends_covid', 'ed_trends_influenza', 'ed_trends_rsv', 'hsa', 'hsa_counties', 'hsa_nci_id', 'fips', 'trend_source', 'buildnumber']
['2022-10-01T00:00:00.000', 'Alabama', 'Bibb', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Jefferson (Birmingham), AL - Shelby, AL', 'Bibb, Blount, Chilton, Cullman, Jefferson, Shelby, St. Clair, Walker', '150', '1007', 'HSA', '2025-06-27']
['2022-10-01T00:00:00.000', 'Alabama', 'Calhoun', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Calhoun (Anniston), AL - Cleburne, AL', 'Calhoun, Cleburne', '177', '1015', 'HSA', '2025-06-27']
['week_end', 'geography', 'county', 'percent_visits_combined', 'percent_visits_covid', 'percent_visits_influenza', 'percent_visits_rsv', 'percent_visits_smoothed', 'percent_visits_smoothed_covid', 'percent_visits_smoothed_1', 'percent_visits_smoothed_rsv', 'ed_trends_covid', 'ed_trends_influenza', 'ed_trends_rsv', 'hsa', 'hsa_counties', 'hsa_nci_id', 'fips', 'trend_source', 'buildnumber']
['2022-10-01T00:00:00.000', 'Alabama', 'Chilton', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Jefferson (Birmingham), AL - Shelby, AL', 'Bibb, Blount, Chilton, Cullman, Jefferson, Shelby, St. Clair, Walker', '150', '1021', 'HSA', '2025-06-27']
['2022-10-01T00:00:00.000', 'Alabama', 'Cleburne', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Calhoun (Anniston), AL - Cleburne, AL', 'Calhoun, Cleburne', '177', '1029', 'HSA', '2025-06-27']
['week_end', 'geography', 'county', 'percent_visits_combined', 'percent_visits_covid', 'percent_visits_influenza', 'percent_visits_rsv', 'percent_visits_smoothed', 'percent_visits_smoothed_covid', 'percent_visits_smoothed_1', 'percent_visits_smoothed_rsv', 'ed_trends_covid', 'ed_trends_influenza', 'ed_trends_rsv', 'hsa', 'hsa_counties', 'hsa_nci_id', 'fips', 'trend_source', 'buildnumber']
['2022-10-01T00:00:00.000', 'Alabama', 'Coosa', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Talladega, AL - Clay, AL', 'Clay, Coosa, Talladega', '241', '1037', 'HSA', '2025-06-27']
['2022-10-01T00:00:00.000', 'Alabama', 'Covington', '', '', '', '', '', '', '', '', 'Data Unavailable', 'Data Unavailable', 'Data Unavailable', 'Montgomery (Montgomery), AL - Autauga, AL', 'Autauga, Bullock, Covington, Crenshaw, Lowndes, Montgomery, Pike', '171', '1039', 'HSA', '2025-06-27']
['week_end', 'geography', 'county', 'percent_visits_combined', 'percent_visits_covid', 'percent_visits_influenza', 'percent_visits_rsv', 'percent_visits_smoothed', 'percent_visits_smoothed_covid', 'percent_visits_smoothed_1', 'percent_visits_smoothed_rsv', 'ed_trends_covid', 'ed_trends_influenza', 'ed_trends_rsv', 'hsa', 'hsa_counties', 'hsa_nci_id', 'fips', 'trend_source', 'buildnumber']
The `get_all` function paginates through the dataset by making multiple requests and iterates through the rows of the dataset. Looking at the source, it's unsurprising that the header is repeated every time a new request is made with `content_type="csv"`, but without knowing these internals, it's pretty surprising and might lead to errors. My current workaround in my downstream processor is to store the header the first time it appears and filter out every subsequent occurrence.