Template request | Bug report | Generate Data Product
Tags: #xml #file #tool #operations #automation #dataframe
Author: Jeremy Ravenel
Description: This notebook demonstrates how to convert an XML sitemap into a dataframe for further analysis.
import naas
import json
try:
import xmltodict
except:
!pip install xmltodict
import xmltodict
import pandas as pd
import requests
# Base URL of the site to inspect; sitemap_to_df() appends "/sitemap.xml" to it.
website = "https://zapier.com"
def sitemap_to_df(url, timeout=30):
    """Download ``<url>/sitemap.xml`` and return its ``<url>`` entries as a DataFrame.

    The sitemap is parsed with ``xmltodict`` and the records found under
    ``urlset -> url`` become the rows of the result; each child tag of a
    ``<url>`` element (as named by xmltodict) becomes a column.

    Args:
        url: Base URL of the website, with or without a trailing slash.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A ``pandas.DataFrame`` with one row per ``<url>`` entry. The frame is
        empty when the ``<urlset>`` contains no entries.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a non-2xx HTTP status.
        ValueError: If the document root is not ``<urlset>`` (for example a
            sitemap index), so there are no URL entries to tabulate.
    """
    # Strip a trailing slash so we never request "//sitemap.xml".
    sitemap_url = f"{url.rstrip('/')}/sitemap.xml"

    # Without a timeout requests can block forever on an unresponsive host.
    response = requests.get(sitemap_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of feeding an error page to the parser.
    response.raise_for_status()

    data_dict = xmltodict.parse(response.content)
    if "urlset" not in data_dict:
        roots = ", ".join(map(str, data_dict)) or "<none>"
        raise ValueError(
            f"{sitemap_url} has no <urlset> root element (found: {roots})"
        )

    # An empty <urlset/> is parsed as None; treat it as "no entries".
    urlset = data_dict["urlset"]
    entries = urlset.get("url") if isinstance(urlset, dict) else None

    # xmltodict yields a bare dict (not a list) when there is exactly one
    # <url> child; wrap it so it becomes a single row instead of making
    # pandas fail on a dict of scalar values.
    if isinstance(entries, dict):
        entries = [entries]

    return pd.DataFrame(entries or [])
# Fetch the sitemap of the configured website and convert it to a DataFrame.
df = sitemap_to_df(website)
# Bare expression: as the last line of a notebook cell this renders the frame
# (the file uses IPython "!pip" syntax); in a plain script it has no effect.
df
# NOTE(review): the two naas calls below do not use `df` and look unrelated to
# the sitemap conversion. The value returned by get_remote_timezone() is
# discarded here. Presumably they read and then set the timezone of the naas
# remote environment; confirm against the naas documentation.
naas.get_remote_timezone()
naas.set_remote_timezone("Europe/Lisbon")