Skip to content

Commit 1244332

Browse files
fix: use api instead of bs4 for data retrieval
1 parent 627e53f commit 1244332

File tree

1 file changed

+5
-20
lines changed

src/diffpy/utils/tools.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
 
 import numpy as np
 import requests
-from bs4 import BeautifulSoup
 from scipy.optimize import dual_annealing
 from scipy.signal import convolve
 from xraydb import material_mu
@@ -235,31 +234,17 @@ def fetch_cif_filenames(hill_formula):
     ------
     ValueError
         If no CIF files are found for the given formula.
-
-    Notes
-    -----
-    The data is retrieved from the Crystallography Open Database (COD).
-    If you use COD data in your research,
-    please acknowledge the COD project as described at
-    https://www.crystallography.net/cod/acknowledgements.html.
     """
-    search_url = (
-        f"https://www.crystallography.net/cod/"
-        f"result.php?formula={hill_formula}"
-    )
-    response = requests.get(search_url)
+    base_url = "https://www.crystallography.net/cod/result.php"
+    params = {"formula": hill_formula, "format": "json"}
+    response = requests.get(base_url, params=params)
     if response.status_code != 200:
         raise Exception(
             f"Failed to retrieve search results. "
             f"HTTP status code: {response.status_code}."
         )
-    cif_links = BeautifulSoup(response.text, "html.parser").find_all("a")
-    cif_filenames = []
-    for link in cif_links:
-        href = link.get("href", "")
-        if href.endswith(".cif"):
-            filename = href.split("/")[-1]
-            cif_filenames.append(filename)
+    data = response.json()
+    cif_filenames = [str(entry["file"]) + ".cif" for entry in data]
     if len(cif_filenames) == 0:
         raise ValueError(
             f"No CIF files found for the given formula: {hill_formula}. "

0 commit comments

Comments
 (0)