|
| 1 | +from io import BytesIO |
| 2 | +from zipfile import ZipFile |
| 3 | +import zipfile |
| 4 | +from pathlib import Path |
| 5 | +from dwcahandler import DwcaHandler |
| 6 | +import logging |
| 7 | +import pytest |
| 8 | + |
# Folder that holds the sample archive folders (dwca-sample1, dwca-sample2, ...) used by the tests below.
# The path is relative, so it is resolved against the directory the tests are run from.
input_folder = "./input_files/dwca"
| 10 | + |
| 11 | + |
def make_zip_from_folder_contents(folder: str) -> BytesIO:
    """
    Build an in-memory zip archive from the files found under a folder.

    The folder is walked recursively and every regular file is stored at the
    root of the archive under its bare file name, so the folder structure is
    flattened. Directories themselves are not added as entries.

    :param folder: Path of the folder whose files are archived.
    :return: BytesIO holding the finished zip archive, positioned at the start.
    """
    zip_buffer = BytesIO()
    with ZipFile(file=zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=True) as zf:
        for path in Path(folder).rglob("*"):
            # rglob also yields directories; once flattened to their bare name they
            # would only add empty "name/" members, so archive regular files only.
            if path.is_file():
                zf.write(path, arcname=path.name)
    # Leaving the with-block closes the archive and writes its central directory,
    # so no explicit close() is needed. Rewind so that consumers reading the
    # buffer sequentially start at the first byte instead of at end-of-stream.
    zip_buffer.seek(0)
    return zip_buffer
| 19 | + |
| 20 | + |
class TestValidateDwca:
    """
    Tests for DwcaHandler.validate_dwca, run against the sample folders under input_folder
    """

    @staticmethod
    def _validate_folder(folder_path, key_column):
        """
        Zip the given folder in memory and return the validate_dwca result,
        using key_column as the key for the occurrence content
        """
        archive = make_zip_from_folder_contents(folder_path)
        return DwcaHandler.validate_dwca(dwca_file=archive, keys_lookup={'occurrence': key_column})

    def test_validate_dwca(self):
        """
        dwca-sample1 keyed on occurrenceID is expected to pass validation
        """
        assert self._validate_folder(f"{input_folder}/dwca-sample1", 'occurrenceID')

    def test_validate_dwca2(self):
        """
        dwca-sample2 keyed on occurrenceID is expected to pass validation
        """
        assert self._validate_folder(f"{input_folder}/dwca-sample2", 'occurrenceID')

    def test_empty_keys(self, caplog):
        """
        dwca-sample3 keyed on occurrenceID is expected to fail validation and
        log the empty key values
        """
        caplog.set_level(logging.INFO)
        outcome = self._validate_folder(f"{input_folder}/dwca-sample3", 'occurrenceID')
        logged = caplog.messages
        assert not outcome
        assert "Empty values found in ['occurrenceID']. Total rows affected: 1" in logged
        assert "Empty values found in dataframe row: [0]" in logged

    def test_duplicate_key(self, caplog):
        """
        dwca-sample4 keyed on catalogNumber is expected to fail validation and
        log the duplicate key values
        """
        caplog.set_level(logging.INFO)
        outcome = self._validate_folder(f"{input_folder}/dwca-sample4", 'catalogNumber')
        logged = caplog.messages
        assert not outcome
        assert "Duplicate ['catalogNumber'] found. Total rows affected: 3" in logged
        assert "Duplicate values: ['014800' '014823']" in logged

    def test_duplicate_columns_in_dwca(self):
        """
        dwca-sample5 is expected to raise a ValueError reporting duplicate columns
        in the metadata for occurrence.csv
        """
        # The archive is built outside the raises block so that only the
        # validate_dwca call is checked for the ValueError.
        archive = make_zip_from_folder_contents(f"{input_folder}/dwca-sample5")
        expected_error = ("Duplicate columns ['catalogNumber'] specified in the "
                          "metadata for occurrence.csv")

        with pytest.raises(ValueError) as raised:
            DwcaHandler.validate_dwca(dwca_file=archive, keys_lookup={'occurrence': 'catalogNumber'})

        assert expected_error in str(raised.value)
0 commit comments