Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions lambdas/services/post_fhir_document_reference_service.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from botocore.exceptions import ClientError
from pydantic import ValidationError

Expand Down Expand Up @@ -164,11 +166,15 @@ def _create_document_reference(
logger.error("FHIR document validation error: attachment.title missing")
raise DocumentRefException(400, LambdaError.DocRefNoParse)

sub_folder, raw_request = (
("user_upload", None)
if doc_type != SnomedCodes.PATIENT_DATA.value
else (f"fhir_upload/{doc_type.code}", raw_fhir_doc)
)
if doc_type != SnomedCodes.PATIENT_DATA.value:
sub_folder, raw_request = "user_upload", None
else:
raw_request_dict = json.loads(raw_fhir_doc)
for content_item in raw_request_dict.get("content", []):
attachment = content_item.get("attachment", {})
attachment.pop("data", None)
sub_folder = f"fhir_upload/{doc_type.code}"
raw_request = json.dumps(raw_request_dict)

document_reference = DocumentReference(
id=document_id,
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
pdm_data_helper = PdmDataHelper()


def strip_attachment_data(payload: str) -> str:
    """Return *payload* re-serialised without any ``content[].attachment.data``.

    The payload is parsed as JSON, the ``data`` key is removed from the
    ``attachment`` object of every entry in the top-level ``content`` list,
    and the result is serialised again with ``json.dumps`` defaults.  Tests
    use this to build the value they expect to find in the stored
    ``RawRequest`` field.

    Shapes that carry no attachment object to strip (a JSON ``null`` for
    ``content`` or ``attachment``, non-object content entries, or a
    non-object top level) are passed through unchanged rather than raising.

    Args:
        payload: A JSON document as a string.

    Returns:
        The JSON document as a string, minus any attachment ``data`` values.

    Raises:
        json.JSONDecodeError: If *payload* is not valid JSON.
    """
    parsed = json.loads(payload)
    if not isinstance(parsed, dict):
        # Nothing to strip from a bare list / scalar; just round-trip it.
        return json.dumps(parsed)

    # ``or []`` covers an explicit ``"content": null`` as well as a missing key.
    for content_item in parsed.get("content") or []:
        if not isinstance(content_item, dict):
            continue
        attachment = content_item.get("attachment")
        # ``.get(key, {})`` would still hand back None for an explicit null,
        # so check the type instead of relying on a default.
        if isinstance(attachment, dict):
            attachment.pop("data", None)
    return json.dumps(parsed)


def test_create_document_base64(test_data):
record = {
"ods": "H81109",
Expand Down Expand Up @@ -58,6 +66,55 @@ def condition(response_json):
assert base64.b64decode(base64_data, validate=True)


def test_create_document_base64_medium_file(test_data):
    """Upload ``files/medium-dummy.pdf`` as base64 and read the same bytes back.

    Checks the 201 response, the ``Location`` header, the attachment URL
    prefix, and that the retrieved ``attachment.data`` decodes to exactly the
    bytes that were uploaded.
    """
    pdf_path = os.path.join(
        os.path.dirname(__file__),
        "files",
        "medium-dummy.pdf",
    )
    with open(pdf_path, "rb") as pdf_file:
        file_content = pdf_file.read()

    record = {
        "ods": "H81109",
        "nhs_number": TEST_NHS_NUMBER,
        "data": base64.b64encode(file_content).decode("utf-8"),
    }

    raw_upload_response = upload_document(
        pdm_data_helper.create_upload_payload(record),
    )
    assert raw_upload_response.status_code == 201

    created = raw_upload_response.json()
    created_id = created["id"]
    record["id"] = created_id.split("~")[1]
    # Register the record with the fixture only once the upload has succeeded.
    test_data.append(record)

    response_headers = raw_upload_response.headers
    assert "Location" in response_headers
    expected_location = f"https://{APIM_ENDPOINT}/national-document-repository/FHIR/R4/DocumentReference/{created_id}"
    assert response_headers["Location"] == expected_location

    expected_url_prefix = f"https://{APIM_ENDPOINT}/national-document-repository/FHIR/R4/DocumentReference/{PDM_SNOMED}~"
    assert expected_url_prefix in created["content"][0]["attachment"]["url"]

    def condition(response_json):
        # Retry predicate: truthy once the attachment carries inline data.
        logging.info(response_json)
        first_attachment = response_json["content"][0]["attachment"]
        return first_attachment.get("data", False)

    retrieved = retrieve_document_with_retry(created_id, condition).json()

    returned_b64 = retrieved["content"][0]["attachment"]["data"]
    assert base64.b64decode(returned_b64, validate=True) == file_content


def test_create_document_saves_raw(test_data):
record = {
"ods": "H81109",
Expand All @@ -84,7 +141,7 @@ def test_create_document_saves_raw(test_data):
assert "RawRequest" in doc_ref["Item"]
assert "Author" in doc_ref["Item"]
assert doc_ref["Item"]["RawRequest"]
assert doc_ref["Item"]["RawRequest"] == payload
assert doc_ref["Item"]["RawRequest"] == strip_attachment_data(payload)


def test_create_document_without_author_or_type(test_data):
Expand Down Expand Up @@ -118,7 +175,7 @@ def test_create_document_without_author_or_type(test_data):
assert "RawRequest" in doc_ref["Item"]
assert "Author" not in doc_ref["Item"]
assert doc_ref["Item"]["RawRequest"]
assert doc_ref["Item"]["RawRequest"] == payload
assert doc_ref["Item"]["RawRequest"] == strip_attachment_data(payload)
for field in ["type", "author"]:
assert field not in doc_ref["Item"]["RawRequest"]

Expand Down Expand Up @@ -148,7 +205,9 @@ def test_create_document_without_title(test_data):
doc_ref = pdm_data_helper.retrieve_document_reference(record=record)
assert "Item" in doc_ref
assert "RawRequest" in doc_ref["Item"]
assert doc_ref["Item"]["RawRequest"] == payload

assert doc_ref["Item"]["RawRequest"] == strip_attachment_data(payload)

raw_request = json.loads(doc_ref["Item"]["RawRequest"])
assert "content" in raw_request
content = raw_request["content"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,74 @@ def test_create_pdm_document_reference_with_raw_request(
assert result.author == "B67890" # Verify author is set


def test_create_pdm_document_reference_strips_data_from_raw_request(
    mock_fhir_doc_ref_base_service,
    mock_post_fhir_doc_ref_service,
    mocker,
):
    """``_create_document_reference`` must drop ``attachment.data`` from raw_request.

    A raw FHIR document containing inline base64 data is passed in for the
    PATIENT_DATA doc type; the resulting ``raw_request`` must keep the other
    attachment fields and ``resourceType`` but carry no ``data`` key.
    """
    ods_system = "https://fhir.nhs.uk/Id/ods-organization-code"

    pdf_attachment = Attachment(
        contentType="application/pdf",
        title="test-file.pdf",
        creation="2023-01-01T12:00:00Z",
    )
    custodian_ref = Reference(
        identifier=Identifier(system=ods_system, value="A12345"),
    )
    author_ref = Reference(
        identifier=Identifier(system=ods_system, value="B67890"),
    )

    fhir_doc = mocker.MagicMock(spec=FhirDocumentReference)
    fhir_doc.content = [DocumentReferenceContent(attachment=pdf_attachment)]
    fhir_doc.custodian = custodian_ref
    fhir_doc.author = [author_ref]

    # Raw request body as received, including the inline base64 payload.
    raw_body = {
        "resourceType": "DocumentReference",
        "content": [
            {
                "attachment": {
                    "contentType": "application/pdf",
                    "title": "test-file.pdf",
                    "data": "dGVzdCBiYXNlNjQgZGF0YQ==",
                },
            },
        ],
    }
    raw_fhir_doc_with_data = json.dumps(raw_body)

    result = mock_post_fhir_doc_ref_service._create_document_reference(
        nhs_number="9000000009",
        author="B67890",
        doc_type=SnomedCodes.PATIENT_DATA.value,
        fhir_doc=fhir_doc,
        current_gp_ods="C13579",
        raw_fhir_doc=raw_fhir_doc_with_data,
    )

    stored = json.loads(result.raw_request)
    stored_attachment = stored["content"][0]["attachment"]
    assert "data" not in stored_attachment
    assert stored_attachment["contentType"] == "application/pdf"
    assert stored_attachment["title"] == "test-file.pdf"
    assert stored["resourceType"] == "DocumentReference"


def test_create_lg_document_reference_with_raw_request(
mock_fhir_doc_ref_base_service,
mock_post_fhir_doc_ref_service,
Expand Down
Loading