Skip to content

Improve image handling in markdown statements #318

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 17 additions & 14 deletions problemtools/md2html.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import hashlib
import html
import os
from pathlib import Path
Expand All @@ -15,7 +16,7 @@


def convert(problem_root: Path, options: argparse.Namespace, statement_file: Path) -> bool:
"""Convert a Markdown statement to HTML
"""Convert a Markdown statement to HTML. Writes output to current working directory.

Args:
problem_root: path to problem directory
Expand Down Expand Up @@ -85,7 +86,7 @@ def is_fn_id(s):
allowed_classes = ('sample', 'problemheader', 'problembody', 'sampleinteractionwrite', 'sampleinteractionread')

# Annoying: nh3 will ignore exceptions in attribute_filter
image_fail_reason: str | None = None
image_fail_reason: list[Exception] = []

def attribute_filter(tag, attribute, value):
if attribute == 'class' and value in allowed_classes:
Expand All @@ -103,10 +104,9 @@ def attribute_filter(tag, attribute, value):
statement_util.assert_image_is_valid(statement_dir, value)
except Exception as e:
nonlocal image_fail_reason
image_fail_reason = str(e)
image_fail_reason.append(e)
return None
copy_image(statement_dir, value)
return value
return copy_image(statement_dir, value)
return None

statement_html = nh3.clean(
Expand All @@ -126,22 +126,25 @@ def attribute_filter(tag, attribute, value):
)

if image_fail_reason:
assert isinstance(image_fail_reason, str)
if 'Unsupported' in image_fail_reason:
raise ValueError(image_fail_reason)
raise FileNotFoundError(image_fail_reason)
# We don't have a great way to emit multiple errors from here, so just re-raise the first error
raise image_fail_reason[0]

return statement_html


def copy_image(statement_dir: Path, img_src: str) -> None:
"""Copy image to output directory
def copy_image(statement_dir: Path, img_src: str) -> str:
"""Copy image to working directory (with new filename) and returns the new filename

Args:
statement_dir: the directory with problem statement files
img_src: the image source as in the Markdown statement
"""

if os.path.isfile(img_src): # already copied
return
shutil.copyfile(statement_dir / img_src, img_src)
# We rename to sha256 of contents, and preserve the suffix. This flattens
# the directory structure to a single folder in a simple way.
with open(statement_dir / img_src, 'rb') as f:
filename = hashlib.file_digest(f, 'sha256').hexdigest() + Path(img_src).suffix

if not os.path.isfile(filename): # check if already copied
shutil.copyfile(statement_dir / img_src, filename)
return filename
9 changes: 8 additions & 1 deletion problemtools/statement_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import tempfile
from pathlib import Path
from typing import Optional, List, Tuple
from urllib.parse import urlparse

from . import metadata
from .formatversion import FormatVersion, get_format_version
Expand Down Expand Up @@ -106,10 +107,16 @@ def foreach_image(statement_path: Path, callback):

def assert_image_is_valid(statement_dir: Path, img_src: str) -> None:
"""Check that the image exists and uses an allowed extension"""
extension = Path(img_src).suffix
img_path = Path(img_src)
extension = img_path.suffix
# TODO: fix svg sanitization and allow svg
if extension not in ALLOWED_IMAGE_EXTENSIONS:
raise ValueError(f'Unsupported image extension {extension} for image {img_src}')
if img_path.is_absolute():
raise ValueError(f'Image path must be relative, but {img_src} is not.')
as_url = urlparse(img_src)
if as_url.scheme:
raise ValueError(f'Image path must not be an URL with a scheme, but {img_src} is.')

source_file = statement_dir / img_src
if not source_file.exists():
Expand Down
2 changes: 1 addition & 1 deletion tests/problems/imgrequest2/statement/problem.en.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Make web request via image

<img src="http:picsum.photos/400">
<img src="https:open.kattis.com/images/site/header/logo-empty.png">