Skip to content

Commit 6d43e60

Browse files
authored
Merge pull request #318 from gkreitz/markdown_image_fixes
Improve image handling in markdown statements
2 parents 1234a05 + f136b8e commit 6d43e60

File tree

3 files changed

+26
-16
lines changed

3 files changed

+26
-16
lines changed

problemtools/md2html.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#! /usr/bin/env python3
22
# -*- coding: utf-8 -*-
33
import argparse
4+
import hashlib
45
import html
56
import os
67
from pathlib import Path
@@ -15,7 +16,7 @@
1516

1617

1718
def convert(problem_root: Path, options: argparse.Namespace, statement_file: Path) -> bool:
18-
"""Convert a Markdown statement to HTML
19+
"""Convert a Markdown statement to HTML. Writes output to current working directory.
1920
2021
Args:
2122
problem_root: path to problem directory
@@ -85,7 +86,7 @@ def is_fn_id(s):
8586
allowed_classes = ('sample', 'problemheader', 'problembody', 'sampleinteractionwrite', 'sampleinteractionread')
8687

8788
# Annoying: nh3 will ignore exceptions in attribute_filter
88-
image_fail_reason: str | None = None
89+
image_fail_reason: list[Exception] = []
8990

9091
def attribute_filter(tag, attribute, value):
9192
if attribute == 'class' and value in allowed_classes:
@@ -103,10 +104,9 @@ def attribute_filter(tag, attribute, value):
103104
statement_util.assert_image_is_valid(statement_dir, value)
104105
except Exception as e:
105106
nonlocal image_fail_reason
106-
image_fail_reason = str(e)
107+
image_fail_reason.append(e)
107108
return None
108-
copy_image(statement_dir, value)
109-
return value
109+
return copy_image(statement_dir, value)
110110
return None
111111

112112
statement_html = nh3.clean(
@@ -126,22 +126,25 @@ def attribute_filter(tag, attribute, value):
126126
)
127127

128128
if image_fail_reason:
129-
assert isinstance(image_fail_reason, str)
130-
if 'Unsupported' in image_fail_reason:
131-
raise ValueError(image_fail_reason)
132-
raise FileNotFoundError(image_fail_reason)
129+
# We don't have a great way to emit multiple errors from here, so just re-raise the first error
130+
raise image_fail_reason[0]
133131

134132
return statement_html
135133

136134

137-
def copy_image(statement_dir: Path, img_src: str) -> None:
138-
"""Copy image to output directory
135+
def copy_image(statement_dir: Path, img_src: str) -> str:
136+
"""Copy image to working directory (with new filename) and return the new filename
139137
140138
Args:
141139
statement_dir: the directory with problem statement files
142140
img_src: the image source as in the Markdown statement
143141
"""
144142

145-
if os.path.isfile(img_src): # already copied
146-
return
147-
shutil.copyfile(statement_dir / img_src, img_src)
143+
# We rename to sha256 of contents, and preserve the suffix. This flattens
144+
# the directory structure to a single folder in a simple way.
145+
with open(statement_dir / img_src, 'rb') as f:
146+
filename = hashlib.file_digest(f, 'sha256').hexdigest() + Path(img_src).suffix
147+
148+
if not os.path.isfile(filename): # check if already copied
149+
shutil.copyfile(statement_dir / img_src, filename)
150+
return filename

problemtools/statement_util.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tempfile
88
from pathlib import Path
99
from typing import Optional, List, Tuple
10+
from urllib.parse import urlparse
1011

1112
from . import metadata
1213
from .formatversion import FormatVersion, get_format_version
@@ -106,10 +107,16 @@ def foreach_image(statement_path: Path, callback):
106107

107108
def assert_image_is_valid(statement_dir: Path, img_src: str) -> None:
108109
"""Check that the image exists and uses an allowed extension"""
109-
extension = Path(img_src).suffix
110+
img_path = Path(img_src)
111+
extension = img_path.suffix
110112
# TODO: fix svg sanitization and allow svg
111113
if extension not in ALLOWED_IMAGE_EXTENSIONS:
112114
raise ValueError(f'Unsupported image extension {extension} for image {img_src}')
115+
if img_path.is_absolute():
116+
raise ValueError(f'Image path must be relative, but {img_src} is not.')
117+
as_url = urlparse(img_src)
118+
if as_url.scheme:
119+
raise ValueError(f'Image path must not be an URL with a scheme, but {img_src} is.')
113120

114121
source_file = statement_dir / img_src
115122
if not source_file.exists():
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Make web request via image
22

3-
<img src="http:picsum.photos/400">
3+
<img src="https:open.kattis.com/images/site/header/logo-empty.png">

0 commit comments

Comments
 (0)