Merge pull request #315 from gkreitz/refactor_formatversion

gkreitz · web-flow · commit bd92eeeb67a8 · 2025-05-16T16:10:44.000+02:00
Replace formatversion.FormatData with a StrEnum
diff --git a/problemtools/formatversion.py b/problemtools/formatversion.py
@@ -1,92 +1,47 @@
-import os
 import yaml
-from dataclasses import dataclass
-
-
-VERSION_LEGACY = 'legacy'
-VERSION_2023_07 = '2023-07-draft'
-
-
-@dataclass(frozen=True)
-class FormatData:
-    """
-    A class containing data specific to the format version.
-    name: the version name.
-    statement_directory: the directory where the statements should be found.
-    statement_extensions: the allowed extensions for the statements.
-    """
-
-    name: str
-    statement_directory: str
-    statement_extensions: list[str]
-    output_validator_directory: str
-
-
-FORMAT_DATACLASSES = {
-    VERSION_LEGACY: FormatData(
-        name=VERSION_LEGACY,
-        statement_directory='problem_statement',
-        statement_extensions=['tex'],
-        output_validator_directory='output_validators',
-    ),
-    VERSION_2023_07: FormatData(
-        name=VERSION_2023_07,
-        statement_directory='statement',
-        statement_extensions=['md', 'tex'],
-        output_validator_directory='output_validator',
-    ),
-}
-FORMAT_DATACLASSES['2023-07'] = FORMAT_DATACLASSES[VERSION_2023_07]  # Accept non-draft version string too
-
-
-def detect_problem_version(path: str) -> str:
-    """
-    Returns the problem version value of problem.yaml or throws an error if it is unable to read the file.
-    Args:
-        path: the problem path
-
-    Returns:
-        the version name as a String
-
-    """
-    config_path = os.path.join(path, 'problem.yaml')
-    try:
-        with open(config_path) as f:
-            config: dict = yaml.safe_load(f) or {}
-    except Exception as e:
-        raise VersionError(f'Error reading problem.yaml: {e}')
-    return config.get('problem_format_version', VERSION_LEGACY)
-
-
-def get_format_data(path: str) -> FormatData:
-    """
-    Gets the dataclass object containing the necessary data for a problem format.
-    Args:
-        path: the problem path
-
-    Returns:
-        the dataclass object containing the necessary data for a problem format
-
-    """
-    return get_format_data_by_name(detect_problem_version(path))
-
-
-def get_format_data_by_name(name: str) -> FormatData:
-    """
-    Gets the dataclass object containing the necessary data for a problem format given the format name.
-    Args:
-        name: the format name
-
-    Returns:
-        the dataclass object containing the necessary data for a problem format
-
-    """
-    data = FORMAT_DATACLASSES.get(name)
-    if not data:
-        raise VersionError(f'No version found with name {name}')
-    else:
-        return data
-
-
-class VersionError(Exception):
-    pass
+from enum import StrEnum
+from pathlib import Path
+
+
+class FormatVersion(StrEnum):
+    LEGACY = 'legacy'
+    V_2023_07 = '2023-07-draft'  # When 2023-07 is finalized, change this value to '2023-07' and update _missing_ to match
+
+    @property
+    def statement_directory(self) -> str:
+        match self:
+            case FormatVersion.LEGACY:
+                return 'problem_statement'
+            case FormatVersion.V_2023_07:
+                return 'statement'
+
+    @property
+    def statement_extensions(self) -> list[str]:
+        match self:
+            case FormatVersion.LEGACY:
+                return ['tex']
+            case FormatVersion.V_2023_07:
+                return ['md', 'tex']
+
+    @property
+    def output_validator_directory(self) -> str:
+        match self:
+            case FormatVersion.LEGACY:
+                return 'output_validators'
+            case FormatVersion.V_2023_07:
+                return 'output_validator'
+
+    # Accept '2023-07' as an alternative spelling of '2023-07-draft'; any other unknown value is rejected.
+    # Replace this method with an enum value alias (_add_value_alias_) once Python 3.13 is the minimum version.
+    @classmethod
+    def _missing_(cls, value):
+        if value == '2023-07':
+            return cls.V_2023_07
+        return None
+
+
+def get_format_version(problem_root: Path) -> FormatVersion:
+    """Return the format version declared in problem_root/problem.yaml (LEGACY if the key is absent)."""
+    with open(problem_root / 'problem.yaml') as f:
+        config: dict = yaml.safe_load(f) or {}
+    return FormatVersion(config.get('problem_format_version', FormatVersion.LEGACY))
diff --git a/problemtools/metadata.py b/problemtools/metadata.py
@@ -11,8 +11,8 @@
 import yaml
 
 from . import config
-from . import formatversion
 from . import statement_util
+from .formatversion import FormatVersion
 
 
 class ProblemType(StrEnum):
@@ -164,7 +164,7 @@ class MetadataLegacy(BaseModel):
     which pre-date the version called legacy).
     """
 
-    problem_format_version: str = formatversion.VERSION_LEGACY
+    problem_format_version: FormatVersion = FormatVersion.LEGACY
     type: Literal['pass-fail'] | Literal['scoring'] = 'pass-fail'
     name: str | None = None
     uuid: UUID | None = None
@@ -191,7 +191,7 @@ class Metadata(BaseModel):
     Metadata serializes to a valid 2023-07-draft configuration.
     """
 
-    problem_format_version: str
+    problem_format_version: FormatVersion
     type: list[ProblemType]
     name: dict[str, str]
     uuid: UUID | None
@@ -309,7 +309,7 @@ def parse_person(person: str | Person) -> Person:
 
 
 def parse_metadata(
-    version: formatversion.FormatData,
+    version: FormatVersion,
     problem_yaml_data: dict[str, Any],
     names_from_statements: dict[str, str] | None = None,
 ) -> Metadata:
@@ -326,11 +326,11 @@ def parse_metadata(
         system_defaults = config.load_config('problem.yaml')
         data['limits'] = system_defaults['limits'] | data.get('limits', {})
 
-    if version.name == formatversion.VERSION_LEGACY:
+    if version is FormatVersion.LEGACY:
         legacy_model = MetadataLegacy.model_validate(data)
         return Metadata.from_legacy(legacy_model, names_from_statements or {})
     else:
-        assert version.name == formatversion.VERSION_2023_07
+        assert version is FormatVersion.V_2023_07
         model_2023_07 = Metadata2023_07.model_validate(data)
         return Metadata.from_2023_07(model_2023_07)
 
@@ -347,8 +347,8 @@ def load_metadata(problem_root: Path) -> tuple[Metadata, dict]:
         if data is None:  # Loading empty yaml returns None
             data = {}
 
-    version = formatversion.get_format_data_by_name(data.get('problem_format_version', formatversion.VERSION_LEGACY))
-    if version.name == formatversion.VERSION_LEGACY:
+    version = FormatVersion(data.get('problem_format_version', FormatVersion.LEGACY))
+    if version is FormatVersion.LEGACY:
         names_from_statements = statement_util.load_names_from_statements(problem_root, version)
     else:
         names_from_statements = None
diff --git a/problemtools/statement_util.py b/problemtools/statement_util.py
@@ -8,14 +8,14 @@
 from pathlib import Path
 from typing import Optional, List, Tuple
 
-from . import formatversion
 from . import metadata
+from .formatversion import FormatVersion, get_format_version
 
 ALLOWED_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # ".svg"
 FOOTNOTES_STRINGS = ['<section class="footnotes">', '<aside class="footnotes">']
 
 
-def find_statements(problem_root: Path, version: formatversion.FormatData) -> dict[str, list[Path]]:
+def find_statements(problem_root: Path, version: FormatVersion) -> dict[str, list[Path]]:
     """Returns a dict mapping language code to a list of paths to statements (relative to problem_root)
 
     Note that in well-formed problem packages, there should only be a single
@@ -30,17 +30,17 @@ def find_statements(problem_root: Path, version: formatversion.FormatData) -> di
         for file in directory.iterdir():
             if m := filename_re.search(file.name):
                 if m.group(2) is None:  # problem.tex is allowed and assumed to be 'en' in legacy. We ignore it in newer formats.
-                    if version.name == formatversion.VERSION_LEGACY:
+                    if version is FormatVersion.LEGACY:
                         ret['en'].append(file)
                 else:
                     ret[m.group(2)].append(file)
     return dict(ret)
 
 
-def load_names_from_statements(problem_root: Path, version: formatversion.FormatData) -> dict[str, str]:
+def load_names_from_statements(problem_root: Path, version: FormatVersion) -> dict[str, str]:
     """Returns a dict mapping language code => problem name"""
 
-    assert version.name == formatversion.VERSION_LEGACY, 'load_names_from_statements only makes sense for legacy format'
+    assert version is FormatVersion.LEGACY, 'load_names_from_statements only makes sense for legacy format'
     ret: dict[str, str] = {}
     for lang, files in find_statements(problem_root, version).items():
         hit = re.search(r'\\problemname{(.*)}', files[0].read_text(), re.MULTILINE)
@@ -56,7 +56,7 @@ def find_statement(problem_root: Path, language: str) -> Path:
         ValueError: if there are multiple statements in language.
         FileNotFoundError: if there are no statements in language.
     """
-    candidates = find_statements(problem_root, formatversion.get_format_data(str(problem_root)))
+    candidates = find_statements(problem_root, get_format_version(problem_root))
     if language not in candidates:
         raise FileNotFoundError(f'No statement found in language {language}. Found languages: {", ".join(candidates)}')
     elif len(candidates[language]) > 1:
diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py
@@ -28,12 +28,12 @@
 
 from . import config
 from . import languages
-from . import formatversion
 from . import metadata
 from . import problem2html
 from . import problem2pdf
 from . import run
 from . import statement_util
+from .formatversion import FormatVersion, get_format_version
 
 from abc import ABC
 from typing import Any, Callable, ClassVar, Literal, Pattern, Match, ParamSpec, Type, TypeVar
@@ -819,16 +819,12 @@ def setup(self):
             error_str = '\n'.join([f'    {"->".join((str(loc) for loc in err["loc"]))}: {err["msg"]}' for err in e.errors()])
             self.error(f'Failed parsing problem.yaml. Found {len(e.errors())} errors:\n{error_str}')
             # For now, set metadata to an empty legacy config to avoid crashing.
-            self.problem.setMetadata(
-                metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {})
-            )
+            self.problem.setMetadata(metadata.parse_metadata(FormatVersion.LEGACY, {}))
         except Exception as e:
             # This should likely be a fatal error, but I'm not sure there's a clean way to fail from setup
             self.error(f'Failed loading problem configuration: {e}')
             # For now, set metadata to an empty legacy config to avoid crashing.
-            self.problem.setMetadata(
-                metadata.parse_metadata(formatversion.get_format_data_by_name(formatversion.VERSION_LEGACY), {})
-            )
+            self.problem.setMetadata(metadata.parse_metadata(FormatVersion.LEGACY, {}))
         return {}
 
     def __str__(self) -> str:
@@ -853,7 +849,7 @@ def check(self, context: Context) -> bool:
 
         if self._metadata.uuid is None:
             uuid_msg = f'Missing uuid from problem.yaml. Add "uuid: {uuid.uuid4()}" to problem.yaml.'
-            if self.problem.format.name == formatversion.VERSION_LEGACY:
+            if self.problem.format is FormatVersion.LEGACY:
                 self.warning(uuid_msg)
             else:
                 self.error(uuid_msg)
@@ -864,7 +860,7 @@ def check(self, context: Context) -> bool:
             not self._metadata.is_pass_fail()
             and self.problem.get(ProblemTestCases)['root_group'].has_custom_groups()
             and 'show_test_data_groups' not in self._origdata.get('grading', {})
-            and self.problem.format.name == formatversion.VERSION_LEGACY
+            and self.problem.format is FormatVersion.LEGACY
         ):
             self.warning(
                 'Problem has custom testcase groups, but does not specify a value for grading.show_test_data_groups; defaulting to false'
@@ -1217,10 +1213,7 @@ class OutputValidators(ProblemPart):
     PART_NAME = 'output_validator'
 
     def setup(self):
-        if (
-            self.problem.format.name != formatversion.VERSION_LEGACY
-            and (Path(self.problem.probdir) / 'output_validators').exists()
-        ):
+        if self.problem.format is not FormatVersion.LEGACY and (Path(self.problem.probdir) / 'output_validators').exists():
             self.error('output_validators is not supported after Legacy; please use output_validator instead')
 
         self._validators = run.find_programs(
@@ -1351,7 +1344,7 @@ def _parse_validator_results(self, val, status: int, feedbackdir, testcase: Test
     def _actual_validators(self) -> list:
         vals = self._validators
         if self.problem.getMetadata().legacy_validation == 'default' or (
-            self.problem.format.name == formatversion.VERSION_2023_07 and not vals
+            self.problem.format is FormatVersion.V_2023_07 and not vals
         ):
             vals = [self._default_validator]
         return [val for val in vals if val is not None]
@@ -1739,16 +1732,16 @@ def check(self, context: Context) -> bool:
         return self._check_res
 
 
-PROBLEM_FORMATS: dict[str, dict[str, list[Type[ProblemPart]]]] = {
-    formatversion.VERSION_LEGACY: {
+PROBLEM_FORMATS: dict[FormatVersion, dict[str, list[Type[ProblemPart]]]] = {
+    FormatVersion.LEGACY: {
         'config': [ProblemConfig],
         'statement': [ProblemStatement, Attachments],
         'validators': [InputValidators, OutputValidators],
         'graders': [Graders],
         'data': [ProblemTestCases],
         'submissions': [Submissions],
     },
-    formatversion.VERSION_2023_07: {  # TODO: Add all the parts
+    FormatVersion.V_2023_07: {  # TODO: Add all the parts
         'config': [ProblemConfig],
         'statement': [ProblemStatement, Attachments],
         'validators': [InputValidators, OutputValidators],
@@ -1773,14 +1766,14 @@ class Problem(ProblemAspect):
     of category -> part-types. You could for example have 'validators' -> [InputValidators, OutputValidators].
     """
 
-    def __init__(self, probdir: str, parts: dict[str, list[type]] = PROBLEM_FORMATS[formatversion.VERSION_LEGACY]):
+    def __init__(self, probdir: str, parts: dict[str, list[type]] = PROBLEM_FORMATS[FormatVersion.LEGACY]):
         self.part_mapping: dict[str, list[Type[ProblemPart]]] = parts
         self.aspects: set[type] = {v for s in parts.values() for v in s}
         self.probdir = os.path.realpath(probdir)
         self.shortname: str | None = os.path.basename(self.probdir)
         super().__init__(self.shortname)
         self.language_config = languages.load_language_config()
-        self.format = formatversion.get_format_data(self.probdir)
+        self.format = get_format_version(Path(self.probdir))
         self._data: dict[str, dict] = {}
         self._metadata: metadata.Metadata | None = None
         self.debug(f'Problem-format: {parts}')
@@ -1860,8 +1853,8 @@ def check(self, args: argparse.Namespace) -> tuple[int, int]:
         try:
             if not re.match('^[a-z0-9]+$', self.shortname):
                 self.error(f"Invalid shortname '{self.shortname}' (must be [a-z0-9]+)")
-            if self.format.name == formatversion.VERSION_2023_07:
-                self.warning(f'Support for version {self.format.name} is very incomplete. Verification may not work as expected.')
+            if self.format is FormatVersion.V_2023_07:
+                self.warning(f'Support for version {self.format} is very incomplete. Verification may not work as expected.')
 
             self._check_symlinks()
 
@@ -2007,17 +2000,16 @@ def main() -> None:
         for problemdir in args.problemdir:
             try:
                 if args.problem_format == 'automatic':
-                    version_data = formatversion.get_format_data(problemdir)
+                    formatversion = get_format_version(Path(problemdir))
                 else:
-                    version_data = formatversion.get_format_data_by_name(args.problem_format)
-            except formatversion.VersionError as e:
+                    formatversion = FormatVersion(args.problem_format)
+            except Exception as e:
                 total_errors += 1
                 print(f'ERROR: problem version could not be decided for {os.path.basename(os.path.realpath(problemdir))}: {e}')
                 continue
 
-            print(f'Loading problem {os.path.basename(os.path.realpath(problemdir))} with format version {version_data.name}')
-            format = PROBLEM_FORMATS[version_data.name]
-            with Problem(problemdir, format) as prob:
+            print(f'Loading problem {os.path.basename(os.path.realpath(problemdir))} with format version {formatversion}')
+            with Problem(problemdir, PROBLEM_FORMATS[formatversion]) as prob:
                 errors, warnings = prob.check(args)
 
                 def p(x: int) -> str:
diff --git a/tests/test_metadata.py b/tests/test_metadata.py