1
1
"""The main input for in2lambda, defining both the CLT and main library function."""
2
2
3
+ # The commented-out block below appears to make this script import the local source files rather than the pip-installed library (unverified; originally written by Kevin).
4
+ #
5
+ # import sys
6
+ # import os
7
+ # sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
8
+
9
+
3
10
import importlib
4
11
import pkgutil
5
12
from typing import Optional
10
17
import in2lambda .filters
11
18
from in2lambda .api .module import Module
12
19
20
+ import subprocess
21
+
22
def docx_to_md(docx_file: str, *, pandoc_path: str = "pandoc") -> str:
    """Convert a .docx file to Pandoc markdown text.

    Runs the external pandoc executable on ``docx_file`` and returns whatever
    it writes to standard output.

    Args:
        docx_file: Path to the .docx file to convert.
        pandoc_path: Name or path of the pandoc executable to run. Defaults to
            ``"pandoc"``, which is resolved through the PATH.

    Returns:
        The markdown emitted by pandoc, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the pandoc executable cannot be found.
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
    """
    # State the input format explicitly instead of relying on pandoc guessing
    # it from the file extension.
    command = [pandoc_path, docx_file, "-f", "docx", "-t", "markdown"]
    try:
        md_output = subprocess.check_output(command)
    except FileNotFoundError as err:
        # Same exception type as before, but with a message that tells the
        # user what is actually missing and which file was being converted.
        raise FileNotFoundError(
            f"Could not run '{pandoc_path}' to convert {docx_file!r}: "
            "pandoc must be installed and on the PATH to read .docx files."
        ) from err
    return md_output.decode("utf-8")
13
26
14
27
def file_type (file : str ) -> str :
15
28
"""Determines which pandoc file format to use for a given file.
@@ -52,7 +65,7 @@ def file_type(file: str) -> str:
52
65
):
53
66
return "markdown"
54
67
case "docx" :
55
- return "docx" # Pandoc doesn't seem to support doc
68
+ return "docx" # Pandoc doesn't seem to support . doc, and panflute doesn't like .docx.
56
69
raise RuntimeError (f"Unsupported file extension: .{ extension } " )
57
70
58
71
@@ -90,14 +103,21 @@ def runner(
90
103
# Dynamically import the correct pandoc filter depending on the subject.
91
104
filter_module = importlib .import_module (f"in2lambda.filters.{ chosen_filter } .filter" )
92
105
93
- with open (question_file , "r" , encoding = "utf-8" ) as file :
94
- text = file .read ()
106
+
107
+ if file_type (question_file ) == 'docx' :
108
+ # Convert .docx to md using Pandoc and proceed
109
+ text = docx_to_md (question_file )
110
+ input_format = "markdown"
111
+ else :
112
+ with open (question_file , "r" , encoding = "utf-8" ) as file :
113
+ text = file .read ()
114
+ input_format = file_type (question_file )
95
115
96
116
# Parse the Pandoc AST using the relevant panflute filter.
97
117
pf .run_filter (
98
118
filter_module .pandoc_filter ,
99
119
doc = pf .convert_text (
100
- text , input_format = file_type ( question_file ) , standalone = True
120
+ text , input_format = input_format , standalone = True
101
121
),
102
122
module = module ,
103
123
tex_file = question_file ,
@@ -106,13 +126,18 @@ def runner(
106
126
107
127
# If separate answer TeX file provided, parse that as well.
108
128
if answer_file :
109
- with open (answer_file , "r" , encoding = "utf-8" ) as file :
110
- answer_text = file .read ()
129
+ if file_type (answer_file ) == 'docx' :
130
+ answer_text = docx_to_md (answer_file )
131
+ answer_format = "markdown"
132
+ else :
133
+ with open (answer_file , "r" , encoding = "utf-8" ) as file :
134
+ answer_text = file .read ()
135
+ answer_format = file_type (answer_file )
111
136
112
137
pf .run_filter (
113
138
filter_module .pandoc_filter ,
114
139
doc = pf .convert_text (
115
- answer_text , input_format = file_type ( answer_file ) , standalone = True
140
+ answer_text , input_format = answer_format , standalone = True
116
141
),
117
142
module = module ,
118
143
tex_file = answer_file ,
0 commit comments