Skip to content

Commit

Permalink
Merge pull request #924 from WolframResearch/bugfix/more-tex-fixes
Browse files Browse the repository at this point in the history
Added more heuristics to correct some common LLM TeX mistakes
  • Loading branch information
rhennigan authored Nov 15, 2024
2 parents a626a8b + 0a9e79c commit 3260efb
Show file tree
Hide file tree
Showing 4 changed files with 425 additions and 54 deletions.
1 change: 1 addition & 0 deletions Source/Chatbook/CommonSymbols.wl
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ BeginPackage[ "Wolfram`Chatbook`Common`" ];
`makeInteractiveCodeCell;
`makeModelSelector;
`makeOutputDingbat;
`makeTeXBoxes;
`makeTokenBudget;
`makeToolConfiguration;
`makeToolResponseString;
Expand Down
55 changes: 1 addition & 54 deletions Source/Chatbook/Formatting.wl
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ makeResultCell0[ mathCell[ math_String ] ] /; StringMatchQ[ math, (DigitCharacte
math;

makeResultCell0[ mathCell[ math_String ] ] :=
With[ { boxes = Quiet @ InputAssistant`TeXAssistant @ preprocessMathString @ math },
With[ { boxes = makeTeXBoxes @ math },
If[ MatchQ[ boxes, _RawBoxes ],
Cell @ BoxData @ toTeXBoxes @ boxes,
makeResultCell0 @ inlineCodeCell @ math
Expand Down Expand Up @@ -437,59 +437,6 @@ makeDiscardedMaterialCell[ stuff___ ] := {

makeDiscardedMaterialCell // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*preprocessMathString*)
preprocessMathString // beginDefinition;

(* Prepare a TeX string for conversion: trim surrounding whitespace, pass the text through
   texUTF8Convert, then apply $preprocessMathRules repeatedly until the string stops
   changing or three passes have been made. *)
preprocessMathString[ math_String ] :=
    With[ { normalized = texUTF8Convert[ StringTrim[ math ] ] },
        FixedPoint[ StringReplace[ #, $preprocessMathRules ] &, normalized, 3 ]
    ];

preprocessMathString // endDefinition;


(* String replacement rules applied by preprocessMathString (via StringReplace inside FixedPoint) to a TeX string. *)
$preprocessMathRules = {
(* Remove commas from large numbers, e.g. "123,456,789" becomes "123456789". *)
(* NOTE(review): the leading group must be exactly three digits, so "1,234" and "12,345" are left unchanged and "1,234,567" becomes "1,234567" -- confirm this is intended. *)
n: (Repeated[ DigitCharacter, { 3 } ] ~~ ("," ~~ Repeated[ DigitCharacter, { 3 } ])..) :> StringDelete[ n, "," ],
(* Add missing brackets to superscripts: a superscript written as ^\text{letters} is rewritten as ^{\text{letters}}. Only runs of letter characters inside \text are matched. *)
"^\\text{" ~~ s: LetterCharacter.. ~~ "}" :> "^{\\text{"<>s<>"}}",
(* Format superscript text: a digit followed by ^{st}, ^{nd}, ^{rd} or ^{th} gets the suffix wrapped in \text, e.g. 1^{st} becomes 1^{\text{st}}. *)
n: DigitCharacter ~~ "^{" ~~ s: "st"|"nd"|"rd"|"th" ~~ "}" :> n<>"^{\\text{"<>s<>"}}"
};

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*texUTF8Convert*)
texUTF8Convert // beginDefinition;

(* Replace every character of the input whose character code exceeds 255 with the string
   that texUTF8Convert0 returns for it. Characters for which texUTF8Convert0 returns ""
   are left untouched. The converted string is stored as a definition for this exact
   input, so repeated calls reuse it. A failed confirmation is handed to
   throwInternalFailure through Enclose. *)
texUTF8Convert[ string_String ] := Enclose[
    Catch @ Module[ { wide, commands, mapping, converted },
        (* Distinct characters outside the 0-255 code range: *)
        wide = Select[ Union[ Characters[ string ] ], Function[ ch, Max[ ToCharacterCode[ ch ] ] > 255 ] ];

        (* One replacement string per character; anything that is not a list of strings is an internal error: *)
        commands = ConfirmMatch[ Map[ texUTF8Convert0, wide ], { ___String }, "Characters" ];

        (* Drop characters that have no replacement so they stay as they are: *)
        mapping = DeleteCases[ Thread[ wide -> commands ], _ -> "" ];

        converted = ConfirmBy[ StringReplace[ string, mapping ], StringQ, "Converted" ];

        (* Cache the result for this input and return it: *)
        texUTF8Convert[ string ] = converted
    ],
    throwInternalFailure
];

texUTF8Convert // endDefinition;


texUTF8Convert0 // beginDefinition;

(* Convert a single string (called with one character by texUTF8Convert) to a TeX command.
   The input is exported as "TeXFragment" with messages silenced. If the trimmed export is
   exactly a backslash command made of word characters wrapped in \[ ... \], that command
   is returned; any other export, or a non-string export result, gives "".
   The result is cached as a definition for the given input. *)
texUTF8Convert0[ c_String ] := texUTF8Convert0[ c ] =
    Module[ { exported, fragment },
        exported = Quiet @ ExportString[ c, "TeXFragment" ];

        (* A failed export is treated as an empty fragment: *)
        fragment = StringTrim @ If[ StringQ @ exported, exported, "" ];

        StringReplace[
            fragment,
            {
                (* Keep only the bare command from a display-math wrapper: *)
                StartOfString ~~ "\\[" ~~ tex: ("\\" ~~ WordCharacter..) ~~ "\\]" ~~ EndOfString :> tex,
                (* Anything else is discarded: *)
                StartOfString ~~ __ ~~ EndOfString :> ""
            }
        ]
    ];

texUTF8Convert0 // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*makeTableCell*)
Expand Down
1 change: 1 addition & 0 deletions Source/Chatbook/Main.wl
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ $ChatbookContexts = {
"Wolfram`Chatbook`Services`",
"Wolfram`Chatbook`Settings`",
"Wolfram`Chatbook`Storage`",
"Wolfram`Chatbook`TeXBoxes`",
"Wolfram`Chatbook`ToolManager`",
"Wolfram`Chatbook`Tools`",
"Wolfram`Chatbook`UI`",
Expand Down
Loading

0 comments on commit 3260efb

Please sign in to comment.