From 75e65968ae9ce63864380b498797cf6e5a2b73d9 Mon Sep 17 00:00:00 2001 From: Darcy Shen Date: Wed, 19 Jun 2024 21:26:03 +0800 Subject: [PATCH] [66_3] TMU 1.0.0: identical to TM format --- TeXmacs/plugins/tmu/progs/data/tmu.scm | 30 ++ TeXmacs/progs/init-research.scm | 3 +- src/Data/Convert/Mogan/from_tmu.cpp | 416 +++++++++++++++++++++++++ src/Data/Convert/Mogan/to_tmu.cpp | 357 +++++++++++++++++++++ src/Data/Convert/convert.hpp | 5 + src/Scheme/L4/glue_convert.lua | 24 ++ 6 files changed, 834 insertions(+), 1 deletion(-) create mode 100644 TeXmacs/plugins/tmu/progs/data/tmu.scm create mode 100644 src/Data/Convert/Mogan/from_tmu.cpp create mode 100644 src/Data/Convert/Mogan/to_tmu.cpp diff --git a/TeXmacs/plugins/tmu/progs/data/tmu.scm b/TeXmacs/plugins/tmu/progs/data/tmu.scm new file mode 100644 index 0000000000..df4ab633ce --- /dev/null +++ b/TeXmacs/plugins/tmu/progs/data/tmu.scm @@ -0,0 +1,30 @@ + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; MODULE : data/tmu.scm +;; DESCRIPTION : tmu data format +;; COPYRIGHT : (C) 2024 Darcy Shen +;; +;; This software falls under the GNU general public license version 3 or later. +;; It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE +;; in the root directory or . +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(texmacs-module (data tmu)) + +(define-format tmu + (:name "TMU") + (:suffix "tmu" "tsu")) + +(converter tmu-document texmacs-tree + (:function parse-tmu)) + +(converter texmacs-tree tmu-document + (:function serialize-tmu)) + +(converter tmu-snippet texmacs-tree + (:function parse-tmu-snippet)) + +(converter texmacs-tree tmu-snippet + (:function serialize-tmu)) diff --git a/TeXmacs/progs/init-research.scm b/TeXmacs/progs/init-research.scm index 46da5b2ffa..e9b01611d7 100644 --- a/TeXmacs/progs/init-research.scm +++ b/TeXmacs/progs/init-research.scm @@ -335,8 +335,9 @@ gif jpeg png ppm tif webp xpm) (lazy-format (convert rewrite init-rewrite) texmacs verbatim) -(lazy-format (data stm) stm) (lazy-format (data mgs) mgs) +(lazy-format (data stm) stm) +(lazy-format (data tmu) tmu) (lazy-format (convert latex init-latex) latex) (lazy-format (convert html init-html) html) (lazy-format (convert bibtex init-bibtex) bibtex) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp new file mode 100644 index 0000000000..fc7f904686 --- /dev/null +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -0,0 +1,416 @@ + +/****************************************************************************** + * MODULE : fromtm.cpp + * DESCRIPTION: conversion from the TeXmacs file format to TeXmacs trees + * older versions are automatically converted into the present one + * COPYRIGHT : (C) 1999 Joris van der Hoeven + ******************************************************************************* + * This software falls under the GNU general public license version 3 or later. + * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE + * in the root directory or . + ******************************************************************************/ + +#include "convert.hpp" +#include "path.hpp" +#include "preferences.hpp" +#include "tree_helper.hpp" + +#include +#include +#include +#include + +using lolly::data::decode_from_utf8; +using lolly::data::from_hex; +using lolly::data::to_Hex; +using moebius::drd::STD_CODE; + +using namespace moebius; + +/****************************************************************************** + * Conversion of TeXmacs strings of the present format to TeXmacs trees + ******************************************************************************/ + +struct tmu_reader { + string version; // document was composed using this version + hashmap codes; // codes for to present version + tree_label EXPAND_APPLY; // APPLY (version < 0.3.3.22) or EXPAND (otherw) + bool backslash_ok; // true for versions >= 1.0.1.23 + bool with_extensions; // true for versions >= 1.0.2.4 + string buf; // the string being read from + int pos; // the current position of the reader + string last; // last read string + + tmu_reader (string buf2) + : version (TEXMACS_VERSION), codes (STD_CODE), EXPAND_APPLY (EXPAND), + backslash_ok (true), with_extensions (true), buf (buf2), pos (0), + last ("") {} + tmu_reader (string buf2, string version2) + : version (version2), codes (get_codes (version)), + EXPAND_APPLY (version_inf (version, "0.3.3.22") ? APPLY : EXPAND), + backslash_ok (version_inf (version, "1.0.1.23") ? false : true), + with_extensions (version_inf (version, "1.0.2.4") ? false : true), + buf (buf2), pos (0), last ("") {} + + int skip_blank (); + string decode (string s); + string read_char (); + string read_next (); + string read_function_name (); + tree read_apply (string s, bool skip_flag); + tree read (bool skip_flag); +}; + +int +tmu_reader::skip_blank () { + int n= 0; + for (; pos < N (buf); pos++) { + if (buf[pos] == ' ') continue; + if (buf[pos] == '\t') continue; + if (buf[pos] == '\r') continue; + if (buf[pos] == '\n') { + n++; + continue; + } + break; + } + return n; +} + +string +tmu_reader::decode (string s) { + int i, n= N (s); + string r; + for (i= 0; i < n; i++) + if (((i + 1) < n) && (s[i] == '\\')) { + i++; + if (s[i] == ';') + ; + else if (s[i] == '0') r << '\0'; + else if (s[i] == 't') r << '\t'; + else if (s[i] == 'r') r << '\r'; + else if (s[i] == 'n') r << '\n'; + else if (s[i] == '\\') r << '\\'; + else if ((s[i] >= '@') && (s[i] < '`')) r << (s[i] - '@'); + else r << s[i]; + } + else r << s[i]; + return r; +} + +string +tmu_reader::read_char () { + while (((pos + 1) < N (buf)) && (buf[pos] == '\\') && + (buf[pos + 1] == '\n')) { + pos+= 2; + skip_spaces (buf, pos); + } + if (pos >= N (buf)) return ""; + pos++; + return buf (pos - 1, pos); +} + +string +tmu_reader::read_next () { + int old_pos= pos; + string c = read_char (); + if (c == "") return c; + switch (c[0]) { + case '\t': + case '\n': + case '\r': + case ' ': + pos--; + if (skip_blank () <= 1) return " "; + else return "\n"; + case '<': { + old_pos= pos; + c = read_char (); + if (c == "") return ""; + if (c == "#") return "<#"; + if ((c == "\\") || (c == "|") || (c == "/")) return "<" * c; + if (is_iso_alpha (c[0]) || (c == ">")) { + pos= old_pos; + return "<"; + } + pos= old_pos; + return "<"; + /* + string d= read_char (); + if ((d == "\\") || (d == "|") || (d == "/")) return "<" * c * d; + pos= old_pos; + return "<" * c; + */ + } + case '|': + case '>': + return c; + } + + string r; + pos= old_pos; + while (true) { + old_pos= pos; + c = read_char (); + if (c == "") return r; + else if (c == "\\") { + if ((pos < N (buf)) && (buf[pos] == '\\') && backslash_ok) { + r << c << "\\"; + pos++; + } + else r << c << read_char (); + } + else if (c == "\t") break; + else if (c == "\r") break; + else if (c == "\n") break; + else if (c == " ") break; + else if (c == "<") break; + else if (c == "|") break; + else if (c == ">") break; + else r << c; + } + pos= old_pos; + return r; +} + +string +tmu_reader::read_function_name () { + string name= decode (read_next ()); + // cout << "==> " << name << "\n"; + while (true) { + last= read_next (); + // cout << "~~> " << last << "\n"; + if ((last == "") || (last == "|") || (last == ">")) break; + } + return name; +} + +static void +get_collection (tree& u, tree t) { + if (is_func (t, COLLECTION) || is_func (t, DOCUMENT) || is_func (t, CONCAT)) { + int i; + for (i= 0; i < N (t); i++) + get_collection (u, t[i]); + } + else if (is_compound (t)) u << t; +} + +tree +tmu_reader::read_apply (string name, bool skip_flag) { + // cout << "Read apply " << name << INDENT << LF; + tree t (make_tree_label (name)); + if (!with_extensions) t= tree (EXPAND_APPLY, name); + if (codes->contains (name)) { + // cout << " " << name << " -> " << as_string ((tree_label) codes [name]) + // << "\n"; + t= tree ((tree_label) codes[name]); + } + + bool closed= !skip_flag; + while (pos < N (buf)) { + // cout << "last= " << last << LF; + bool sub_flag= (skip_flag) && ((last == "") || (last[N (last) - 1] != '|')); + if (sub_flag) (void) skip_blank (); + t << read (sub_flag); + if ((last == "/>") || (last == "/|")) closed= true; + if (closed && ((last == ">") || (last == "/>"))) break; + } + // cout << "last= " << last << UNINDENT << LF; + // cout << "Done" << LF; + + if (is_func (t, COLLECTION)) { + tree u (COLLECTION); + get_collection (u, t); + return u; + } + return t; +} + +static void +flush (tree& D, tree& C, string& S, bool& spc_flag, bool& ret_flag) { + if (spc_flag) S << " "; + if (S != "") { + if ((N (C) == 0) || (!is_atomic (C[N (C) - 1]))) C << S; + else C[N (C) - 1]->label << S; + S = ""; + spc_flag= false; + } + + if (ret_flag) { + if (N (C) == 0) D << ""; + else if (N (C) == 1) D << C[0]; + else D << C; + C = tree (CONCAT); + ret_flag= false; + } +} + +tree +tmu_reader::read (bool skip_flag) { + tree D (DOCUMENT); + tree C (CONCAT); + string S (""); + bool spc_flag= false; + bool ret_flag= false; + + while (true) { + last= read_next (); + // cout << "--> " << last << "\n"; + if (last == "") break; + if (last == "|") break; + if (last == ">") break; + + if (last[0] == '<') { + if (last[N (last) - 1] == '\\') { + flush (D, C, S, spc_flag, ret_flag); + string name= read_function_name (); + if (last == ">") last= "\\>"; + else last= "\\|"; + C << read_apply (name, true); + } + else if (last[N (last) - 1] == '|') { + (void) read_function_name (); + if (last == ">") last= "|>"; + else last= "||"; + break; + } + else if (last[N (last) - 1] == '/') { + (void) read_function_name (); + if (last == ">") last= "/>"; + else last= "/|"; + break; + } + else if (last[N (last) - 1] == '#') { + string r; + while ((buf[pos] != '>') && (pos + 2 < N (buf))) { + r << ((char) from_hex (buf (pos, pos + 2))); + pos+= 2; + } + if (buf[pos] == '>') pos++; + flush (D, C, S, spc_flag, ret_flag); + C << tree (RAW_DATA, r); + last= read_next (); + break; + } + else { + flush (D, C, S, spc_flag, ret_flag); + string name= decode (read_next ()); + string sep = ">"; + if (name == ">") name= ""; + else sep= read_next (); + // cout << "==> " << name << "\n"; + // cout << "~~> " << sep << "\n"; + if (sep == "|") { + last= "|"; + C << read_apply (name, false); + } + else { + tree t (make_tree_label (name)); + if (!with_extensions) t= tree (EXPAND_APPLY, name); + if (codes->contains (name)) { + // cout << name << " -> " << as_string ((tree_label) codes [name]) + // << "\n"; + t= tree ((tree_label) codes[name]); + } + C << t; + } + } + } + else if (last == " ") spc_flag= true; + else if (last == "\n") ret_flag= true; + else { + flush (D, C, S, spc_flag, ret_flag); + // cout << "<<< " << last << "\n"; + // cout << ">>> " << decode (last) << "\n"; + S << decode (last); + if ((S == "") && (N (C) == 0)) C << ""; + } + } + + if (skip_flag) spc_flag= ret_flag= false; + flush (D, C, S, spc_flag, ret_flag); + if (N (C) == 1) D << C[0]; + else if (N (C) > 1) D << C; + // cout << "*** " << D << "\n"; + if (N (D) == 0) return ""; + if (N (D) == 1) { + if (!skip_flag) return D[0]; + if (version_inf_eq (version, "0.3.4.10")) return D[0]; + if (is_func (D[0], COLLECTION)) return D[0]; + } + return D; +} + +tree +tmu_to_tree (string s) { + tmu_reader tmr (s); + return tmr.read (true); +} + +tree +tmu_to_tree (string s, string version) { + tmu_reader tmr (s, version); + return tmr.read (true); +} + +/****************************************************************************** + * Conversion of TeXmacs strings to TeXmacs trees + ******************************************************************************/ + +inline bool +is_apply (tree t, string s, int n) { + return (L (t) == APPLY) && (N (t) == (n + 1)) && (t[0] == s); +} + +static bool +is_expand (tree t, string s, int n) { + return (L (t) == EXPAND) && (N (t) == n + 1) && (t[0] == s); +} + +tree +tmu_document_to_tree (string s) { + tree error (ERROR, "bad format or data"); + if (starts (s, "edit") || starts (s, "TeXmacs") || + starts (s, "\\(\\)(TeXmacs")) { + string version= "0.0.0.0"; + tree t = string_to_tree (s, version); + if (is_tuple (t) && (N (t) > 0)) t= t (1, N (t)); + int n= arity (t); + + tree doc (DOCUMENT); + doc << compound ("TeXmacs", version); + if (n < 3) return error; + else if (n < 4) + doc << compound ("body", t[2]) << compound ("style", t[0]) + << compound ("initial", t[1]); + else if (n < 7) + doc << compound ("body", t[0]) << compound ("style", t[1]) + << compound ("initial", t[2]) << compound ("references", t[3]); + else + doc << compound ("body", t[0]) << compound ("project", t[1]) + << compound ("style", t[2]) << compound ("initial", t[3]) + << compound ("final", t[4]) << compound ("references", t[5]) + << compound ("auxiliary", t[6]); + return upgrade (doc, version); + } + + if (starts (s, "') break; + string version= s (9, i); + tree doc = tmu_to_tree (s, version); + if (is_compound (doc, "TeXmacs", 1) || is_expand (doc, "TeXmacs", 1) || + is_apply (doc, "TeXmacs", 1)) + doc= tree (DOCUMENT, doc); + if (!is_document (doc)) return error; + if (N (doc) == 0 || !is_compound (doc[0], "TeXmacs", 1)) { + tree d (DOCUMENT); + d << compound ("TeXmacs", version); + d << A (doc); + doc= d; + } + return upgrade (doc, version); + } + return error; +} diff --git a/src/Data/Convert/Mogan/to_tmu.cpp b/src/Data/Convert/Mogan/to_tmu.cpp new file mode 100644 index 0000000000..f1c7e63153 --- /dev/null +++ b/src/Data/Convert/Mogan/to_tmu.cpp @@ -0,0 +1,357 @@ + +/****************************************************************************** + * MODULE : totm.cpp + * DESCRIPTION: conversion of TeXmacs trees to the TeXmacs file format + * COPYRIGHT : (C) 1999 Joris van der Hoeven + ******************************************************************************* + * This software falls under the GNU general public license version 3 or later. + * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE + * in the root directory or . + ******************************************************************************/ + +#include "convert.hpp" +#include "tree_helper.hpp" + +#include +#include + +using namespace moebius; +using lolly::data::as_hexadecimal; +using moebius::drd::std_contains; + +/****************************************************************************** + * Conversion of TeXmacs trees to the present TeXmacs string format + ******************************************************************************/ + +struct tmu_writer { + string buf; // the resulting string + string spc; // "" or " " + string tmp; // not yet flushed characters + int mode; // normal: 0, verbatim: 1, mathematics: 2 + + int tab; // number of tabs after CR + int xpos; // current horizontal position in buf + bool spc_flag; // true if last printed character was a space or CR + bool ret_flag; // true if last printed character was a CR + + tmu_writer () + : buf (""), spc (""), tmp (""), mode (0), tab (0), xpos (0), + spc_flag (true), ret_flag (true) {} + + void cr (); + void flush (); + void write_space (); + void write_return (); + void write (string s, bool flag= true, bool encode_space= false); + void br (int indent= 0); + void tag (string before, string s, string after); + void apply (string func, array args); + void write (tree t); +}; + +void +tmu_writer::cr () { + int i, n= N (buf); + for (i= n - 1; i >= 0; i--) + if ((buf[i] != ' ') || ((i > 0) && (buf[i - 1] == '\\'))) break; + if (i < n - 1) { + buf= buf (0, i + 1); + n = n - N (buf); + for (i= 0; i < n; i++) + buf << "\\ "; + } + buf << '\n'; + for (i= 0; i < min (tab, 20); i++) + buf << ' '; + xpos= min (tab, 20); +} + +void +tmu_writer::flush () { + int i, m= N (spc), n= N (tmp); + if ((m + n) == 0) return; + if ((xpos + m + n) < 78) { + buf << spc << tmp; + xpos+= m + n; + } + else { + if (spc == " ") { + if (xpos > 40) cr (); + else { + buf << " "; + xpos++; + } + } + if ((xpos + n) < 78) { + buf << tmp; + xpos+= n; + } + else + for (i= 0; i < n;) { + if (((i + 1) < n) && (tmp[i] == '\\') && (tmp[i + 1] == ' ')) { + /* not nice when searching text in a .tm file + if (xpos >= 76) { + buf << "\\"; + cr (); + } + */ + buf << "\\ "; + xpos+= 2; + i+= 2; + } + else { + /* not nice when searching text in a .tm file + if (xpos >= 77) { + buf << "\\"; + cr (); + } + */ + buf << tmp[i]; + xpos++; + i++; + } + } + } + spc= ""; + tmp= ""; +} + +void +tmu_writer::write_space () { + if (spc_flag) tmp << "\\ "; + else { + flush (); + spc= " "; + } + spc_flag= true; + ret_flag= false; +} + +void +tmu_writer::write_return () { + if (ret_flag) { + buf << "\\;\n"; + cr (); + } + else { + if ((spc == " ") && (tmp == "")) { + spc= ""; + tmp= "\\ "; + } + flush (); + buf << "\n"; + cr (); + } + spc_flag= true; + ret_flag= true; +} + +void +tmu_writer::write (string s, bool flag, bool encode_space) { + if (flag) { + int i, n= N (s); + for (i= 0; i < n; i++) { + char c= s[i]; + if ((c == ' ') && (!encode_space)) write_space (); + else { + if (c == ' ') tmp << "\\ "; + else if (c == '\n') tmp << "\\n"; + else if (c == '\t') tmp << "\\t"; + else if (c == '\0') tmp << "\\0"; + else if (c == '\\') tmp << "\\\\"; + else if (c == '<') tmp << "\\<"; + else if (c == '|') tmp << "\\|"; + else if (c == '>') tmp << "\\>"; + else if (c == '\34') tmp << c; + else if (((unsigned char) c) < ' ') tmp << '\\' << (c + '@'); + else tmp << c; + spc_flag= false; + ret_flag= false; + } + } + } + else { + tmp << s; + if (N (s) != 0) { + spc_flag= false; + ret_flag= false; + } + } +} + +void +tmu_writer::br (int indent) { + int i; + flush (); + tab+= indent; + for (i= N (buf) - 1; i >= 0; i--) { + if (buf[i] == '\n') return; + if (buf[i] != ' ') { + cr (); + spc_flag= true; + ret_flag= false; + return; + } + } +} + +void +tmu_writer::tag (string before, string s, string after) { + write (before, false); + write (s); + write (after, false); +} + +void +tmu_writer::apply (string func, array args) { + int i, last, n= N (args); + for (i= n - 1; i >= 0; i--) + if (is_document (args[i]) || is_func (args[i], COLLECTION)) break; + last= i; + + if (last >= 0) { + /* + tag ("<\\", func, ">"); + for (i=0; i"); + } + tag (""); + */ + + for (i= 0; i <= n; i++) { + bool flag= + (i < n) && (is_document (args[i]) || is_func (args[i], COLLECTION)); + if (i == 0) { + write ("<\\", false); + write (func, true, true); + } + else if (i == last + 1) { + write ("", false); + break; + } + + if (flag) { + write (">", false); + br (2); + write (args[i]); + br (-2); + } + else { + write ("|", false); + write (args[i]); + } + } + } + else { + write ("<", false); + write (func, true, true); + for (i= 0; i < n; i++) { + write ("|", false); + write (args[i]); + } + write (">", false); + } +} + +void +tmu_writer::write (tree t) { + if (is_atomic (t)) { + write (t->label); + return; + } + + int i, n= N (t); + switch (L (t)) { + case RAW_DATA: { + write ("<#", false); + string s= as_string (t[0]); + for (i= 0; i < N (s); i++) + write (as_hexadecimal ((unsigned char) s[i], 2), false); + write (">", false); + break; + } + case DOCUMENT: + spc_flag= true; + ret_flag= true; + for (i= 0; i < n; i++) { + write (t[i]); + if (i < (n - 1)) write_return (); + else if (ret_flag) write ("\\;", false); + } + break; + case CONCAT: + for (i= 0; i < n; i++) + write (t[i]); + break; + case EXPAND: + if ((n >= 1) && is_atomic (t[0])) { + string s= t[0]->label; + if (std_contains (s)) + ; + else if ((N (s) > 0) && (!is_iso_alpha (s))) + ; + else { + apply (s, A (t (1, n))); + break; + } + } + apply (as_string (EXPAND), A (t)); + break; + case COLLECTION: + tag ("<\\", as_string (COLLECTION), ">"); + if (n == 0) br (); + else { + br (2); + for (i= 0; i < n; i++) { + write (t[i]); + if (i < (n - 1)) br (); + } + br (-2); + } + tag (""); + break; + default: + apply (as_string (L (t)), A (t)); + break; + } +} + +/****************************************************************************** + * Conversion of TeXmacs trees to TeXmacs strings + ******************************************************************************/ + +string +tree_to_tmu (tree t) { + if (!is_snippet (t)) { + int i, n= N (t); + tree r (t, n); + for (i= 0; i < n; i++) + if (is_compound (t[i], "style", 1)) { + tree style= t[i][0]; + if (is_func (style, TUPLE, 1)) style= style[0]; + r[i] = copy (t[i]); + r[i][0]= style; + } + else r[i]= t[i]; + t= r; + } + + tmu_writer tmw; + tmw.write (t); + tmw.flush (); + return tmw.buf; +} diff --git a/src/Data/Convert/convert.hpp b/src/Data/Convert/convert.hpp index 41aff8c1b8..5e3db2c3b9 100644 --- a/src/Data/Convert/convert.hpp +++ b/src/Data/Convert/convert.hpp @@ -53,6 +53,11 @@ tree nonumber_to_eqnumber (tree t); tree eqnumber_to_nonumber (tree t); string search_metadata (tree doc, string kind); +/*** TMU ***/ +tree tmu_to_tree (string s); +tree tmu_document_to_tree (string s); +string tree_to_tmu (tree t); + /*** Verbatim ***/ string tree_to_verbatim (tree t, bool wrap= false, string enc= "default"); tree verbatim_to_tree (string s, bool wrap= false, string enc= "default"); diff --git a/src/Scheme/L4/glue_convert.lua b/src/Scheme/L4/glue_convert.lua index 1662c78910..257b4556a5 100644 --- a/src/Scheme/L4/glue_convert.lua +++ b/src/Scheme/L4/glue_convert.lua @@ -47,6 +47,30 @@ function main() "tree" } }, + { + scm_name = "parse-tmu", + cpp_name = "tmu_document_to_tree", + ret_type = "tree", + arg_list = { + "string" + } + }, + { + scm_name = "serialize-tmu", + cpp_name = "tree_to_tmu", + ret_type = "string", + arg_list = { + "tree" + } + }, + { + scm_name = "parse-tmu-snippet", + cpp_name = "tmu_to_tree", + ret_type = "tree", + arg_list = { + "string" + } + }, { scm_name = "texmacs->stm", cpp_name = "tree_to_scheme",