From 8cdad6ebd9856a294e4034877151a591df6e3108 Mon Sep 17 00:00:00 2001 From: Trondtr Date: Fri, 5 Apr 2024 09:52:26 +0000 Subject: [PATCH] deploy: dd9efaa1700ae4d0682813461ab27f119e5718d4 --- Links.md | 1 - Makefile.in | 8 +++++++ index-header.md | 1 + index.md | 2 +- test-diary.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 test-diary.md diff --git a/Links.md b/Links.md index 811812c8e..2107a7ae4 100644 --- a/Links.md +++ b/Links.md @@ -35,7 +35,6 @@ * `tools/` * `grammarcheckers/` * [grammarchecker.cg3](tools-grammarcheckers-grammarchecker.cg3.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/grammarcheckers/grammarchecker.cg3)) - * `/` * [grc-disambiguator.cg3](tools-grammarcheckers-grc-disambiguator.cg3.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/grammarcheckers/grc-disambiguator.cg3)) * `tokenisers/` * [tokeniser-disamb-gt-desc.pmscript](tools-tokenisers-tokeniser-disamb-gt-desc.pmscript.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/tokenisers/tokeniser-disamb-gt-desc.pmscript)) diff --git a/Makefile.in b/Makefile.in index df735a1c5..41d66cd40 100644 --- a/Makefile.in +++ b/Makefile.in @@ -999,24 +999,32 @@ $(LINKS): if test "x$$d1" != "x$$oldd1" ; then \ echo "* \`$$d1/\`" ;\ oldd1=$$d1 ;\ + oldd2="";\ + oldd3="";\ + oldd4="";\ fi ; \ if test "x$$d2" = x ; then \ echo " * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\ elif test "x$$d2" != "x$$oldd2" ; then \ echo " * \`$$d2/\`" ;\ oldd2=$$d2 ;\ + oldd3="";\ + oldd4="";\ + oldd5="";\ fi ; \ if test "x$$d3" = x -a "x$$d2" != x; then \ echo " * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\ elif test "x$$d3" != "x$$oldd3" ; then \ echo " * \`$$d3/\`" ;\ oldd3=$$d3 ;\ + oldd4="";\ fi ; \ if test "x$$d4" = x -a "x$$d3" != x ; then \ echo " * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\ elif test "x$$d4" != "x$$oldd4" ; then \ echo " * \`$$d4/\`" ;\ oldd4=$$d4 ;\ + 
oldd5="";\ fi ; \ if test "x$$d5" = x -a "x$$d4" != x ; then \ echo " * [$$docname]($$html) ([src]($(REPOURL)/$$doc))" ;\ diff --git a/index-header.md b/index-header.md index 985ee55a7..4b0014adb 100644 --- a/index-header.md +++ b/index-header.md @@ -57,6 +57,7 @@ and in a Kven spellchecker. * [Yamltestit maaliskuu 2019](YamltestitMaaliskuu2019.html) * [n_11-feilit](n_11-feilit.html) * [Kaikki generoidut paradigmat](KaikkiGeneroidutParadigmat.html) +* [Test diary](test-diary.md) # In-source documentation diff --git a/index.md b/index.md index bf0a3a064..608236c11 100644 --- a/index.md +++ b/index.md @@ -57,6 +57,7 @@ and in a Kven spellchecker. * [Yamltestit maaliskuu 2019](YamltestitMaaliskuu2019.html) * [n_11-feilit](n_11-feilit.html) * [Kaikki generoidut paradigmat](KaikkiGeneroidutParadigmat.html) +* [Test diary](test-diary.md) # In-source documentation @@ -99,7 +100,6 @@ Below is an autogenerated list of documentation pages built from structured comm * `tools/` * `grammarcheckers/` * [grammarchecker.cg3](tools-grammarcheckers-grammarchecker.cg3.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/grammarcheckers/grammarchecker.cg3)) - * `/` * [grc-disambiguator.cg3](tools-grammarcheckers-grc-disambiguator.cg3.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/grammarcheckers/grc-disambiguator.cg3)) * `tokenisers/` * [tokeniser-disamb-gt-desc.pmscript](tools-tokenisers-tokeniser-disamb-gt-desc.pmscript.html) ([src](https://github.com/giellalt/lang-fkv/blob/main/tools/tokenisers/tokeniser-disamb-gt-desc.pmscript)) diff --git a/test-diary.md b/test-diary.md new file mode 100644 index 000000000..1bf690812 --- /dev/null +++ b/test-diary.md @@ -0,0 +1,56 @@ +Test diary +========== + +This document records test statistics. + +The overall test command: `make check` + +## yaml + +The command: + +`sh test/yaml-check.sh` + +(data forthcoming) + +## Lexical coverage +fkv +Number of words (standing in `lang-fkv`): + +``` +cat 
test/data/freecorpus.txt |\ +hfst-tokenise tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst |wc -l +``` + +Number of unknown words: + +``` +cat test/data/freecorpus.txt |\ + hfst-tokenise tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst |\ + preprocess --corr=test/data/typos.txt|\ + hfst-tokenise -cg tools/tokenisers/tokeniser-disamb-gt-desc.pmhfst |\ + grep " ?"|cut -d'"' -f2|wc -l +``` + +Test with the full corpus (free + bound): + + + +### Lexical coverage of freecorpus + +The file is `test/data/freecorpus.txt`. + +Coverage: + +- 240405: 1-(42819/607401) = 0.9295 + +### Lexical coverage of free + bound + +Coverage: + + + + + + +