#!/usr/bin/perl
#
# gather-tags.pl - list every passage tag used in a Twine HTML file.
#
# Provenance: added as perl_scripts/gather-tags.pl by commit
# 17320643d7c288ac0607a82307d4b2948c4a3f8a (honzaflash, 2021-10-07,
# subject: "script for extracting all used passage tags").
#
# Usage:
#   ./gather-tags.pl TWINE_HTML [-l]
#
# Arguments:
#   TWINE_HTML  path of the HTML file to scan (required)
#   -l          print one tag per line instead of a comma-separated list
#
# Output:
#   The distinct tags, sorted, on STDOUT. Nothing is printed when the
#   file contains no tags.
#
# Exit status:
#   0 on success, 1 when TWINE_HTML is missing; dies (non-zero) when the
#   file cannot be opened.

use strict;
use warnings;

my $usage =
    "usage: ./gather-tags.pl TWINE_HTML [-l]\n" .
    " prints all existing passage tags\n";

my ($html_path, $mode) = @ARGV;

if (not defined $html_path) {
    # A usage error is a diagnostic, so keep it out of the tag output stream.
    print STDERR $usage;
    exit 1;
}

open(my $html, '<', $html_path)
    or die "couldn't open the file: $html_path: $!";

# Used as a set: each key is a tag seen at least once.
my %tags;

while (my $line = <$html>) {
    # NOTE(review): the element name in this pattern was reconstructed; the
    # copy of the script this was taken from had angle-bracketed text
    # stripped. `tw-passagedata` is the Twine 2 passage element -- confirm
    # against a real export.
    #
    # The /g loop picks up every passage element on the line, not just the
    # first one, since several elements may share a single line.
    while ($line =~ /<tw-passagedata [^>]* tags="([^"]*)"/g) {
        # split ' ' ignores leading whitespace, so a tags attribute that
        # starts with a space does not produce an empty-string tag.
        $tags{$_} = 1 for split ' ', $1;
    }
}

close($html);

# Hash key order is unspecified; sort so repeated runs give the same output.
my @sorted_tags = sort keys %tags;

if (defined $mode and $mode eq "-l") {
    print "$_\n" for @sorted_tags;
}
elsif (@sorted_tags) {
    # Join instead of appending ", " after every tag, so the line has no
    # trailing separator and ends with a newline.
    print join(", ", @sorted_tags), "\n";
}