-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathintramine_linker.pl
2253 lines (1999 loc) · 71.6 KB
/
intramine_linker.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# intramine_linker.pl: IntraMine's autolinker (in combination with reverse_filepaths.pm).
# Pass in plain text containing mentions of other files, and get back either:
# text with links - see NonCmLinks()
# or a JSON summary of the links that can be used to create an overlay for CodeMirror
# - see CmLinks().
# File and web links are handled, "internal" links within a document are done
# in intramine_viewer.pl.
# FullPathForPartial() will return its best guess at a full path, given a single partial path
# (use when there is no relevant "context").
#
# See also Documentation/Linker.html.
#
# Links suported here:
# - FLASH links: if you type just a file name in a text file or a source file,
# IntraMine will link it to the closest known instance of that file, from amongst
# all the files of interest to you. Links to locations within files are also
# supported, such as "Linker.html#What the Linker does differently" or
# reverse_filepaths.pm#FullPathInContextNS().
# - web links: http(s), eg https://www.google.com, [Goggle](https://www.google.com)
# - Perl module links eg "use X::Y;"
#
# A "partial path" is a file name with extension) preceded by zero or more directory
# names, in any order, not necessariy consecutive, with slashes after each directory name
# (and of course a drive name can be supplied). For example, if
# C:/projects/project51/src/run.cpp
# is the desired path, then the partial path could be
# run.cpp or projects/run.cpp or C:/run.cpp or project51/projects.cpp etc.
#
# FLASH links also use the notion of "context directory", the location of the file
# mentioning the partial path. You might for example be writing
# in project51/docs/Log_Sept_20201.txt and type in "run.cpp": if there are several
# run.cpp files in your indexed folders, then the run.cpp closest to the Log_Sept_20201.txt
# will be chosen for the link - in the above example, that would be project51/src/run.cpp.
# This server is a second level server without any displayed interface, and all calls
# to retrieve links are done with JavaScript in the Viewer and Files servers.
# In the Viewer service's front end JavaScript:
# - To trace CodeMirror autolinks, start with cmAutoLinks.js#addAutoLinks(), which ends up
# calling back to CmLinks() via "req=cmLinks".
# - To trace non-CodeMirror (mainly .txt and Perl) autolinks, start with autoLinks.js#addAutoLinks(),
# which ends up calling back to NonCmLinks() via "req=nonCmLinks".
# In the Files service's front end JavaScript:
# - FullPathForPartial() is called by files.js#openAutoLink(), via "req=autolink".
# perl C:\perlprogs\intramine\intramine_linker.pl
use strict;
use warnings;
use utf8;
use FileHandle;
use Encode;
use Encode::Guess;
use HTML::Entities;
use URI::Escape;
use Time::HiRes qw ( time );
use JSON::MaybeXS qw(encode_json);
use Path::Tiny qw(path);
use lib path($0)->absolute->parent->child('libs')->stringify;
use common;
use swarmserver;
use reverse_filepaths;
use win_wide_filepaths;
use ext; # for ext.pm#IsTextExtensionNoPeriod() etc.
use intramine_glossary;
use intramine_spellcheck;
Encode::Guess->add_suspects(qw/iso-8859-1/);
$| = 1;
my $PAGENAME = '';
my $SHORTNAME = '';
my $server_port = '';
my $port_listen = '';
SSInitialize(\$PAGENAME, \$SHORTNAME, \$server_port, \$port_listen);
# For calling a service by its Short name.
my $VIEWERNAME = CVal('VIEWERSHORTNAME');
my $EDITORNAME = CVal('EDITORSHORTNAME');
my $OPENERNAME = CVal('OPENERSHORTNAME');
my $FILESNAME = CVal('FILESSHORTNAME');
my $VIDEONAME = CVal('VIDEOSHORTNAME');
# Common locations for images.
my $IMAGES_DIR = FullDirectoryPath('IMAGES_DIR');
my $COMMON_IMAGES_DIR = CVal('COMMON_IMAGES_DIR');
if (FileOrDirExistsWide($COMMON_IMAGES_DIR) != 2)
{
#print("No common images dir, setting \$COMMON_IMAGES_DIR to ''\n");
$COMMON_IMAGES_DIR = '';
}
# Edit control.
my $UseAppForLocalEditing = CVal('USE_APP_FOR_EDITING');
my $UseAppForRemoteEditing = CVal('USE_APP_FOR_REMOTE_EDITING');
my $AllowLocalEditing = CVal('ALLOW_LOCAL_EDITING');
my $AllowRemoteEditing = CVal('ALLOW_REMOTE_EDITING');
my $kLOGMESSAGES = 0; # 1 == Log Output() messages
my $kDISPLAYMESSAGES = 0; # 1 == print messages from Output() to console window
# Log is at logs/IntraMine/$SHORTNAME $port_listen datestamp.txt in the IntraMine folder.
# Use the Output() sub for routine log/print.
StartNewLog($kLOGMESSAGES, $kDISPLAYMESSAGES);
Output("Starting $SHORTNAME on port $port_listen\n\n");
# Actions. Respond to requests for links, from CodeMirror views, text views, and the Files page.
my %RequestAction;
$RequestAction{'signal'} = \&HandleBroadcastRequest; # signal=reindex or folderrenamed
$RequestAction{'req|cmLinks'} = \&CmLinks; # req=cmLinks... - linking for CodeMirror files
$RequestAction{'req|nonCmLinks'} = \&NonCmLinks; # req=nonCmLinks... - linking for non-CodeMirror files
$RequestAction{'req|autolink'} = \&FullPathForPartial; # req=autolink&partialpath=...
$RequestAction{'/test/'} = \&SelfTest; # Ask this server to test itself.
# List of English words, for spell checking.
my $wordListPath = BaseDirectory() . 'data/EnglishWords.txt';
#print("Loading list of English words for spell checking in .txt files from $wordListPath.\n");
InitDictionary($wordListPath);
# Start up db for tracking deleted files.
InitDeletesDB();
# Over to swarmserver.pm. The callback sub loads in a hash of full paths for partial paths,
# which can take some time.
MainLoop(\%RequestAction, undef, undef, \&callbackInitPathsAndGlossary);
################### subs
# Generic 'signal' handler, 'signal=reindex' means that
# new files have come along, so incorporate them into full path and partial path
# lists, for putting in file links with AddFileWebAndFileLinksToLine() etc.
# See reverse_filepaths.pm#LoadIncrementalDirectoryFinderLists().
# The list of new files is in C:/fwws/fullpaths2.log.
# 'signal=folderrenamed' means a folder has been renamed (and so potentially
# many full paths could change).
sub HandleBroadcastRequest {
my ($obj, $formH, $peeraddress) = @_;
if (defined($formH->{'signal'}))
{
if ($formH->{'signal'} eq 'reindex')
{
Output("Reindexing.\n");
# Load list of new file paths.
my $FileWatcherDir = CVal('FILEWATCHERDIRECTORY');
my $fullFilePathListPath = $FileWatcherDir . CVal('FULL_PATH_LIST_NAME'); # .../fullpaths.out
LoadIncrementalDirectoryFinderLists($fullFilePathListPath);
LoadAndRemoveDeletesFromHashes();
}
elsif ($formH->{'signal'} eq 'folderrenamed')
{
Output("Folder renamed, doing maintenance.\n");
# An arbitrary number of files could have new paths. This is hard to handle.
# For now, just re-init all the paths. This puts the Linker(s) out of action for
# a bit, but if two or more instances of this Linker are running then they will
# be taken out of service for maintenance one at a time.
# See also intramine_main.pl#BroadcastSignal()
# and intramine_main.pl#HandleMaintenanceSignal().
print("Linker on port <$port_listen> is pausing to reload changed paths due to file or folder rename.\n");
print(" On this server instance only, new read-only views will not be available,\n");
print(" and autolinks will not be shown in CodeMirror views after scrolling.\n");
print(" Other Linker instances running will not be affected, and Main will redirect\n");
print(" requests for links to avoid this Linker while it's busy.\n");
print("Reloading...\n");
my $startTime = time;
ReinitDirFinder();
RequestBroadcast('signal=backinservice&sender=Linker&respondingto=folderrenamed');
my $endTime = time;
my $elapsedSecs = int($endTime - $startTime + 0.5);
print("Linker on port $port_listen is back. Update took $elapsedSecs s.\n");
}
elsif ($formH->{'signal'} eq 'glossaryChanged')
{
my $filePath = defined($formH->{'path'}) ? $formH->{'path'} : 'BOGUS PATH';
if ($filePath =~ m!glossary\.txt$!i) # standalone glossary file
{
my $context = DirectoryFromPathTS($filePath);
LoadGlossary($filePath, $context, 1);
my $contextDir = lc(DirectoryFromPathTS($filePath));
ForgetDirChecked($contextDir);
}
else
{
LoadGlossary($filePath);
}
}
elsif ($formH->{'signal'} eq 'dictionaryChanged')
{
my $filePath = defined($formH->{'path'}) ? $formH->{'path'} : 'BOGUS PATH';
my $dict = DictionaryPath();
$dict = lc($dict);
$filePath = lc($filePath);
$dict =~ s!\\!/!g;
$filePath =~ s!\\!/!g;
if ($dict eq $filePath)
{
print("Dictionary has changed, reloading.\n");
ReadDictionary();
print("Dictionary reloaded.\n");
}
}
}
# Returned value is ignored by broadcaster - this is more of a "UDP"
# than "TCP" approach to communicating.
return('OK');
}
# Load list of all files and directories, and create a hash holding lists of all
# corresponding known full paths for partial paths, for autolinks.
# Also load all glossary entries.
sub callbackInitPathsAndGlossary {
my $FileWatcherDir = CVal('FILEWATCHERDIRECTORY');
my $fullFilePathListPath = $FileWatcherDir . CVal('FULL_PATH_LIST_NAME'); # .../fullpaths.out
# reverse_filepaths.pm#InitDirectoryFinder()
my $filePathCount = InitDirectoryFinder($fullFilePathListPath);
LoadAllGlossaryFiles();
}
sub LoadAllGlossaryFiles {
my $glossaryFileName = lc(CVal('GLOSSARYFILENAME'));
if ($glossaryFileName eq '')
{
print("WARNING, GLOSSARYFILENAME not found in data/intramine_config_4.txt. No glossaries loaded.\n");
}
my $paths = GetAllPathsForFileName($glossaryFileName);
if ($paths ne '')
{
print("Loading glossaries...\n");
LoadAllGlossaries($paths, $IMAGES_DIR, $COMMON_IMAGES_DIR,
\&FullPathInContextNS, \&BestMatchingFullDirectoryPath);
}
else
{
print("No files called $glossaryFileName were found, no glossaries loaded.\n");
}
}
# Completely reload list of all files and directories. Called by HandleBroadcastRequest() above.
sub ReinitDirFinder {
my $FileWatcherDir = CVal('FILEWATCHERDIRECTORY');
my $fullFilePathListPath = $FileWatcherDir . CVal('FULL_PATH_LIST_NAME'); # .../fullpaths.out
my $filePathCount = ReinitDirectoryFinder($fullFilePathListPath); # reverse_filepaths.pm#ReinitDirectoryFinder()
}
# For all files where the view is generated by CodeMirror ("CodeMirror files").
# Get links for all local file, image, and web links in $formH->{'text'}.
# Links are added on demand for visible lines only.
# Invoked by xmlHttpRequest in cmAutoLinks.js#requestLinkMarkup().
# request.open('get', 'http://' + mainIP + ':' + linkerPort + '/?req=cmLinks'
# + '&remote=' + remoteValue + '&allowEdit=' + allowEditValue + '&useApp=' + useAppValue
# + '&text=' + encodeURIComponent(visibleText) + '&peeraddress=' + encodeURIComponent(peeraddress)
# + '&path=' + encodeURIComponent(thePath) + '&first=' + firstVisibleLineNum + '&last='
# + lastVisibleLineNum);
# See CmGetLinksForText() just below.
sub CmLinks {
my ($obj, $formH, $peeraddress) = @_;
my $result = 'nope';
ReportActivity($SHORTNAME);
if (defined($formH->{'text'}) && defined($formH->{'path'})
&& defined($formH->{'first'}) && defined($formH->{'last'}))
{
my $path = lc($formH->{'path'});
#my $dir = lc(DirectoryFromPathTS($formH->{'path'}));
my $clientIsRemote = (defined($formH->{'remote'})) ? $formH->{'remote'}: '0';
my $allowEditing = (defined($formH->{'allowEdit'})) ? $formH->{'allowEdit'}: '0';
CmGetLinksForText($formH, $path, $clientIsRemote, $allowEditing, \$result);
}
return($result);
}
# For each link, respond with JSON for:
# - line/col of start of match in text
# - text in match to be marked up as link (determines line/col of end of match in text also)
# - type of link: 'web', 'file', 'image'
# - appropriate content for <a> element for linked file
sub CmGetLinksForText {
my ($formH, $dir, $clientIsRemote, $allowEditing, $resultR) = @_;
my $text = $formH->{'text'};
my $firstLineNum = $formH->{'first'};
my $lastLineNum = $formH->{'last'};
my $json = ''; # JSON string for this replaces $$resultR if $numLinksFound
my @links; # This array holds (object) contents of what in JS will be resp.arr[].
my $serverAddr = ServerAddress();
my $checkSpelling = (defined($formH->{'spellcheck'})) ? $formH->{'spellcheck'}: 0;
if ($checkSpelling eq 'true')
{
$checkSpelling = 1;
}
else
{
$checkSpelling = 0;
}
AddWebAndFileLinksToVisibleLinesForCodeMirror($text, $firstLineNum, $dir, \@links, $serverAddr,
$server_port, $clientIsRemote, $allowEditing, $checkSpelling);
my $numLinksFound = @links;
if ($numLinksFound)
{
my %arrHash;
$arrHash{'arr'} = \@links;
$json = \%arrHash;
#####$$resultR = uri_escape_utf8(encode_json($json));
$$resultR = encode_json($json);
}
}
# For non-CodeMirror files.
# Add links for all local file, image, and web links in $formH->{'text'}.
# Links are added on demand for visible lines only.
# Invoked by xmlHttpRequest in autoLinks.js#requestLinkMarkup().
# As opposed to CodeMirror views, links are inserted directly in the returned text.
sub NonCmLinks {
my ($obj, $formH, $peeraddress) = @_;
my $result = 'nope';
ReportActivity($SHORTNAME);
if (defined($formH->{'text'}) && defined($formH->{'path'})
&& defined($formH->{'first'}) && defined($formH->{'last'}))
{
my $path = lc($formH->{'path'});
#my $dir = lc(DirectoryFromPathTS($formH->{'path'}));
my ($baseName, $ext) = FileNameProperAndExtensionFromFileName($formH->{'path'});
$ext = lc($ext);
my $clientIsRemote = (defined($formH->{'remote'})) ? $formH->{'remote'}: '0';
my $allowEditing = (defined($formH->{'allowEdit'})) ? $formH->{'allowEdit'}: '0';
my $shouldInline = (defined($formH->{'shouldInline'})) ? $formH->{'shouldInline'}: '0';
GetLinksForText($formH, $path, $ext, $clientIsRemote, $allowEditing, $shouldInline, \$result);
}
return($result);
}
sub GetLinksForText {
my ($formH, $dir, $ext, $clientIsRemote, $allowEditing, $shouldInline, $resultR) = @_;
my $text = $formH->{'text'};
#my $firstLineNum = $formH->{'first'};
#my $lastLineNum = $formH->{'last'};
my $serverAddr = ServerAddress();
AddWebAndFileLinksToVisibleLines($text, $dir, $ext, $serverAddr, $server_port,
$clientIsRemote, $allowEditing, $shouldInline, $resultR);
}
# Called in response to the %RequestAction req=autolink&partialpath=...
# Get best full file path matching 'partialpath'. See reverse_filepaths.pm#FullPathInContextNS().
# This is actually called by the Files page, see files.js#openAutoLinkWithPort().
# Note there is no "context" folder for the provided partial path, so a directory name
# will be needed more often. Eg "main.cpp" might be adequate in a log file when linking to
# the closest instance of main.cpp, but when using the File page's Open dialog then something
# more like project51/main.cpp will be needed.
sub FullPathForPartial {
my ($obj, $formH, $peeraddress) = @_;
my $result = 'nope';
if (defined($formH->{'partialpath'}))
{
my $partialPath = $formH->{'partialpath'};
$partialPath =~ s!\\!/!g;
my $contextDir = '';
$result = FullPathInContextNS($partialPath, $contextDir);
if ($result eq '') # Special handling for images
{
if ($partialPath =~ m!\.(\w\w?\w?\w?\w?\w?\w?)(\#|$)!)
{
my $extProper = $1;
if (IsImageExtensionNoPeriod($extProper))
{
my $trimmedCurrentPath = $partialPath;
$trimmedCurrentPath =~ s!^/!!;
if (FileOrDirExistsWide($IMAGES_DIR . $trimmedCurrentPath) == 1)
{
$result = $IMAGES_DIR . $trimmedCurrentPath;
}
elsif ($COMMON_IMAGES_DIR ne '' && FileOrDirExistsWide($COMMON_IMAGES_DIR . $trimmedCurrentPath) == 1)
{
$result = $COMMON_IMAGES_DIR . $trimmedCurrentPath;
}
}
}
}
}
if ($result eq '')
{
$result = 'nope';
}
my $encodedResult = encode_utf8($result);
return($encodedResult);
}
{ ##### AutoLink
my $host;
my $port;
my $clientIsRemote;
my $allowEditing;
my $haveRefToText; # For CodeMirror we get the text not a ref, and this is 0.
my $line; # Full text of a single line being autolinked
my $revLine; # $line reversed (used to search backwards from a file extension)
my $contextDir; # The directory path to the file wanting links
my $len; # Length of $line.
# In non-CodeMirror files there are <mark> tags around Search highlights.
# We need a version of the line of text that's been stripped of <mark> tags
# for spotting links. $revLine can just be stripped of <mark>s since it's
# only used to spot links. Basically, we need to spot links in the stripped
# version of the line but do the replacements in the original line.
my $lineIsStripped; # 1 means <mark> tags have been removed from $strippedLine.
my $strippedLine;
my $strippedLen;
# Replacements for discovered links: For text files where replacement is done directly
# in the text, these replacements are more easily done in reverse order to avoid throwing off
# the start/end. For CodeMirror files, reps are done in the JavaScript (using $linksA).
my @repStr; # Replacement for original text, or overlay text to put over original
my @repLen; # Length of original text to be replaced (not length of repStr)
my @repStartPos; # Start position of replacement in original text
my @repLinkType; # For CodeMirror, 'file', 'web', 'image'
my @linkIsPotentiallyTooLong; # For unquoted links to text headers, we grab 100 chars, which might be too much.
my $longestSourcePath; # Longest linkable path identified in original text
my $bestVerifiedPath; # Best full path corresponding to $longestSourcePath
my $shouldInlineImages; # True = use img element, false = use showhint().
# AddWebAndFileLinksToLine: look for file and web address mentions, turn them into links.
# Does the promised "autolinking", so no quotes etc are needed around file names even if
# the file name contains spaces, and directories in the path are needed only if the file
# name isn't unique and the file being mentioned isn't the "closest" one, where distance
# is measured as the number of directory moves up and down from the referring file to
# the file being mentioned.
# So for example
# mentioning main.cpp is fine if the referring file is proj51/docs/design.txt and the file wanted
# is proj51/src/main.cpp, but if the referring file is somewhere outside the proj51 folder then
# it should be mentioned as eg proj51/main.cpp. See Gloss.txt for more about links.
# One exception, quotes are needed to link to a header within a document. Picking one of my own
# docs as an example, IntraMine Jan 2019.txt becomes a link, but to link to a header within it
# quotes are needed: eg "IntraMine Jan 2019.txt#Editor/Opener links in CodeMirror views".
# OK the quotes aren't really needed, but your resulting links will look better if you use them,
# since without the quotes an arbitrary 100 characters will be grabbed as the potential
# header string, leaving it to the receiving end to figure out which header is meant.
# And that leads to a very long underlined link, which looks a bit ugly.
sub AddWebAndFileLinksToLine {
my ($txtR, $theContextDir, $theHost, $thePort, $theClientIsRemote, $shouldAllowEditing,
$shouldInline, $restrictLinks, $currentLineNumber, $linksA) = @_;
if (ref($txtR) eq 'SCALAR') # REFERENCE to a scalar, so doing text
{
$haveRefToText = 1;
$line = $$txtR;
}
else # not a ref (at least it shouldn't be), so doing CodeMirror
{
$haveRefToText = 0;
$line = $txtR;
}
# Init some of the remaining variables with AutoLink scope.
# (For $revLine and $strippedLine see EvaluateLinkCandidates just below.)
$contextDir = $theContextDir; # Path of directory for file containing the text in $txtR
$len = length($line);
$host = $theHost;
$port = $thePort;
$clientIsRemote = $theClientIsRemote;
$allowEditing = $shouldAllowEditing;
@repStr = ();
@repLen = ();
@repStartPos = ();
@repLinkType = ();
@linkIsPotentiallyTooLong = ();
$shouldInlineImages = $shouldInline;
# Look for all of: single or double quoted text, a potential file extension, or a url.
# Or (added later), a [text](href) with _LB_ for '[', _RP_ for ')' etc as found in POD files.
EvaluateLinkCandidates($restrictLinks);
my $numReps = @repStr;
if ($numReps)
{
if ($haveRefToText)
{
DoTextReps($numReps, $txtR);
} # text
else # CodeMirror
{
DoCodeMirrorReps($numReps, $currentLineNumber, $linksA);
} # CodeMirror
} # $numReps
}
# Look for all of: single or double quoted text, a potential file extension, or a url.
# Or (added later), a [text](href) with _LB_ for '[', _RP_ for ')' etc as found in POD files.
sub EvaluateLinkCandidates {
my ($restrictLinks) = @_;
my $previousEndPos = 0;
my $previousRevEndPos = $len;
my $haveGoodMatch = 0; # check-back distance is not adjusted if there is no good current match.
# Collect positions of quotes and HTML tags (start and end of both start and end tags).
# And <mark> tags, which can interfere with links.
GetTagAndQuotePositions($line);
$lineIsStripped = LineHasMarkTags(); # Stripping <mark> tags happens next.
# If $line has <mark> tags, create a version stripped of those, for spotting links
# without having to use a monstrous regex.
if ($lineIsStripped)
{
$strippedLine = $line;
$strippedLine =~ s!(</?mark[^>]*>)!!g;
$strippedLen = length($strippedLine);
$revLine = scalar reverse($strippedLine);
}
else
{
$strippedLine = $line;
$strippedLen = $len;
$revLine = scalar reverse($line);
}
# while see quotes or a potential file .extension, or http(s)://
# or [text](href) with _LB_ for '[', _RP_ for ')' etc. Those are from POD files only.
# So ok, this is a bit of a beast. Breaking it down:
# ((\[([^\]]+)]\((https?://[^)]+)\) : [displayed name](URL)
# (_LB_.+?_RB__LP_.+?_RP_ : ditto, but encoded, _LB_ == Left Bracket etc, POD files only
# (\"([^"]+)\.\w+([#:][^"]+)?\") : "quoted file specifier", must have file extension
# (\'([^']+)\.\w+([#:][^']+)?\') : 'quoted file specifier', must have file extension
# (\"[^"]+\")|(\'[^']+\') : "directory" | 'directory'
# \.(\w\w?\w?\w?\w?\w?\w?)([#:][A-Za-z0-9_:~-]+)? : unquoted file specifier, must have ext
# ((https?://([^\s)<\"](?\!ttp:))+)) : web link
while ($strippedLine =~ m!((\[([^\]]+)]\((https?://[^)]+)\))|(_LB_.+?_RB__LP_.+?_RP_)|(\"([^"]+)\.\w+([#:][^"]+)?\")|(\'([^']+)\.\w+([#:][^']+)?\')|(\"[^"]+\")|(\'[^']+\')|\.(\w\w?\w?\w?\w?\w?\w?)([#:][A-Za-z0-9_:~-]+)?|((https?://([^\s)<\"](?\!ttp:))+)))!g)
{
my $startPos = $-[0]; # this does include the '.', beginning of entire match
my $endPos = $+[0]; # pos of char after end of entire match
my $captured = $1; # double-quoted chunk, or extension (plus any anchor), or url or [text](href)
my $haveTextHref = (index($captured, '_LB_') == 0);
my $textHref = ($haveTextHref) ? $captured : '';
my $haveMarkdownLink = (defined($3)) ? 1 : 0; # [...](http...)
my $doMarkdownLink = ($haveMarkdownLink && $haveRefToText); # otherwise do just the http link part
my $markdownDisplayText = ($doMarkdownLink) ? $3: '';
my $markdownLink = ($doMarkdownLink) ? $4: '';
my $charBefore = ($startPos > 0) ? substr($strippedLine, $startPos - 1, 1) : '';
my $haveDirSpecifier = 0;
$haveGoodMatch = 0;
# $12, $13: (\"[^"]+\")|(\'[^']+\')
# These are checked for after other quote patterns, and if triggered
# we're dealing with a potential directory specifier.
if (defined($12) || defined($13))
{
$haveDirSpecifier = 1;
}
my $haveQuotation = ((index($captured, '"') == 0) || (index($captured, "'") == 0));
my $badQuotation = 0;
my $insideID = 0;
my $quoteChar = '';
my $hasPeriod = (index($captured, '.') >= 0);
if ($haveQuotation || $haveDirSpecifier)
{
# Check for non-word or BOL before first quote, non-word or EOL after second.
if ($startPos > 0)
{
# Reject if char before first quote is a word char or '='
# (as in class="...", as found in syntax highlighting etc).
# For '=', don't mark as bad if $captured contains a '.'
# (hinting that there might be a file extension in there).
if ($charBefore !~ m!\W|! || ($charBefore eq '=' && !$hasPeriod))
{
$badQuotation = 1;
}
else
{
# Check we aren't inside a generated header id such as
# <h3 id="gloss2html.pl#AddWebAndFileLinksToLine()_refactor"...
if ($haveRefToText)
{
if ($startPos >= 7)
{
if (substr($strippedLine, $startPos - 3, 3) eq "id=")
{
$badQuotation = 1;
$insideID = 1;
}
}
elsif ($startPos >= 5) # Perl line numbers, <td n="1">
{
if (substr($strippedLine, $startPos - 2, 2) eq "n=")
{
$badQuotation = 1;
$insideID = 1;
}
}
}
}
}
if ($endPos < $strippedLen)
{
my $charAfter = substr($strippedLine, $endPos, 1);
# Reject if char after is a word char or '>'
# (as in class="quoted">).
if ($charAfter !~ m!\W! || ($charAfter eq '>' && !$hasPeriod))
{
$badQuotation = 1;
}
}
# Skip quotes that are inside HTML tags.
# Is quote at $startPos at a bad position? Skip.
# Is quote at $endPos at a bad position? Reset to next good position, or skip.
if (!$hasPeriod)
{
if (IsBadQuotePosition($startPos))
{
$badQuotation = 1;
}
elsif (IsBadQuotePosition($endPos-1))
{
my $nextGoodQuotePos = NextGoodQuotePosition($endPos-1);
if ($nextGoodQuotePos >= 0)
{
$endPos = $nextGoodQuotePos + 1;
}
else
{
$badQuotation = 1;
}
}
}
if (!$badQuotation && !$haveDirSpecifier)
{
# Trim quotes and pick up $quoteChar.
$quoteChar = substr($captured, 0, 1);
$captured = substr($captured, 1);
$captured = substr($captured, 0, length($captured) - 1);
}
}
if ($badQuotation)
{
if ($insideID)
{
pos($strippedLine) = $endPos;
}
else
{
pos($strippedLine) = $startPos + 1;
}
}
else
{
my $haveURL = (!$haveTextHref && index($captured, 'http') == 0);
my $anchorWithNum = (!$haveQuotation && !$haveURL && !$haveTextHref && defined($15)) ? $15 : ''; # includes '#'
# Need to sort out actual anchor if we're dealing with a quoted chunk.
if ($haveQuotation && !$haveURL && !$haveTextHref)
{
my $hashPos = index($captured, '#');
if ($hashPos < 0)
{
$hashPos = index($captured, ':');
}
if ($hashPos >= 0)
{
my $quotePos = index($captured, '"', $hashPos);
if ($quotePos != $hashPos + 1)
{
$anchorWithNum = substr($captured, $hashPos); # includes '#'
}
}
}
my $url = $haveURL ? $captured : '';
my $extProper = (!$haveQuotation && !$haveURL && !$haveTextHref && !$haveDirSpecifier) ? substr($captured, 1) : '';
# Get file extension if it's a quoted chunk (many won't have an extension).
if ($haveQuotation && !$haveURL && !$haveTextHref)
{
my $foundExtension = ExtensionBeforeHashOrEnd($captured);
if ($foundExtension ne '')
{
$extProper = $foundExtension;
#$extProper = $1;
}
}
if ($anchorWithNum ne '' && !$haveURL && !$haveQuotation && !$haveTextHref)
{
my $pos = index($extProper, '#');
if ($pos < 0)
{
$pos = index($extProper, ':');
}
$extProper = substr($extProper, 0, $pos);
}
# "$haveTextExtension" includes docx|pdf
my $haveTextExtension = (!$haveURL && !$haveTextHref && IsTextDocxPdfExtensionNoPeriod($extProper));
my $haveImageExtension = $haveTextExtension ? 0 : (!$haveURL && !$haveTextHref && IsImageExtensionNoPeriod($extProper));
my $haveVideoExtension = (!$haveURL && !$haveTextHref && IsVideoExtensionNoPeriod($extProper));
my $haveGoodExtension = ($haveTextExtension || $haveImageExtension || $haveVideoExtension); # else URL or [text](href)
my $linkIsMaybeTooLong = 0;
# Potentially a text file mention or an image.
if ($haveGoodExtension)
{
# Skip video link if client is remote, no way yet to handle that.
if (!($haveVideoExtension && $clientIsRemote))
{
# At the last minute, suppress #anchorWithNum if it has a leading
# space or tab.
if (index($anchorWithNum, ' ') == 1 || index($anchorWithNum, "\t") == 1)
{
$anchorWithNum = '';
}
$haveGoodMatch = RememberTextOrImageOrVideoFileMention($startPos,
$previousRevEndPos, $captured, $extProper,
$haveQuotation, $haveTextExtension, $haveImageExtension,
$haveVideoExtension, $quoteChar, $anchorWithNum, $restrictLinks);
}
} # if known extension
elsif ($haveURL)
{
# Skip first char if quoted.
if ($haveQuotation)
{
++$startPos;
}
RememberUrl($startPos, $haveQuotation, $quoteChar, $url);
$haveGoodMatch = 1;
}
elsif ($haveTextHref)
{
RememberTextHref($startPos, $textHref);
$haveGoodMatch = 1;
}
elsif ($haveDirSpecifier)
{
$haveGoodMatch = RememberDirMention($startPos, $captured);
}
elsif ($haveMarkdownLink)
{
if ($doMarkdownLink)
{
RememberMarkdownLink($startPos, $captured, $markdownDisplayText, $markdownLink);
$haveGoodMatch = 1;
}
else # Handle like $haveURL using $4
{
my $urlStartPos = $startPos + length($3) + 3; # + name + []()
RememberUrl($urlStartPos, 0, '', $4);
$haveGoodMatch = 1;
}
}
if ($haveGoodMatch)
{
$previousEndPos = $endPos;
$previousRevEndPos = $strippedLen - $previousEndPos - 1; # Limit backwards extent of 2nd and subsequent searches.
$haveGoodMatch = 0;
}
elsif (!$haveGoodMatch && $haveQuotation)
{
pos($strippedLine) = $startPos + 1;
}
}
} # while another extension or url matched
}
# Good extension if period followed by up to 7 word chars
# and then end of string or a '#'. I tried to replace this
# with a loop and substr(), but it got too complicated.
# if ($captured =~ m!\.(\w\w?\w?\w?\w?\w?\w?)(\#|$)!)
# $extProper = $1;
sub ExtensionBeforeHashOrEnd {
my ($captured) = @_;
my $result = '';
if ($captured =~ m!\.(\w\w?\w?\w?\w?\w?\w?)([#:]|$)!)
{
$result = $1;
}
return($result);
}
# If we can find a valid file mention looking backwards from $startPos, remember its details
# in @repStr, @repLen etc. We go for the longest valid path.
# We're searching backwards for the file name and directories when an extension has
# been spotted.
sub RememberTextOrImageOrVideoFileMention {
my ($startPos, $previousRevEndPos, $captured, $extProper, $haveQuotation,
$haveTextExtension, $haveImageExtension, $haveVideoExtension, $quoteChar, $anchorWithNum, $restrictLinks) = @_;
my $linkIsMaybeTooLong = 0;
# To allow for spaces in #anchors where file#anchor hasn't been quoted, grab the
# 100 chars following '#' here, and sort it out on the other end when going to
# the anchor. Only for txt files. This looks ugly in the view, alas.
if ($extProper eq 'txt' && !$haveQuotation && $anchorWithNum ne '')
{
my $anchorPosOnLine = $startPos;
$anchorPosOnLine = index($strippedLine, '#', $startPos);
if ($anchorPosOnLine < 0)
{
$anchorPosOnLine = index($strippedLine, ':', $startPos);
}
# TODO if the anchor is a line number just grab the number.
# (But how to tell a line number from a heading?)
$anchorWithNum = substr($strippedLine, $anchorPosOnLine, 100);
# Remove any HTML junk there.
$anchorWithNum =~ s!\</[A-Za-z]+\>!!g;
$anchorWithNum =~ s!\<[A-Za-z]+\>!!g;
$linkIsMaybeTooLong = 1;
}
my $fileTail = '.' . $extProper;
my $fileTailLength = length($fileTail);
my $anchorLength = length($anchorWithNum);
my $periodPlusAfterLength = $fileTailLength + $anchorLength;
$longestSourcePath = '';
$bestVerifiedPath = '';
# Get quotes out of the way first:
# $haveQuotation: set $longestSourcePath, $bestVerifiedPath, $doingQuotedPath
# to match approach just below if quote follows an extension mention.
my $doingQuotedPath = 0;
if ($haveQuotation)
{
my $pathToCheck = $captured;
my $pos = index($pathToCheck, '#');
if ($pos < 0)
{
$pos = index($pathToCheck, ':');
}
if ($pos > 0)
{
$pathToCheck = substr($pathToCheck, 0, $pos);
}
my $verifiedPath = '';
if ($restrictLinks)
{
$verifiedPath = RestrictedFullPath($pathToCheck, $contextDir);
}
else
{
$verifiedPath = FullPathInContextNS($pathToCheck, $contextDir); # reverse_filepaths.pm }
}
if ($verifiedPath ne '')
{
$longestSourcePath = $pathToCheck;
$bestVerifiedPath = $verifiedPath;
$doingQuotedPath = 1;
}
}
my $revPos = $strippedLen - $startPos - 1 + 1; # extra +1 there to skip '.' before the extension proper.
# Extract the substr to search.
my $revStrToSearch = substr($revLine, $revPos, $previousRevEndPos - $revPos + 1);
# Good break points for hunt are [\\/ ], and [*?"<>|\t] end the hunt.
# For image files only, we look in a couple of standard places for just the file name.
# This can sometimes be expensive, but we look at the standard locations only until
# either a slash is seen or a regular mention is found.
my $checkStdImageDirs = ($haveImageExtension || $haveVideoExtension) ? 1 : 0;
my $commonDirForImageName = ''; # Set if image found one of the std image dirs
my $imageName = ''; # for use if image found in one of std image dirs
GetLongestGoodPath($doingQuotedPath, $checkStdImageDirs, $revStrToSearch, $fileTail,
\$imageName, \$commonDirForImageName, $restrictLinks);
my $haveGoodMatch = 0;
if ($longestSourcePath ne '' || $commonDirForImageName ne '')
{
my $linkType = 'file'; # For CodeMirror
my $usingCommonImageLocation = 0;
if ($longestSourcePath eq '')
{
$longestSourcePath = $imageName;
$usingCommonImageLocation = 1;
}
my $repString = '';
my $repLength = length($longestSourcePath) + $anchorLength;
if ($haveQuotation)
{
$repLength += 2; # for the quotes
}
my $repStartPosition = ($haveQuotation) ? $startPos : $startPos - $repLength + $periodPlusAfterLength;
# We are using the "stripped" line here, so recover the start position and length of text
# to be replaced by adding back the <mark> tags, for use at the displayed link text.
($repStartPosition, $repLength) = CorrectedPositionAndLength($repStartPosition, $repLength);
my $displayTextForAnchor = substr($line, $repStartPosition, $repLength);
if ($haveTextExtension)
{
GetTextFileRep($haveQuotation, $quoteChar, $extProper, $longestSourcePath,
$anchorWithNum, $displayTextForAnchor, \$repString);
}
else # image or video extension
{
# For CodeMirror
if ($haveVideoExtension)
{
$linkType = 'video';
}
else
{
$linkType = 'image';
}
GetImageFileRep($haveQuotation, $quoteChar, $usingCommonImageLocation,
$imageName, $displayTextForAnchor, $haveVideoExtension, \$repString);
}
push @repStr, $repString;
push @repLen, $repLength;
push @repStartPos, $repStartPosition;
push @linkIsPotentiallyTooLong, $linkIsMaybeTooLong;
if (!$haveRefToText)
{
push @repLinkType, $linkType;
}
$haveGoodMatch = 1;
}
return($haveGoodMatch);
}
# If $captured text in quotes can be associated with a full directory path
# by reverse_filepaths.pm#BestMatchingFullDirectoryPath(),
# push a link for it into @repStr etc.
sub RememberDirMention {
my ($startPos, $captured) = @_;
my $repLength = length($captured);
# Adjust position and length of URL as displayed to include any <mark> tags that were stripped.
($startPos, $repLength) = CorrectedPositionAndLength($startPos, $repLength);
# Re-get $captured directory for display (including any <mark> elements).
my $displayedDir = substr($line, $startPos, $repLength); # was = $url;
$repLength = length($displayedDir);
my $haveGoodMatch = 0;
my $trimmedDirPath = $captured;
# Trim quotes
$trimmedDirPath = substr($trimmedDirPath, 1);
$trimmedDirPath = substr($trimmedDirPath, 0, -1);
# Change \ to /.
$trimmedDirPath =~ s!\\!/!g;
# Remove any starting or trailing slashes.
$trimmedDirPath =~ s!^/!!;
$trimmedDirPath =~ s!/$!!;
# Lower case
$trimmedDirPath = lc($trimmedDirPath);
my $directoryPath = BestMatchingFullDirectoryPath($trimmedDirPath, $contextDir);
if ($directoryPath ne '')
{
my $linkType = 'directory'; # For CodeMirror
# Show a hint with the dir path.
my $dirHint = " onmouseover=\"showhint('<br>$directoryPath<br> ', this, event, '500px', false);\"";
my $repString = "<a href=\"$directoryPath\" onclick=\"openDirectory(this.href); return false;\"$dirHint>$displayedDir</a>";
push @repStr, $repString;
push @repLen, $repLength;
push @repStartPos, $startPos;
push @linkIsPotentiallyTooLong, 0;
if (!$haveRefToText)
{
push @repLinkType, $linkType;
}
$haveGoodMatch = 1;
}
return($haveGoodMatch);
}
# Keep looking backwards a word at a time, calling FullPathInContextNS() and noting
# $bestVerifiedPath and $longestSourcePath until beginning of line or double quote is seen.
# The longest good path if any is left in $longestSourcePath.
sub GetLongestGoodPath {