-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatasets.html
More file actions
1805 lines (1574 loc) · 294 KB
/
datasets.html
File metadata and controls
1805 lines (1574 loc) · 294 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Eye-Tracking-While-Reading Datasets</title>
<link rel="stylesheet" href="https://cdn.datatables.net/1.13.7/css/jquery.dataTables.min.css">
<style>
/* Global reset: remove default spacing and size every box by its border edge. */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Page canvas: system font stack over a diagonal two-stop gradient. */
body {
  min-height: 100vh;
  padding: 20px;
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
  background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
}

/* Centered white card (capped at 1600px) that holds the page content. */
.container {
  max-width: 1600px;
  margin: 0 auto;
  padding: 30px;
  background: white;
  border-radius: 10px;
  box-shadow: 0 10px 40px rgba(0, 0, 0, 0.1);
}

/* Main page title. */
h1 {
  margin-bottom: 10px;
  font-size: 2.5em;
  color: #2c3e50;
}

/* Muted tagline shown under the title. */
.subtitle {
  margin-bottom: 30px;
  font-size: 1.1em;
  color: #7f8c8d;
}
/* Filter panel shell with an accent stripe on its left edge. */
.controls {
  background: #f8f9fa;
  padding: 20px;
  border-radius: 8px;
  margin-bottom: 30px;
  border-left: 4px solid #3498db;
}

.controls h3 {
  color: #2c3e50;
  margin-bottom: 15px;
  font-size: 1.1em;
}

/* Responsive grid: as many columns of at least 280px as fit the row. */
.filter-group {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
  gap: 15px;
  margin-bottom: 20px;
}

/* Wrapper around one group of filters; groups are separated by a hairline,
   which the last group drops along with its trailing spacing. */
.filter-group-wrapper {
  margin-bottom: 30px;
  padding-bottom: 20px;
  border-bottom: 1px solid #ecf0f1;
}

.filter-group-wrapper:last-child {
  border-bottom: none;
  margin-bottom: 0;
  padding-bottom: 0;
}

/* Group heading: full-width block with a 2px underline (blue by default). */
.filter-group-header {
  font-weight: 700;
  color: #2c3e50;
  font-size: 1.1em;
  margin-bottom: 15px;
  padding-bottom: 8px;
  border-bottom: 2px solid #3498db;
  display: block;
  width: 100%;
}

/* Category-specific underline colors matching table header colors */
.filter-header-general {
  border-bottom-color: #5a8ab8 !important;
}

.filter-header-participants {
  border-bottom-color: #8b5a9e !important;
}

.filter-header-stimuli {
  border-bottom-color: #d47f3f !important;
}

.filter-header-available-data {
  border-bottom-color: #6b9b6f !important;
}

.filter-header-lab-setup {
  border-bottom-color: #b85580 !important;
}

.filter-header-stimulus-layout {
  border-bottom-color: #d4b648 !important;
}

/* Vertical stack of the filters inside one group. */
.filter-group-items {
  display: flex;
  flex-direction: column;
  gap: 15px;
}

/* A single filter: label stacked above its control.
   Declared once, after the group rules, so it still wins any
   equal-specificity tie against them. */
.filter-item {
  display: flex;
  flex-direction: column;
}

.filter-item label {
  font-weight: 600;
  color: #2c3e50;
  margin-bottom: 8px;
  font-size: 0.95em;
}

/* Shared look for text inputs and selects. */
.filter-item input,
.filter-item select {
  padding: 10px 12px;
  border: 1px solid #bdc3c7;
  border-radius: 5px;
  font-size: 0.95em;
  transition: border-color 0.3s;
}

/* The default outline is replaced by a blue border plus a soft ring. */
.filter-item input:focus,
.filter-item select:focus {
  outline: none;
  border-color: #3498db;
  box-shadow: 0 0 0 3px rgba(52, 152, 219, 0.1);
}

.filter-item input[type="text"],
.filter-item input[type="number"] {
  width: 100%;
}

/* Side-by-side inputs that share the row width equally. */
.range-inputs {
  display: flex;
  gap: 10px;
  align-items: center;
}

.range-inputs input {
  flex: 1;
}

/* Checkbox row set off from the filter above it by a top border. */
.unknown-option {
  margin-top: 10px;
  padding-top: 10px;
  border-top: 1px solid #ecf0f1;
  display: flex;
  align-items: center;
  gap: 8px;
}

.unknown-option input[type="checkbox"] {
  cursor: pointer;
}

/* Lighter, tighter label than the regular filter label. */
.unknown-option label {
  margin: 0;
  cursor: pointer;
  font-size: 0.9em;
  font-weight: 400;
}
/* Row of action buttons that wraps onto further lines when space runs out. */
.button-group {
  display: flex;
  flex-wrap: wrap;
  gap: 10px;
}

/* Baseline look shared by every <button> on the page. */
button {
  padding: 10px 20px;
  border: none;
  border-radius: 5px;
  font-size: 0.95em;
  font-weight: 600;
  cursor: pointer;
  transition: all 0.3s;
}

/* Primary action: blue, lifts slightly and gains a glow on hover. */
.btn-primary {
  color: white;
  background: #3498db;
}

.btn-primary:hover {
  background: #2980b9;
  transform: translateY(-2px);
  box-shadow: 0 4px 12px rgba(52, 152, 219, 0.3);
}

/* Secondary action: grey, darkens on hover. */
.btn-secondary {
  color: white;
  background: #95a5a6;
}

.btn-secondary:hover {
  background: #7f8c8d;
}

/* Compact size modifier. */
.btn-sm {
  padding: 6px 12px;
  font-size: 0.85em;
}
/* Scroll viewport for the table: capped at 70vh tall, scrolls in both axes. */
#tableContainer {
  overflow-x: scroll;
  overflow-y: auto;
  border: 1px solid #ecf0f1;
  border-radius: 8px;
  margin-top: 20px;
  max-height: 70vh;
  position: relative;
}

#dataTable {
  width: 100%;
  border-collapse: collapse;
  min-width: 900px;
}

/* Frozen first column styles */
/* First cell of every row sticks to the left edge; the shadow marks the seam. */
#tableContainer.frozen-column #dataTable th:first-child,
#tableContainer.frozen-column #dataTable td:first-child {
  position: sticky;
  left: 0;
  z-index: 10;
  background: white;
  box-shadow: 2px 0 5px rgba(0, 0, 0, 0.1);
}

/* The frozen header cell keeps the header colour and sits above its row. */
#tableContainer.frozen-column #dataTable thead th:first-child {
  background: #34495e;
  z-index: 11;
}

/* Frozen body cells need an explicit background so the zebra/hover tint
   still shows instead of the white set above. */
#tableContainer.frozen-column #dataTable tbody tr:nth-child(even) td:first-child,
#tableContainer.frozen-column #dataTable tbody tr:hover td:first-child {
  background: #f8f9fa;
}

/* The sticky <thead> is its own stacking context, so the z-index: 11 on its
   first cell only orders cells inside the header. Without this rule the
   header (z-index 2) ends up below the frozen body cells (z-index 10), which
   then slide over the header row and its tooltips on vertical scroll. */
#tableContainer.frozen-column #dataTable thead {
  z-index: 12;
}

/* Header block stays pinned to the top of the scroll viewport. */
#dataTable thead {
  background: #34495e;
  color: white;
  position: sticky;
  top: 0;
  z-index: 2;
}

/* Header cells: position: relative anchors the ::before/::after tooltip. */
#dataTable th {
  padding: 15px;
  text-align: left;
  font-weight: 600;
  border-bottom: none;
  white-space: nowrap;
  position: relative;
  background: #34495e;
  cursor: pointer;
}
/* Tooltip styling for column headers with descriptions */
#dataTable th[title] {
  cursor: help;
}

/* Tooltip bubble: shows the header's title text in a box centred below the
   cell. pointer-events: none keeps the bubble from capturing the hover. */
#dataTable th[title]:hover::after {
  content: attr(title);
  position: absolute;
  top: 100%;
  left: 50%;
  transform: translateX(-50%);
  margin-top: 8px;
  padding: 10px 14px;
  background: #2c3e50;
  color: #ecf0f1;
  border: 1px solid #34495e;
  border-radius: 6px;
  font-size: 0.85em;
  font-weight: normal;
  white-space: normal;
  width: max-content;
  max-width: 320px;
  z-index: 1000;
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.4);
  line-height: 1.5;
  text-align: left;
  pointer-events: none;
}

/* Tooltip arrow: a border triangle pointing up at the header cell. */
#dataTable th[title]:hover::before {
  content: '';
  position: absolute;
  top: 100%;
  left: 50%;
  transform: translateX(-50%);
  margin-top: 2px;
  border: 6px solid transparent;
  border-bottom-color: #2c3e50;
  z-index: 1001;
  pointer-events: none;
}

/* Sort marker inside a header cell. A stray "}" used to sit directly above
   this rule; the parser folded it into the selector, which made the selector
   invalid and silently discarded the whole rule. */
#dataTable th .sort-indicator {
  display: inline-block;
  margin-left: 8px;
  font-size: 0.8em;
  opacity: 0.5;
}
/* Body cells with a hairline separator between rows. */
#dataTable td {
  padding: 12px 15px;
  border-bottom: 1px solid #ecf0f1;
}

/* Rows fade their background; hovered rows and even rows share one tint. */
#dataTable tbody tr {
  transition: background-color 0.2s;
}

#dataTable tbody tr:hover,
#dataTable tbody tr:nth-child(even) {
  background-color: #f8f9fa;
}

/* Status strip: its children are pushed to opposite ends of the row. */
.info-bar {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-top: 20px;
  padding: 15px;
  font-size: 0.95em;
  color: #2c3e50;
  background: #ecf0f1;
  border-radius: 5px;
}

/* Width-capped cell content; overflow is clipped and long words may break. */
.cell-content {
  max-width: 400px;
  overflow: hidden;
  text-overflow: ellipsis;
  word-wrap: break-word;
}

/* Links: blue, underlined only on hover. */
a {
  color: #3498db;
  text-decoration: none;
}

a:hover {
  text-decoration: underline;
}

/* Dotted underline plus help cursor for elements carrying a tooltip. */
.tooltip {
  position: relative;
  cursor: help;
  border-bottom: 1px dotted #7f8c8d;
}

/* Centered, muted placeholder message. */
.empty-message {
  padding: 40px;
  font-size: 1.1em;
  color: #7f8c8d;
  text-align: center;
}
/* Small pill used to tag a value type; the colour comes from a .type-* class. */
.type-badge {
  display: inline-block;
  margin-right: 5px;
  padding: 3px 8px;
  font-size: 0.75em;
  font-weight: 600;
  border-radius: 3px;
}

.type-text {
  background: #e8f4f8;
  color: #2c3e50;
}

.type-numeric {
  background: #fef5e7;
  color: #7d6608;
}

.type-categorical {
  background: #f4ecf7;
  color: #633974;
}

.type-list {
  background: #ebf5fb;
  color: #1b4965;
}

.type-boolean {
  background: #eafaf1;
  color: #0b5345;
}

.type-url {
  background: #fadbd8;
  color: #922b21;
}

/* Category colors - very light/subtle tones */
.category-general {
  background-color: #f0f7fb !important;
}

.category-participants {
  background-color: #f7f3f9 !important;
}

.category-stimuli {
  background-color: #fef6ec !important;
}

.category-available-data {
  background-color: #f2f8f0 !important;
}

.category-lab-setup {
  background-color: #fbf2f6 !important;
}

.category-stimulus-layout {
  background-color: #fdf9e3 !important;
}

/* Muted header colors */
/* Same category classes inside <thead> get the stronger header shade. */
#dataTable thead .category-general {
  background-color: #5a8ab8 !important;
}

#dataTable thead .category-participants {
  background-color: #8b5a9e !important;
}

#dataTable thead .category-stimuli {
  background-color: #d47f3f !important;
}

#dataTable thead .category-available-data {
  background-color: #6b9b6f !important;
}

#dataTable thead .category-lab-setup {
  background-color: #b85580 !important;
}

#dataTable thead .category-stimulus-layout {
  background-color: #d4b648 !important;
}

/* Category header row styles - match cell colors */
.category-header-row th {
  padding: 1px 8px;
  font-size: 0.9em;
  font-weight: 600;
  line-height: 0.9;
  color: #2c3e50;
  text-align: center;
  border-right: 1px solid #e5e7e9;
}

.category-header-row th:last-child {
  border-right: none;
}

/* Apply category colors to header row */
.category-header-row .header-general {
  background-color: #f0f7fb !important;
}

.category-header-row .header-participants {
  background-color: #f7f3f9 !important;
}

.category-header-row .header-stimuli {
  background-color: #fef6ec !important;
}

.category-header-row .header-available-data {
  background-color: #f2f8f0 !important;
}

.category-header-row .header-lab-setup {
  background-color: #fbf2f6 !important;
}

.category-header-row .header-stimulus-layout {
  background-color: #fdf9e3 !important;
}
/* Vertical stack for the parts of the language filter. */
.language-filter {
  display: flex;
  flex-direction: column;
}

/* Scrollable checkbox lists. The language and stimulus-type lists use the
   same declarations, so they are written once as selector lists. */
.language-options,
.stimulus-type-options {
  display: flex;
  flex-direction: column;
  gap: 8px;
  max-height: 250px;
  overflow-y: auto;
  padding: 10px;
  border: 1px solid #bdc3c7;
  border-radius: 5px;
  background: white;
}

/* One checkbox + label row. */
.language-option,
.stimulus-option {
  display: flex;
  align-items: center;
  gap: 8px;
}

.language-option input[type="checkbox"],
.stimulus-option input[type="checkbox"] {
  cursor: pointer;
  width: 18px;
  height: 18px;
}

.language-option label,
.stimulus-option label {
  cursor: pointer;
  margin: 0;
  font-weight: 400;
  font-size: 0.9em;
}

/* A checked box emphasises the label that immediately follows it. */
.language-option input[type="checkbox"]:checked + label,
.stimulus-option input[type="checkbox"]:checked + label {
  font-weight: 600;
  color: #3498db;
}

/* Yellow call-out note under the stimulus-type list. */
.stimulus-type-note {
  margin-top: 12px;
  padding: 10px;
  font-size: 0.85em;
  line-height: 1.4;
  color: #856404;
  background: #fff3cd;
  border-left: 4px solid #ffc107;
  border-radius: 3px;
}

/* Wrapping strip that holds the selected-language tags. */
.selected-languages {
  display: flex;
  flex-wrap: wrap;
  align-items: center;
  gap: 5px;
  min-height: 30px;
  margin-top: 8px;
  padding: 8px;
  font-size: 0.85em;
  background: #ecf0f1;
  border-radius: 4px;
}

/* Single tag chip. */
.language-tag {
  padding: 3px 8px;
  font-size: 0.85em;
  font-weight: 600;
  color: white;
  background: #3498db;
  border-radius: 3px;
}
/* Collapsed panel: clipped to a 60px strip, with the filter grid and the
   button row removed from layout. */
.controls.collapsed {
  max-height: 60px;
  overflow: hidden;
}

.controls.collapsed .filter-group,
.controls.collapsed .button-group {
  display: none;
}

/* Clickable panel header (icon + title). */
.toggle-filters {
  display: flex;
  align-items: center;
  gap: 10px;
  margin-bottom: 15px;
  cursor: pointer;
  user-select: none;
}

.toggle-filters h3 {
  margin-bottom: 0;
}

.toggle-filters:hover h3 {
  color: #3498db;
}

/* Header icon; turned a quarter-turn counter-clockwise while collapsed. */
.toggle-icon {
  font-size: 1.2em;
  color: #3498db;
  transition: transform 0.3s;
}

.controls.collapsed .toggle-icon {
  transform: rotate(-90deg);
}

/* Second toggle placed in the button row at the bottom of the panel. */
.toggle-filters-bottom {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 10px 15px;
  font-size: 0.95em;
  color: #3498db;
  cursor: pointer;
  user-select: none;
  border-radius: 5px;
  transition: background 0.3s;
}

.toggle-filters-bottom:hover {
  color: #2980b9;
  background: rgba(52, 152, 219, 0.1);
}

/* Bottom icon; flipped upside down while collapsed. */
.toggle-icon-bottom {
  font-size: 1em;
  transition: transform 0.3s;
}

.controls.collapsed .toggle-icon-bottom {
  transform: rotate(180deg);
}
/* Fullscreen styles */
.fullscreen-active {
  /* keep default colors; only adjust sizing */
}

/* In fullscreen the table viewport takes the whole viewport height. */
.fullscreen-active #tableContainer {
  height: 100vh;
  max-height: 100vh;
}

/* Small round blue badge, nudged up 2px from its normal position. */
.help-icon {
  display: inline-block;
  position: relative;
  top: -2px;
  width: 16px;
  height: 16px;
  margin-left: 4px;
  font-size: 0.9em;
  line-height: 16px;
  text-align: center;
  color: white;
  background: #3498db;
  border-radius: 50%;
  cursor: help;
}

.help-icon:hover {
  background: #2980b9;
}
</style>
</head>
<body>
<div class="container">
<!-- Page header: title and intro text on the left; external info link and cite button on the right. -->
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 20px;">
  <div>
    <!-- NOTE(review): the glyphs before the title were mis-encoded in the source; these two emoji are a best-effort reconstruction - confirm. They are decorative, so they are hidden from assistive technology. -->
    <h1><span aria-hidden="true">👀📖</span> Eye-Tracking-While-Reading Datasets</h1>
    <p class="subtitle">The table presents a list of eye-tracking-while-reading datasets.</p>
    <p style="color:#2c3e50; font-size:0.95em; margin-top:6px;">Tip: hover over column headers in the table to see their descriptions.</p>
  </div>
  <div style="display:flex; gap:10px; align-items:center;">
    <!-- Opens in a new tab; rel="noopener noreferrer" stops the opened page from reaching back through window.opener. -->
    <a href="https://www.cl.uzh.ch/en/research-groups/digital-linguistics/resources/dataset-review.html" target="_blank" rel="noopener noreferrer" style="display: inline-block; padding: 10px 20px; background: #3498db; color: white; border-radius: 5px; text-decoration: none; font-weight: 600; transition: background 0.3s;">
      More Information →
    </a>
    <!-- type="button" makes the in-page action explicit (a bare button defaults to submit). -->
    <button id="citeButton" type="button" class="btn-secondary" onclick="openCite()" title="Cite this table">Cite this table</button>
  </div>
</div>
<!-- "Recently Added" banner; the button on the right calls closeBanner(). -->
<div id="recentlyAddedBanner" style="background: linear-gradient(135deg, #fff3cd 0%, #ffe599 100%); border-left: 4px solid #ffc107; padding: 12px 18px; margin-bottom: 20px; border-radius: 6px; display: flex; align-items: center; justify-content: space-between;">
  <div style="display: flex; align-items: center; gap: 12px;">
    <!-- Decorative sparkle, hidden from assistive technology. -->
    <span style="font-size: 1.2em;" aria-hidden="true">✨</span>
    <div>
      <strong style="color: #856404;">Recently Added:</strong>
      <span style="color: #856404; margin-left: 8px;">Cuentos</span>
    </div>
  </div>
  <!-- Icon-only button: aria-label supplies the accessible name the glyph cannot. -->
  <button id="closeBannerBtn" type="button" onclick="closeBanner()" aria-label="Dismiss banner" style="background: none; border: none; font-size: 1.5em; cursor: pointer; color: #856404; padding: 0; margin: 0; display: flex; align-items: center; justify-content: center; width: 24px; height: 24px;">✕</button>
</div>
<!-- Filter panel. The header and the bottom "Close" control both call toggleFilters(). #filterContainer is empty in the markup; presumably a script fills it - TODO confirm. -->
<div class="controls">
  <div class="toggle-filters" onclick="toggleFilters()">
    <!-- The icon is the keyboard entry point for the header toggle: it is focusable and reacts to Enter/Space. Mouse clicks still bubble to the wrapper's onclick, so the <h3> keeps its heading semantics. -->
    <span class="toggle-icon" role="button" tabindex="0" aria-label="Toggle filters" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); toggleFilters(); }">▼</span>
    <h3>Filters</h3>
  </div>
  <div id="filterContainer" class="filter-group"></div>
  <div class="button-group">
    <!-- Same toggle, reachable from the bottom of the panel; focusable and operable with Enter/Space. -->
    <div class="toggle-filters-bottom" role="button" tabindex="0" onclick="toggleFilters()" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); toggleFilters(); }">
      <span class="toggle-icon-bottom" aria-hidden="true">▲</span>
      <span>Close</span>
    </div>
    <button type="button" class="btn-primary" onclick="applyFilters()">Apply Filters</button>
    <button type="button" class="btn-secondary" onclick="clearFilters()">Clear All Filters</button>
    <!-- NOTE(review): the emoji on this button was mis-encoded in the source; the pin is a best-effort reconstruction - confirm against the label the script sets when toggling. -->
    <button type="button" class="btn-secondary" id="freezeColumnToggle" onclick="toggleFreezeColumn()">📌 Freeze Name Column</button>
    <button type="button" class="btn-secondary" id="fullscreenToggle" onclick="toggleFullscreen()">Fullscreen</button>
  </div>
</div>
<!-- Scrollable wrapper around the datasets table. The header and body are empty in the markup; presumably a script builds the rows from DATA - TODO confirm. -->
<div id="tableContainer">
<table id="dataTable" class="display">
<thead>
</thead>
<tbody id="tableBody"></tbody>
</table>
</div>
<!-- Status bar: "Showing X of Y datasets" counters (both start at 0) and a filter-status slot that starts empty. -->
<div class="info-bar">
<span>Showing <strong id="rowCount">0</strong> of <strong id="totalCount">0</strong> datasets</span>
<span id="filterStatus" style="color: #3498db;"></span>
</div>
</div>
<!-- Cite modal: fixed full-viewport backdrop, hidden by default (display:none), holding a panel with a title, a <pre> slot for the BibTeX entry (empty in the markup) and two actions. -->
<div id="citeModal" style="display:none; position:fixed; inset:0; background:rgba(0,0,0,0.5); align-items:center; justify-content:center; z-index:2000;">
  <!-- role="dialog" plus aria-labelledby announce the panel as a dialog named by its heading. -->
  <div role="dialog" aria-labelledby="modalTitle" style="background:white; padding:20px; border-radius:8px; max-width:800px; width:90%; box-shadow:0 10px 40px rgba(0,0,0,0.3);">
    <div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:12px;">
      <h3 id="modalTitle" style="margin:0;">Cite this table</h3>
      <!-- Icon-only button: aria-label supplies the accessible name. -->
      <button type="button" onclick="closeCite()" aria-label="Close" style="background:transparent;border:none;font-size:1.2em;cursor:pointer;">✕</button>
    </div>
    <pre id="bibtexPre" style="white-space:pre-wrap; word-break:break-word; font-family:monospace; background:#f6f7f8; padding:12px; border-radius:6px; max-height:400px; overflow:auto;"></pre>
    <div style="display:flex; gap:10px; justify-content:flex-end; margin-top:12px;">
      <button type="button" class="btn-primary btn-sm" onclick="copyBibtex()">Copy BibTeX</button>
      <!-- Closing is an in-page action, so this is a real button rather than a link to "#". -->
      <button type="button" class="btn-secondary btn-sm" onclick="closeCite()">Close</button>
    </div>
  </div>
</div>
<script>
// Embedded data and metadata
const DATA = [{"Name": "ADEGBTS", "Data accessibility": "Free", "pymovements": null, "Full name": "A Dataset for Exploring Gaze Behaviors in Text Summarization", "Dataset characteristics": "Participants were asked to summarize each text after reading.", "# Participants": "50", "Age range": null, "Age mean\u00b1SD": "23.1\u00b11.1", "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "All participants were asked to fill out a questionnaire to count participants\u2019 familiarity with different categories of arti- cles", "Reading instructions": null, "# Items": "100.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": "502", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Public news websites, each beloning to one out of ten categories, simplified Chinese script, Mandarin (infered from stimulus example, information not explicitly provided).", "Stimulus language": "Mandarin (simp. 
chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": null, "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii EyeTracking 4C", "Mount": null, "Sampling frequency (Hz)": "100", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/MMLabTHUSZ/ADEGBTS/tree/master"], ["paper", "https://dl.acm.org/doi/pdf/10.1145/3339825.3394928"]], "_bibtex": "@inproceedings{yi2020gaze-sum, title = {A dataset for exploring gaze behaviors in text summarization}, booktitle = {Proceedings of the 11th {{ACM Multimedia Systems Conference}}}, author = {Yi, Kun and Guo, Yu and Jiang, Weifeng and Wang, Zhi and Sun, Lifeng}, year = 2020, series = {{{MMSys}} '20}, pages = {243--248}, publisher = {Association for Computing Machinery}, doi = {10.1145/3339825.3394928}, isbn = {978-1-4503-6845-2}, keywords = {dataset,gaze behaviors,personalized text summarization,text summarization} }"}, {"Name": "ASD Data", "Data accessibility": "Not available", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "109", "Age range": null, "Age mean\u00b1SD": "33.73\u00b18.36", "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": "56 participants are diagnosed with ASD; the others are a control group", "Reading instructions": null, "# Items": "27.0", "Items per subject": null, "Mean\u00b1SD words per item": null, 
"Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "4212", "Stimulus description": "Texts from various sources", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Gazepoint GP3", "Mount": null, "Sampling frequency (Hz)": "60", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://wlv.openrepository.com/items/cd988076-568b-4ecf-82b2-b07d1ccc93b5"]], "_bibtex": "@phdthesis{yaneve2016asd, title = {Assessing text and web accessibility for people with autism spectrum disorder}, author = {Yaneva, Victoria}, year = 2016, school = {University of Wolverhampton} }"}, {"Name": "Alzheimer", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "4 tasks: pupil fixation and description of a pleasant past experience, picture description and paragraph reading, also includes audio and video transcripts", "# Participants": "162", "Age range": "53-96", "Age mean\u00b1SD": "68.78", "Native language": "English", "Inclusion criteria": "Fluent in English, 50 years plus. 
Clinic patients had diagnoses of mild-moderate AD, mild cognitive impairment (MCI), or subjective memory complaints (SMC)", "Exclusion criteria": "Psychiatric disease or other neurological condition apart from Alzheimer", "Other characteristics": "83 participants were part of a control group", "Reading instructions": "\u201cYou will be shown a paragraph on the screen. Please read the paragraph out loud.\u201d", "# Items": "1.0", "Items per subject": "1", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Standardized paragraph from the International Reading Speed Texts (IReST)", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "International Reading Speed Texts (IReST)", "Comprehension questions": "no", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "unclear", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": "From the paper: \"The datasets presented in this article are not readily available because study participants have not consented to the distribution of their personally identifying data (such as audio and video recordings). 
Requests to access the datasets should be directed to TF, http://thalia.field@ubc.ca.\"", "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://www.frontiersin.org/journals/human-neuroscience/articles/10.3389/fnhum.2021.716670/full"]], "_bibtex": "@article{jang_classification_2021, title = {Classification of {{Alzheimer}}'s {{Disease}} leveraging multi-task machine learning analysis of speech and eye-movement data}, author = {Jang, Hyeju and Soroski, Thomas and Rizzo, Matteo and Barral, Oswald and Harisinghani, Anuj and {Newton-Mason}, Sally and Granby, Saffrin and {Stutz da Cunha Vasco}, Thiago Monnerat and Lewis, Caitlin and Tutt, Pavan and Carenini, Giuseppe and Conati, Cristina and Field, Thalia S.}, year = 2021, journal = {Frontiers in Human Neuroscience}, volume = {15}, pages = {716670}, issn = {1662-5161}, doi = {10.3389/fnhum.2021.716670}, urldate = {2025-07-16}, langid = {english}, keywords = {Alzheimer's disease,Eye-tracking,Language,machine learning,Mild Cognitive Impairment,multimodal,Speech} }"}, {"Name": "BCCWJ", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "24", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Japanese", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, 
"Total # words all items": null, "Stimulus description": "\"The Balanced Corpus of Contemporary Written Japanese\" (BCCWJ) newspaper samples. ", "Stimulus language": "Japanese", "Stimulus language family": "Japonic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": null, "Stimulus source": "https://clrd.ninjal.ac.jp/bccwj/en/", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": "CC BY-NC-SA 3.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": "--", "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/masayu-a/BCCWJ-EyeTrack"]], "_bibtex": "@misc{BCCWJ-eye-tracking, title = {{{BCCWJ-EyeTrack}}}, author = {Asahara, Masayuki}, year = 2018, publisher = {{National Institute for Japanese Language and Linguistics}}, url = {https://github.com/masayu-a/BCCWJ-EyeTrack} }"}, {"Name": "BSC", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Beijing Sentence Corpus", "Dataset characteristics": "Includes human predictability norms.", "# Participants": "60", "Age range": null, "Age mean\u00b1SD": "22.0\u00b12.6", "Native language": "Mandarin", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "150.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words 
all items": null, "Stimulus description": "Sentences from People's Daily newspaper; strong political tones have been removed.", "Stimulus language": "Mandarin (simp. chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Chin rest", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "43", "Eye-to-camera distance (cm)": null, "Monitor": "19-inch ViewSonic G90f monitor (frame rate 100 Hz)", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": "0.75", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/vr3k8/"], ["paper", "https://link.springer.com/article/10.3758/s13428-021-01730-2"]], "_bibtex": "@article{Pan2021BSC, title = {The {{Beijing Sentence Corpus}}: {{A Chinese}} sentence corpus with eye movement data and predictability norms}, author = {Pan, Jinger and Yan, Ming and Richter, Eike M. 
and Shu, Hua and Kliegl, Reinhold}, year = 2021, journal = {Behavior Research Methods}, volume = {54}, number = {4}, pages = {1989--2000}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-021-01730-2} }"}, {"Name": "BSC Font Size Manipulation", "Data accessibility": "Free", "pymovements": null, "Full name": "Beijing Sentence Corpus font size manipulation", "Dataset characteristics": "Subjects read sentences from the Beijing Sentences Corpus while the font size was manipulated", "# Participants": "48", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Mandarin", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "120.0", "Items per subject": "120", "Mean\u00b1SD words per item": "9.7\u00b11", "Mean\u00b1SD chars per item": "17.9\u00b11.2", "Mean\u00b1SD chars per word": "1.85", "Mean\u00b1SD chars seen per participant": "2137", "Mean\u00b1SD words seen per participant": "1158", "Total # chars all items": "2317", "Total # words all items": "1158", "Stimulus description": "Sentences were selected from a local newspaper (see BSC), and some of them were slightly edited to avoid possible syntactic, semantic or word-boundary ambiguities. 
By manipulating the font sizes while keeping the viewing distance constant among conditions, four angular size conditions were adopted.", "Stimulus language": "Mandarin", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": "Beijing Sentences Corpus", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "unclear", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Chin rest", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "50", "Eye-to-camera distance (cm)": null, "Monitor": "21-inch (c. 53.3 cm) Dell P1130 CRT monitor (frame rate 100 Hz)", "Resolution": "1280 \u00d7 1024 ", "Text presentation": null, "Font": null, "Font size": "12, 20, 40, 60", "Monospaced": null, "Character per visual angle": "2.5, 1.43, 0.71, 0.47", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2011-app-shu-zhou-yan-kliegl"], ["paper", "https://link.springer.com/article/10.3758/s13414-010-0029-y"]], "_bibtex": "@article{shu2011FontSizeModulates, title = {Font size modulates saccade-target selection in {{Chinese}} reading}, author = {Shu, Hua and Zhou, Wei and Yan, Ming and Kliegl, Reinhold}, year = 2011, journal = {Attention, Perception, \\& Psychophysics}, volume = {73}, number = {2}, pages = {482--490}, issn = {1943-393X}, doi = {10.3758/s13414-010-0029-y}, urldate = {2025-12-20}, langid = {english}, keywords = {Chinese,Eye movement,Font size,Saccade} }"}, {"Name": "BSC II", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Beijing Sentence Corpus II", "Dataset characteristics": null, "# Participants": "70", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Mandarin", "Inclusion criteria": "Native speakers", 
"Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "150.0", "Items per subject": "150", "Mean\u00b1SD words per item": "11.2\u00b11.6", "Mean\u00b1SD chars per item": "21\u00b12.5", "Mean\u00b1SD chars per word": "1.9", "Mean\u00b1SD chars seen per participant": "3150\u00b10.0", "Mean\u00b1SD words seen per participant": "1680\u00b10.0", "Total # chars all items": "3150", "Total # words all items": "1680", "Stimulus description": null, "Stimulus language": "Mandarin (trad. chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": "BSC corpus", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/j9xt4/"], ["paper", "https://link.springer.com/article/10.3758/s13428-024-02523-z"]], "_bibtex": "@article{Yan2025, title = {The {{Beijing Sentence Corpus II}}: {{A}} cross-script comparison between traditional and simplified {{Chinese}} sentence reading}, author = {Yan, Ming and Pan, Jinger and Kliegl, Reinhold}, year = 2025, journal = {Behavior Research Methods}, volume = {57}, number = {2}, pages = {60}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-024-02523-z} }"}, {"Name": "CELER", "Data accessibility": "Restricted", "pymovements": "No", "Full name": "Corpus of Eye Movements in L1 and L2 
English Reading", "Dataset characteristics": "Half of the stimuli are uniquely read by one participant.", "# Participants": "365", "Age range": null, "Age mean\u00b1SD": "27.3\u00b16.8", "Native language": "English, Arabic, Chinese, Japanese, Spanish, Portuguese", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "69 English L1 and 296 English L2 (5 different language backgrounds)", "Reading instructions": null, "# Items": "156.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "900", "Stimulus description": "Sentences from the Wall Street Journal", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": "Syntactic annotations", "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "unclear", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": "Due to licensing problems, the data can only be obtained by following the procedure in this README", "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/berzak/celer?tab=readme-ov-file"], ["paper", "https://direct.mit.edu/opmi/article/doi/10.1162/opmi_a_00054/110717/CELER-A-365-Participant-Corpus-of-Eye-Movements-in"]], "_bibtex": 
"@article{celer2022, title = {{{CELER}}: {{A}} 365-participant corpus of eye movements in {{L1}} and {{L2 English}} reading}, author = {Berzak, Yevgeni and Nakamura, Chie and Smith, Amelia and Weng, Emily and Katz, Boris and Flynn, Suzanne and Levy, Roger}, year = 2022, journal = {Open Mind}, pages = {1--10}, issn = {2470-2986}, doi = {10.1162/opmi_a_00054} }"}, {"Name": "CFILT Coreference", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Participants read while annotation coreferences in the text", "# Participants": "14", "Age range": "47-50 (2 expert linguists); 20-30 (12 post-graduates)", "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Non-native speakers of the stimulus language", "Reading instructions": null, "# Items": "22.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Less than 10 sentences per text", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual 
angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataset3.html"], ["paper", "https://aclanthology.org/W16-1904/"]], "_bibtex": "@inproceedings{cheri-etal-2016-coreference, title = {Leveraging annotators' gaze behaviour for coreference resolution}, booktitle = {Proceedings of the 7th {{Workshop}} on {{Cognitive Aspects}} of {{Computational Language Learning}}}, author = {Cheri, Joe and Mishra, Abhijit and Bhattacharyya, Pushpak}, year = 2016, pages = {22--26}, publisher = {Association for Computational Linguistics}, doi = {10.18653/v1/W16-1904} }"}, {"Name": "CFILT Essay Grading", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Participants graded the read essays", "# Participants": "8", "Age range": null, "Age mean\u00b1SD": "25", "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Fluent English speakers", "Reading instructions": null, "# Items": "48.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Essays", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance 
(cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataseteg2.html"], ["paper", "https://aclanthology.org/2020.aacl-main.86/"]], "_bibtex": "@inproceedings{mathias-etal-2020-happy, title = {Happy are those who grade without seeing: {{A}} multi-task learning approach to grade essays using gaze behaviour}, booktitle = {Proceedings of the 1st {{Conference}} of the {{Asia-Pacific Chapter}} of the {{Association}} for {{Computational Linguistics}} and the 10th {{International Joint Conference}} on {{Natural Language Processing}}}, author = {Mathias, Sandeep and Murthy, Rudra and Kanojia, Diptesh and Mishra, Abhijit and Bhattacharyya, Pushpak}, year = 2020, pages = {858--872}, publisher = {Association for Computational Linguistics} }"}, {"Name": "CFILT Sarcasm", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Participants were asked to label the sentences as either positive or negative.", "# Participants": "7", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Non-native speakers", "Reading instructions": null, "# Items": "1000.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "350 are labeled as sarcastic and 650 as non-sarcastic", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus 
source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataset2.html"], ["paper", "https://ojs.aaai.org/index.php/AAAI/article/view/9884"]], "_bibtex": "@article{Mishra-Kanojia-Bhattacharyya-2016, title = {Predicting readers' sarcasm understandability by modeling gaze behavior}, author = {Mishra, Abhijit and Kanojia, Diptesh and Bhattacharyya, Pushpak}, year = 2016, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, volume = {30}, number = {1}, pages = {1--7}, doi = {10.1609/aaai.v30i1.9884} }"}, {"Name": "CFILT Scanpath", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "16", "Age range": "47-50 (3 expert linguists); 20-30 (13 post-graduates)", "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Non-native speakers of the stimulus language", "Reading instructions": null, "# Items": "32.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Paragraphs from simple 
and standard Wikipedia", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataset4.html"], ["paper", "https://ojs.aaai.org/index.php/AAAI/article/view/11159"]], "_bibtex": "@article{mishra2017scanpath, title = {Scanpath complexity: {{Modeling}} reading effort using gaze information}, author = {Mishra, Abhijit and Kanojia, Diptesh and Nagar, Seema and Dey, Kuntal and Bhattacharyya, Pushpak}, year = 2017, journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, volume = {31}, number = {1}, pages = {1--8}, doi = {10.1609/aaai.v31i1.11159} }"}, {"Name": "CFILT Sentiment", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Participants were asked to label the sentiment of the sentences as either positive, negative or objective.", "# Participants": "5", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "1059.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars 
per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Movie reviews or twitter posts", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii TX 300", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataset1.html"], ["paper", "https://aclanthology.org/P14-2007/"]], "_bibtex": "@inproceedings{joshi-etal-2014-measuring, title = {Measuring sentiment annotation complexity of text}, booktitle = {Proceedings of the 52nd {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}} ({{Volume}} 2: {{Short Papers}})}, author = {Joshi, Aditya and Mishra, Abhijit and Senthamilselvan, Nivvedan and Bhattacharyya, Pushpak}, year = 2014, pages = {36--41}, publisher = {Association for Computational Linguistics}, doi = {10.3115/v1/P14-2007} }"}, {"Name": "CFILT Text Quality", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Participants annotated the text quality of each text based on three given properties (organization, coherence and cohesion)", "# Participants": "20", "Age range": "20-25", "Age 
mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Fluent English speakers", "Reading instructions": null, "# Items": "30.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Text from different sources", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://www.cfilt.iitb.ac.in/cognitive-nlp/dataseteg.html"], ["paper", "https://aclanthology.org/P18-1219/"]], "_bibtex": "@inproceedings{mathias-etal-2018-eyes, title = {Eyes are the windows to the soul: {{Predicting}} the rating of text quality using gaze behaviour}, booktitle = {Proceedings of the 56th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}} ({{Volume}} 1: {{Long Papers}})}, author = {Mathias, Sandeep and Kanojia, Diptesh and Patel, Kevin and Agrawal, Samarth and Mishra, Abhijit and Bhattacharyya, Pushpak}, year = 2018, pages = {2352--2362}, publisher 
= {Association for Computational Linguistics}, doi = {10.18653/v1/P18-1219} }"}, {"Name": "Children EEG", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Combined EEG and eye movements tracking, 2nd and 3rd grade children. See also Dyslexic Children EEG.", "# Participants": "25", "Age range": "8-9", "Age mean\u00b1SD": null, "Native language": "Serbian", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "1.0", "Items per subject": "1", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "The story \u201cSt Sava and the villager without luck\u201d was divided into 13 paragraphs/slides, so the text on each slide was kept in the original order but in different colours", "Stimulus language": "Serbian", "Stimulus language family": "Slavic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": " \u201cSt Sava and the villager without luck\u201d", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": "Data of this research is available upon request via corresponding author.", "Eye-tracker": "SMI RED-m", "Mount": "Remote, chin rest", "Sampling frequency (Hz)": "120", "Eye-to-screen distance (cm)": "57 to chin rest", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": "Black", "Spacing": null, 
"Background color": "White, red, blue, yellow, orange, purple, turquoise", "_links": [["paper", "https://www.mdpi.com/1424-8220/21/2/406"]], "_bibtex": "@article{jakovljevic2021SensorHubDetecting, title = {The sensor hub for detecting the developmental characteristics in reading in children on a white vs. colored background/colored overlays}, author = {Jakovljevi{\\'c}, Tamara and Jankovi{\\'c}, Milica M. and Savi{\\'c}, Andrej M. and Soldatovi{\\'c}, Ivan and Todorovi{\\'c}, Petar and Jakulin, Tadeja Jere and Papa, Gregor and Kovi{\\'c}, Vanja}, year = 2021, journal = {Sensors}, volume = {21}, number = {2}, pages = {406}, publisher = {MDPI}, issn = {1424-8220}, doi = {10.3390/s21020406}, urldate = {2026-01-06}, copyright = {http://creativecommons.org/licenses/by/3.0/}, langid = {english}, keywords = {background colors,developmental differences,overlay colors,reading in children,sensor hub} }"}, {"Name": "Chinese Reading", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": null, "# Participants": "1718", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "7577.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": "22.48", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": "8551", "Total # words all items": null, "Stimulus description": "Stimulus language inferred based on stimulus, not explicitly stated in paper", "Stimulus language": "Mandarin (simp. 
chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "unclear", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Chin rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "58", "Eye-to-camera distance (cm)": null, "Monitor": "21-inch CRT monitor (Sony G520)", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/94wue/"], ["paper", "https://www.nature.com/articles/s41597-022-01464-6"]], "_bibtex": "@article{Zhang2022chinese, title = {The database of eye-movement measures on words in {{Chinese}} reading}, author = {Zhang, Guangyao and Yao, Panpan and Ma, Guojie and Wang, Jingwen and Zhou, Junyi and Huang, Linjieqiong and Xu, Pingping and Chen, Lijing and Chen, Songlin and Gu, Junjuan and Wei, Wei and Cheng, Xi and Hua, Huimin and Liu, Pingping and Lou, Ya and Shen, Wei and Bao, Yaqian and Liu, Jiayu and Lin, Nan and Li, Xingshan}, year = 2022, journal = {Scientific Data}, volume = {9}, number = {1}, pages = {1--8}, publisher = {{Springer Science and Business Media LLC}}, issn = {2052--4463}, doi = {10.1038/s41597-022-01464-6} }"}, {"Name": "Chinese Word Length Effect", "Data accessibility": "Not available", "pymovements": "No", "Full name": null, "Dataset characteristics": "Includes human predictability norms", "# Participants": "30", "Age range": null, "Age mean\u00b1SD": "24.0\u00b12.0", "Native language": "Chinese", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other 
characteristics": null, "Reading instructions": null, "# Items": "90.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": "19.0\u00b12.0", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Sentences are constructed; rated for their naturalness. Simplified Chinese script, Mandarin (inferred from stimulus example, information not explicitly provided).", "Stimulus language": "Mandarin (simp. chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": "65", "Eye-to-camera distance (cm)": null, "Monitor": "19 inch", "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": "1", "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://psycnet.apa.org/fulltext/2018-58848-001.html"]], "_bibtex": "@article{Zang2018zh-word-len, title = {Investigating word length effects in {{Chinese}} reading.}, author = {Zang, Chuanli and Fu, Ying and Bai, Xuejun and Yan, Guoli and Liversedge, Simon P.}, year = 2018, journal = {Journal of Experimental Psychology: Human Perception and Performance}, volume = {44}, number = {12}, pages = {1831--1841}, publisher = {American Psychological Association (APA)}, issn = {0096-1523}, doi = {10.1037/xhp0000589} }"}, {"Name": "CoLAGaze", "Data accessibility": "Free", 
"pymovements": "Yes", "Full name": "Corpus of Eye Movements for Linguistic Acceptability", "Dataset characteristics": "Stimulus sentences are sourced from the CoLA corpus (https://nyu-mll.github.io/CoLA/)", "# Participants": "42", "Age range": "19-62", "Age mean\u00b1SD": "34.5\u00b12", "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": " ", "Reading instructions": null, "# Items": "306.0", "Items per subject": "153", "Mean\u00b1SD words per item": "8.88 (gram.), 8.87 (ungram.)", "Mean\u00b1SD chars per item": "39.25 (gram.), 39.21 (ungram.)", "Mean\u00b1SD chars per word": "4.42 \u00b1 0.6 gramm, 4.42 \u00b1 0.64 ungramm", "Mean\u00b1SD chars seen per participant": "6002", "Mean\u00b1SD words seen per participant": "1858", "Total # chars all items": "12'004", "Total # words all items": "2716", "Stimulus description": "Sentences from CoLA training and development sets. Minimal pairs of correct and ungrammatical sentences", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": "https://nyu-mll.github.io/CoLA/", "Comprehension questions": "In 1/3 of the trials, after all trials grammaticality judgement", "Text annotation": "Error-type, word length, word surprisal", "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo", "Mount": "Chin-and-forehead rest", "Sampling frequency (Hz)": "2000", "Eye-to-screen distance (cm)": "60", "Eye-to-camera distance (cm)": "55", "Monitor": "A physical size of 54.37\u00d730.26 cm (refresh rate 60 Hz)", "Resolution": "1280 \u00d7 1024", "Text presentation": null, "Font": "Courier New", "Font size": "17", "Monospaced": "yes", "Character per visual angle": "2", "Font 
color": "Black", "Spacing": null, "Background color": "Light gray", "_links": [["data", "https://osf.io/gj2uk/?view_only=e32ed23d6df34e428d70e070e32f4cfc"], ["paper", "https://dl.acm.org/doi/10.1145/3715669.3723120"]], "_bibtex": "@inproceedings{bondar_colagaze_2025, title = {{{CoLAGaze}}: {{A}} corpus of eye movements for linguistic acceptability}, shorttitle = {{{CoLAGaze}}}, booktitle = {Proceedings of the 2025 {{Symposium}} on {{Eye Tracking Research}} and {{Applications}}}, author = {Bondar, Anna and Reich, David R. and J{\\\"a}ger, Lena A.}, year = 2025, pages = {1--9}, publisher = {Association for Computing Machinery}, doi = {10.1145/3715669.3723120} }"}, {"Name": "CopCo", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Copenhagen Corpus", "Dataset characteristics": null, "# Participants": "57", "Age range": "21-62", "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "25 native speakers, 19 dyslexic native speakers and 13 L2 speakers", "Reading instructions": null, "# Items": "20.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "34897", "Stimulus description": "Speech manuscripts", "Stimulus language": "Danish", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen 
distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/ud8s5/wiki/home/"], ["paper", "https://aclanthology.org/2022.lrec-1.182/"], ["paper", "https://aclanthology.org/2023.nodalida-1.7/"], ["paper", "https://aclanthology.org/2024.lrec-main.1187/"]], "_bibtex": "@inproceedings{hollenstein-etal-2022-copenhagen, title = {The copenhagen corpus of eye tracking recordings from natural reading of {{Danish}} texts}, booktitle = {Proceedings of the {{Thirteenth Language Resources}} and {{Evaluation Conference}}}, author = {Hollenstein, Nora and Barrett, Maria and Bj{\\\"o}rnsd{\\'o}ttir, Marina}, year = 2022, pages = {1712--1720}, publisher = {European Language Resources Association} }"}, {"Name": "Cuentos", "Data accessibility": "Free", "pymovements": "No", "Full name": "Cuentos", "Dataset characteristics": "Big number of participants (113) reading stories in Spanish on texts that have not been used in previous eye-tracking experiments.", "# Participants": "113", "Age range": "18.12-69.94", "Age mean\u00b1SD": "23.8\u00b17.78", "Native language": "Spanish", "Inclusion criteria": "Native speakers; adults", "Exclusion criteria": "Non-native speakers", "Other characteristics": null, "Reading instructions": "Read texts thoroughly as you'll be asked to answer comprehension questions later", "# Items": "30.0", "Items per subject": "3-20", "Mean\u00b1SD words per item": "3300\u00b1747 for long stories, 795\u00b1135 words for short stories", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "40,000", "Stimulus description": "Published stories and blog 
posts", "Stimulus language": "Spanish", "Stimulus language family": "Romance", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes, regarding the content of the stimuli.", "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": "Creative Commons", "Other available data": "Cloze task on the long stories stimuli", "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "55cm and 65cm", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": "1920 x 1080 and 1024 x 768", "Text presentation": "Single-page", "Font": "Courier New", "Font size": "24 and 18", "Monospaced": "Yes", "Character per visual angle": null, "Font color": "Black", "Spacing": "55-pixels and 50-pixels", "Background color": "Grey", "_links": [["data", "https://doi.org/10.6084/m9.figshare.28311908"], ["paper", "https://www.nature.com/articles/s41597-026-06798-z"]], "_bibtex": "@article{travi_cuentos_2026, title = {Cuentos: A Large-Scale Eye-Tracking Reading Corpus on Spanish Narrative Texts}, copyright = {2026 The Author(s)}, issn = {2052-4463}, shorttitle = {Cuentos}, url = {https://www.nature.com/articles/s41597-026-06798-z}, doi = {10.1038/s41597-026-06798-z}, abstract = {Eye-tracking is a well-established method for studying reading processes. Our gaze jumps word to word, sampling information almost sequentially. Time spent on each word, along with skipping or revisiting patterns, provides proxies for cognitive processes during comprehension. However, few studies have focused on Spanish, where empirical data remain scarce, and little is known about how findings from other languages translate to Spanish reading behavior. 
We present the largest publicly available Spanish eye-tracking dataset to date, comprising readings of self-contained stories from 113 native speakers (mean age 23.8; 61 females, 52 males). The dataset comprises both long stories (3300\u00b1747 words, 11 readings per item on average) and short stories (795\u00b1135 words, 50 readings per item on average), providing extensive coverage of natural reading scenarios with over 940,000 fixations covering close to 40,000 words (8,500 unique words). This comprehensive resource offers opportunities to investigate Spanish eye movement patterns, explore language-specific cognitive processes, examine Spanish linguistic phenomena, and develop computational algorithms for reading research and natural language processing applications.}, language = {en}, urldate = {2026-02-18}, journal = {Scientific Data}, publisher = {Nature Publishing Group}, author = {Travi, Fermin and Bianchi, Bruno and Slezak, Diego Fernandez and Kamienkowski, Juan E.}, month = feb, year = {2026}, keywords = {Language, Computational neuroscience}}"}, {"Name": "DEMONIC", "Data accessibility": "Not available", "pymovements": "No", "Full name": "Dutch Eye-Movements Online Internet Corpus", "Dataset characteristics": null, "# Participants": "55", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Dutch", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": null, "Items per subject": "224", "Mean\u00b1SD words per item": "10.9\u00b12.7", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1746", "Stimulus description": null, "Stimulus language": "Dutch", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": 
null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Head-mounted", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "80", "Eye-to-camera distance (cm)": null, "Monitor": "17-in. computer screen (refresh rate 60 Hz)", "Resolution": null, "Text presentation": "Single-line", "Font": "Courier New", "Font size": null, "Monospaced": null, "Character per visual angle": "2.78", "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://pubmed.ncbi.nlm.nih.gov/20373225/"]], "_bibtex": "@article{kupermanEffectWordPosition2010demonic, title = {The effect of word position on eye-movements in sentence and paragraph reading}, author = {Kuperman, Victor and Dambacher, Michael and Nuthmann, Antje and Kliegl, Reinhold}, year = 2010, journal = {Quarterly Journal of Experimental Psychology (2006)}, volume = {63}, number = {9}, pages = {1838--1857}, issn = {1747-0226}, doi = {10.1080/17470211003602412}, langid = {english}, pmid = {20373225}, keywords = {Attention,Eye Movements,Humans,Models Statistical,Photic Stimulation,Reaction Time,Reading,Semantics,Vocabulary} }"}, {"Name": "DMORPH", "Data accessibility": "Not available", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "28", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Dutch", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Read sentences for comprehension at their own pace and to press a \u201cresponse\u201d button on the button box", "# Items": "292.0", "Items per subject": null, "Mean\u00b1SD words per item": "11.6\u00b12.2", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, 
"Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1421", "Stimulus description": "Sentences contained target words. The set of target words included 156 Dutch bimorphemic words (e.g., president+schap \u201cpresidency\u201d) ending in one of the following derivational suffixes: -achtig, -baar, -dom, -er, -erig, -erij, -es, -heid, -ig, -ing, -lijk, -loos, -nis, -schap, -sel, -ster, -te, -vol, and -zaam (3\u201312 words per suffix). These 19 suffixes were selected for inclusion in our study since they are reasonably productive in modern Dutch and belong to the Germanic stratum. 136 filler sentences.", "Stimulus language": "Dutch", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes-no question for each sentence", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Head-mounted", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "80", "Eye-to-camera distance (cm)": null, "Monitor": "17-in. computer screen (refresh rate 60 Hz)", "Resolution": null, "Text presentation": "Single-line", "Font": "Courier New", "Font size": null, "Monospaced": null, "Character per visual angle": "2.78", "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://www.sciencedirect.com/science/article/abs/pii/S0749596X09000989?via%3Dihub"]], "_bibtex": "@article{kupermanProcessingTradeoffsReading2010demorph, title = {Processing trade-offs in the reading of {{Dutch}} derived words}, author = {Kuperman, Victor and Bertram, Raymond and Baayen, R. 
Harald}, year = 2010, journal = {Journal of Memory and Language}, volume = {62}, number = {2}, pages = {83--97}, issn = {0749-596X}, doi = {10.1016/j.jml.2009.10.001}, urldate = {2025-07-16}, keywords = {Derived words,Eye movements,Information theory,Lexical processing,Morphology} }"}, {"Name": "Developmental Reading", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "For each language, participants were selected from different age groups, ranging from children to adults.", "# Participants": "159", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English, Chinese ", "Inclusion criteria": "Native Speakers", "Exclusion criteria": null, "Other characteristics": "American: 23 third graders, 30 fifth graders, 26 undergraduates, Chinese: 25 third graders, 25 fifth graders, 30 undergraduates; American participants were 23 third-grade students (mean age = 9.1 years, range = 8.6-10.2; 12 males and 11 females), 30 fifth-grade students (mean age = 11.2 years, range = 10.6-12.1; 15 males and 15 females), and 26 undergraduate students. Chinese participants were 25 third-grade students (mean age = 9.4 years, range = 9.0-10.6), 25 fifth-grade students (mean age = 11.4, range = 10.7-11.9), and 30 undergraduate students from Beijing. 
Children read the 2 parallel stories and 3 age-appropriate ones, adults read all stories", "Reading instructions": "Read silently, for comprehension", "# Items": "16.0", "Items per subject": "5 (children), 8 (adults)", "Mean\u00b1SD words per item": "225.5 (English), 149.5 (Chinese)", "Mean\u00b1SD chars per item": "44.4 (English), 12.5 (Chinese)", "Mean\u00b1SD chars per word": "5.55 (English), 1.57 (Chinese)", "Mean\u00b1SD chars seen per participant": "English: 5006 (children), 10'012 (adults); Chinese: 939 (children), 1878 (adults)", "Mean\u00b1SD words seen per participant": "English: 902 (children), 1804 (adults); Chinese: 598 (children), 1196 (adults)", "Total # chars all items": "10'012 (English), 1878 (Chinese) ", "Total # words all items": "1804 (English), 1196 (Chinese)", "Stimulus description": "2 stories were chosen to be comparable across languages. The rest of the stories were selected from extracurricular reading books published in China and the United States.", "Stimulus language": "English, Chinese ", "Stimulus language family": "Germanic, Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "The 2 parallel stories were originally used in: cross-cultural study of American, Japanese, and Chinese students' reading abilities (Stevenson et al., 1990).", "Comprehension questions": "Yes, only orally", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink I", "Mount": "Head mount", "Sampling frequency (Hz)": "250", "Eye-to-screen distance (cm)": "60-70 ", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": "NA (English), Song (Chinese)", "Font size": null, "Monospaced": null, "Character per visual angle": "2.86 (English), 0.77 (Chinese)", "Font 
color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2009-jemr-feng"], ["paper", "https://pubmed.ncbi.nlm.nih.gov/19489899/"], ["paper", "https://www.mdpi.com/1995-8692/3/2/8"]], "_bibtex": "@article{feng2009OrthographyDevelopmentReading, title = {Orthography and the development of reading processes: {{An}} eye-movement study of {{Chinese}} and {{English}}}, shorttitle = {Orthography and the development of reading processes}, author = {Feng, Gary and Miller, Kevin and Shu, Hua and Zhang, Houcan}, year = 2009, journal = {Child Development}, volume = {80}, number = {3}, pages = {720--735}, issn = {1467-8624}, doi = {10.1111/j.1467-8624.2009.01293.x}, langid = {english}, pmid = {19489899}, keywords = {Adolescent,Age Factors,Awareness,Child,China,Eye Movements,Female,Humans,Language Development,Language Tests,Male,Reading,United States,Young Adult} }"}, {"Name": "Dundee", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "20", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English, French", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "10 participants for each language, participants read the text in their native language", "Reading instructions": null, "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "56,212", "Stimulus description": "Extracts of newspaper articles (\"The Independent\")", "Stimulus language": "English, French", "Stimulus language family": "Germanic, Romance", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text 
annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Dr Bouis Oculometer Eyetracker", "Mount": "Bite-bar", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "50", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": "3.33", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2010-jemr-feng"], ["data", "https://gitup.uni-potsdam.de/pmr2/2009-jemr-feng"], ["data", "https://gitup.uni-potsdam.de/pmr2/2010-qjep-kuperman-dambacher-nuthmann-kliegl"], ["paper", "https://pubmed.ncbi.nlm.nih.gov/22643118/"], ["paper", "https://gitup.uni-potsdam.de/pmr2/2009-jemr-feng"], ["paper", "https://gitup.uni-potsdam.de/pmr2/2010-jemr-feng/-/blob/main/2010_Feng_LandingPositionsMixedModels_jemr.pdf?ref_type=heads"], ["paper", "https://www.mdpi.com/1995-8692/3/2/8"], ["paper", "https://journals.sagepub.com/doi/10.1080/17470211003602412"]], "_bibtex": "@article{kennedy2013frequency, title = {Frequency and predictability effects in the {{Dundee Corpus}}: {{An}} eye movement analysis}, author = {Kennedy, Alan and Pynte, Jo{\\\"e}l and Murray, Wayne S and Paul, Shirley-Anne}, year = 2013, journal = {Quarterly Journal of Experimental Psychology}, volume = {66}, number = {3}, pages = {601--618}, publisher = {SAGE Publications Sage UK: London, England}, doi = {10.1080/17470218.2012.676054} }"}, {"Name": "Dyslexic Children EEG", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "Combined EEG and eye movements tracking, school children with and without dyslexia. 
See also Children EEG.", "# Participants": "36", "Age range": "8-12", "Age mean\u00b1SD": null, "Native language": "Serbian", "Inclusion criteria": "Group of dyslexic children was diagnosed with dyslexia, IQ higher than 90, control group no learning and reading disability or attention disorder", "Exclusion criteria": null, "Other characteristics": "18 with dyslexia and 18 without dyslexia, matched according to gender and school grade", "Reading instructions": null, "# Items": "1.0", "Items per subject": "1", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "The story \u201cSt Sava and the villager without luck\u201d was divided into 13 paragraphs/slides, so the text on each slide was kept in the original order but in different colours", "Stimulus language": "Serbian", "Stimulus language family": "Slavic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": " \u201cSt Sava and the villager without luck\u201d", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": "Data of this research is available upon request via corresponding author.", "Eye-tracker": "SMI RED-m", "Mount": "Remote, chin rest", "Sampling frequency (Hz)": "120", "Eye-to-screen distance (cm)": "57 to chin rest", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": "White, red, blue, yellow, orange, purple, 
turquoise", "_links": [["paper", "https://www.mdpi.com/2076-3425/11/5/539"]], "_bibtex": "@article{jakovljevic2021RelationPhysiologicalParameters, title = {The relation between physiological parameters and colour modifications in text background and overlay during reading in children with and without dyslexia}, author = {Jakovljevi{\\'c}, Tamara and Jankovi{\\'c}, Milica M. and Savi{\\'c}, Andrej M. and Soldatovi{\\'c}, Ivan and {\\v C}oli{\\'c}, Gordana and Jakulin, Tadeja Jere and Papa, Gregor and Kovi{\\'c}, Vanja}, year = 2021, journal = {Brain Sciences}, volume = {11}, number = {5}, pages = {539}, publisher = {MDPI}, issn = {2076-3425}, doi = {10.3390/brainsci11050539}, urldate = {2026-01-06}, copyright = {http://creativecommons.org/licenses/by/3.0/}, langid = {english}, keywords = {background colour,children,dyslexia,ECG,EDA,EEG,eye tracking,overlay colour,physiological parameters,reading,sensors,text colour} }"}, {"Name": "EMTeC", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Eye Movements on Machine-Generated Texts Corpus", "Dataset characteristics": null, "# Participants": "107", "Age range": "18-69", "Age mean\u00b1SD": "34.1\u00b113.5", "Native language": "English", "Inclusion criteria": "Native Speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "42.0", "Items per subject": "42", "Mean\u00b1SD words per item": "86.52\u00b120.68", "Mean\u00b1SD chars per item": "446.44", "Mean\u00b1SD chars per word": "5.16 \u00b1 0.52", "Mean\u00b1SD chars seen per participant": "18\u2019750.48", "Mean\u00b1SD words seen per participant": "38\u2019633", "Total # chars all items": "262\u2019507", "Total # words all items": "50,874", "Stimulus description": "Machine generated text, 
belonging to 6 different text types. 3 LLM models were prompted with 42 unique prompts and 5 different decoding strategies were employed. Each participant only saw 1 output of 1 model and 1 decoding strategy", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "LLMs: Phi-2, Mistral, WizardLM", "Comprehension questions": "Yes", "Text annotation": "Text-level: length text, average Zipf frequency, average word frequency, 8 different readability metrics; word-level: word length with and without punctuation, PoS, # of left and right dependent, distance to head, if last in line on ET screen, word & zipf frequency, surprisal & entropy on the output text only and on prompt & output text", "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": "CC-By Attribution 4.0 International", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo", "Mount": "Head-and-chin rest", "Sampling frequency (Hz)": "2000", "Eye-to-screen distance (cm)": "60", "Eye-to-camera distance (cm)": "55", "Monitor": "Original screen size subset to 31 cm height and 43 cm width (refresh rate 60 Hz)", "Resolution": "1280 \u00d7 1024", "Text presentation": null, "Font": "Courier ", "Font size": "14", "Monospaced": "yes", "Character per visual angle": "2.86", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/DiLi-Lab/EMTeC"], ["paper", "https://link.springer.com/article/10.3758/s13428-025-02677-4"]], "_bibtex": "@article{bolliger_emtec_2025, title = {{{EMTeC}}: {{A}} corpus of eye movements on machine-generated texts}, shorttitle = {{{EMTeC}}}, author = {Bolliger, Lena S. and Haller, Patrick and Cretton, Isabelle C. R. and Reich, David R. 
and Kew, Tannon and J{\\\"a}ger, Lena A.}, year = 2025, journal = {Behavior Research Methods}, volume = {57}, number = {7}, pages = {189}, issn = {1554-3528}, doi = {10.3758/s13428-025-02677-4}, urldate = {2025-07-16}, langid = {english}, keywords = {Computational Linguistics,Corpus Linguistics,Decoding,ESCRT,Eye-tracking,Language Processing,Machine Translation,Machine-generated,Natural Language Processing (NLP),Reading} }"}, {"Name": "ETDD70", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Eye-Tracking Dataset for Classification of Dyslexia Using AI-Based Methods", "Dataset characteristics": null, "# Participants": "70", "Age range": "9-10", "Age mean\u00b1SD": null, "Native language": "Czech", "Inclusion criteria": "Native Speakers", "Exclusion criteria": null, "Other characteristics": "35 dyslexic and 35 non-dyslexic", "Reading instructions": "Reading aloud ", "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "3 tasks: syllable reading: 9x10 syllables; meaningful texts: 7 lines with 6 sentences; pseudo text: 7 lines with 15 artificial sentences", "Stimulus language": "Czech", "Stimulus language family": "Slavic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "No", "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "no", "Scripts": "unclear", "Data license": "Creative Commons Attribution 4.0 International", "Other available data": null, "Notes on availability": null, "Eye-tracker": "SMI RED 250 remote", "Mount": null, "Sampling frequency (Hz)": "250", "Eye-to-screen distance (cm)": null, 
"Eye-to-camera distance (cm)": "60-70", "Monitor": "Refresh rate 60 Hz", "Resolution": null, "Text presentation": null, "Font": "Times New Roman", "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": "Grey", "_links": [["data", "https://zenodo.org/records/17513247"], ["paper", "https://www.springerprofessional.de/en/etdd70-eye-tracking-dataset-for-classification-of-dyslexia-using/50146130"]], "_bibtex": "@misc{dostalova2025ETDD70EyeTrackingDyslexia, title = {{{ETDD70}}: {{Eye-tracking}} dyslexia dataset}, shorttitle = {{{ETDD70}}}, author = {Dostalova, Nicol and Svaricek, Roman and Sedmidubsky, Jan and Culemann, Wolf and Sasinka, Cenek and Zezula, Pavel and Cenek, Jiri}, year = 2025, publisher = {Zenodo}, doi = {10.5281/zenodo.13332133}, urldate = {2025-12-18}, langid = {english} }"}, {"Name": "Eye Movement Control in Chinese Reading", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "84", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Mandarin", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": "30 third-grade students (mean 8.5 years), 27 fifth grade students (mean 10.6 years) and 27 adults ", "Reading instructions": "Read sentences silently for comprehension", "# Items": "200.0", "Items per subject": "120", "Mean\u00b1SD words per item": "14.7\u00b11.3", "Mean\u00b1SD chars per item": "8.1\u00b11.0", "Mean\u00b1SD chars per word": "1.81", "Mean\u00b1SD chars seen per participant": "3193", "Mean\u00b1SD words seen per participant": "1764", "Total # chars all items": "5321", "Total # words all items": "2940", "Stimulus description": "40 age-appropriate sentences were presented to each participant to balance the overall reading difficulty. 
Sentences for G3 and G5 children were selected from their textbooks, and sentences for adults were a subset of the Beijing Sentence Corpus, together with 80 common sentences", "Stimulus language": "Mandarin (simp. chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes, after 32 sentences", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink ", "Mount": "Desktop, head and chin rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "80", "Eye-to-camera distance (cm)": null, "Monitor": "21-in CRT monitor (frame rate 100 Hz)", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": "Song", "Font size": null, "Monospaced": null, "Character per visual angle": "0.9", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2019-dp-yan-pan-kliegl/-/tree/main?ref_type=heads"], ["paper", "https://gitup.uni-potsdam.de/pmr2/2019-dp-yan-pan-kliegl/-/blob/main/Yan_etal.DevelPsychol.2019.pdf?ref_type=heads"]], "_bibtex": "@article{yan2019EyeMovementControl, title = {Eye movement control in {{Chinese}} reading: {{A}} cross-sectional study}, shorttitle = {Eye movement control in {{Chinese}} reading}, author = {Yan, Ming and Pan, Jinger and Kliegl, Reinhold}, year = 2019, journal = {Developmental Psychology}, volume = {55}, number = {11}, pages = {2275--2285}, issn = {1939-0599}, doi = {10.1037/dev0000819}, langid = {english}, pmid = {31535894}, keywords = {Adult,Child,Child Behavior,China,Cross-Sectional Studies,Eye Movement Measurements,Eye Movements,Female,Humans,Male,Pattern Recognition Visual,Psycholinguistics,Reading,Young Adult} }"}, {"Name": "Eye-voice span", 
"Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Half of the participants read the sentences aloud, the other half silently", "# Participants": "63", "Age range": "16-24", "Age mean\u00b1SD": "18.6", "Native language": "German", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "32 participants (age 18\u00b11.5) read the sentences aloud, 31 (age 19\u00b11.4) silently", "Reading instructions": null, "# Items": "144.0", "Items per subject": "144", "Mean\u00b1SD words per item": "8.54\u00b11.44", "Mean\u00b1SD chars per item": "54.58\u00b110.67", "Mean\u00b1SD chars per word": "5.26\u00b12.59", "Mean\u00b1SD chars seen per participant": "7860", "Mean\u00b1SD words seen per participant": "1230", "Total # chars all items": "7860", "Total # words all items": "1230", "Stimulus description": "Potsdam Sentence Corpus 2, simple, declarative German sentences taken from various newspapers (Poltrock, unpublished Diploma thesis).", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": "Newspapers", "Comprehension questions": null, "Text annotation": "Word frequency, incremental cloze predictability", "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Tower Mount, head rest", "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": "60", "Eye-to-camera distance (cm)": null, "Monitor": "22\u2032\u2032 Iiyama Vision Master Pro 514 CRT monitor", "Resolution": "1280 \u00d7 960", "Text presentation": null, "Font": "Courier New", "Font size": "24", "Monospaced": null, "Character per visual angle": "2.22", "Font color": null, "Spacing": null, "Background color": null, "_links": 
[["data", "https://gitup.uni-potsdam.de/pmr2/2015-fpsyg-laubrock-kliegl"], ["paper", "https://www.frontiersin.org/journals/psychology/articles/10.3389/fpsyg.2015.01432/full#h3"]], "_bibtex": "@article{laubrock2015EyevoiceSpanReading, title = {The eye-voice span during reading aloud}, author = {Laubrock, Jochen and Kliegl, Reinhold}, year = 2015, journal = {Frontiers in Psychology}, volume = {6}, pages = {1--19}, publisher = {Frontiers}, issn = {1664-1078}, doi = {10.3389/fpsyg.2015.01432}, urldate = {2026-01-06}, langid = {english}, keywords = {cognitive control of gaze,Eye Movements,eye-voice lead,linear mixed models,Psycholinguistics,reading,synchronization} }"}, {"Name": "FakeNewsPerception", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": "Real and fake news stories, their ratings on the believability of stories, and post-tests including Cognitive Reflection Test (CRT), News-Find-Me (NFM), and perceived political orientation", "# Participants": "25", "Age range": null, "Age mean\u00b1SD": "25.9\u00b14.8", "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Read at usual reading pace.", "# Items": "120.0", "Items per subject": "60", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Minimal pairs of real and fake news stories; the news texts included a header and images", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "Different news", "Comprehension questions": "No", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", 
"Reading measures": "no", "Scripts": "yes", "Data license": "CC0 1.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii Pro Spectrum", "Mount": "Desk-mounted", "Sampling frequency (Hz)": "600", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": "23.8-inch screen", "Resolution": "1920 \u00d7 1080", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": "not specified", "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/C1UD2A"], ["paper", "https://www.sciencedirect.com/science/article/pii/S2352340921001931?via%3Dihub"]], "_bibtex": "@article{sumer2021FakeNewsPerceptionEyeMovement, title = {{{FakeNewsPerception}}: {{An}} eye movement dataset on the perceived believability of news stories}, shorttitle = {{{FakeNewsPerception}}}, author = {S{\\\"u}mer, {\\\"O}mer and Bozkir, Efe and K{\\\"u}bler, Thomas and Gr{\\\"u}ner, Sven and Utz, Sonja and Kasneci, Enkelejda}, year = 2021, journal = {Data in Brief}, volume = {35}, pages = {106909}, issn = {2352-3409}, doi = {10.1016/j.dib.2021.106909}, urldate = {2026-01-06}, keywords = {Eye movements,Eye tracking dataset,Fake news perception,Misinformation,Reading comprehension,Scanpath comparison} }"}, {"Name": "GECO", "Data accessibility": "Free", "pymovements": "No", "Full name": "Ghent Eye-tracking Corpus", "Dataset characteristics": "Bilinguals read one half in Dutch, the other in English.", "# Participants": "33", "Age range": null, "Age mean\u00b1SD": "Bilinguals: 21.2\u00b12.2; monolinguals: 21.8\u00b15.6", "Native language": "English, Dutch", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "19 bilinguals (L1: nl, L2: en) and 14 monolinguals", "Reading instructions": null, "# Items": "1.0", "Items per subject": null, "Mean\u00b1SD words per item": null, 
"Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Complete Agatha Christie novel which is easy to read", "Stimulus language": "English, Dutch", "Stimulus language family": "Germanic, Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "\"The Mysterious Affair at Styles\" by Agatha Christie", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://expsy.ugent.be/downloads/geco/"], ["paper", "https://link.springer.com/article/10.3758/s13428-016-0734-0"]], "_bibtex": "@article{Cop2016geco, title = {Presenting {{GECO}}: {{An}} eyetracking corpus of monolingual and bilingual sentence reading}, author = {Cop, Uschi and Dirix, Nicolas and Drieghe, Denis and Duyck, Wouter}, year = 2016, journal = {Behavior Research Methods}, volume = {49}, number = {2}, pages = {602--615}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-016-0734-0} }"}, {"Name": "GECO-CN", "Data accessibility": "Free", "pymovements": null, "Full name": "Ghent Eye\u2011tracking Corpus of\u00a0Sentence Reading for\u00a0Chinese\u2011English bilinguals", "Dataset characteristics": "Participants read one 
half in Chinese, the other in English.", "# Participants": "30", "Age range": "20-29", "Age mean\u00b1SD": "25.3\u00b12.6", "Native language": "Chinese", "Inclusion criteria": "Native speakers of Chinese, English as L2, studying in Belgium", "Exclusion criteria": null, "Other characteristics": "Bilinguals with L1: zh and L2: en", "Reading instructions": null, "# Items": "1.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": "1.44, 4.31", "Mean\u00b1SD chars seen per participant": "85540, 244984", "Mean\u00b1SD words seen per participant": "59403, 56841", "Total # chars all items": "85540, 244984", "Total # words all items": "59403, 56841", "Stimulus description": "Complete Agatha Christie novel which is easy to read", "Stimulus language": "English, Mandarin (simp. chars.)", "Stimulus language family": "Germanic, Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": "Desktop mount, chin and head rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": "Courier New", "Font size": "28", "Monospaced": "yes", "Character per visual angle": "1.6, 2", "Font color": "Black", "Spacing": null, "Background color": "Light grey", "_links": [["data", "https://osf.io/pmvhd/?view_only=77def2827a514254957cc846e14826cf"], ["paper", "https://link.springer.com/article/10.3758/s13428-022-01931-3"]], "_bibtex": "@article{Sui2022gecocn, title = {{{GECO-CN}}: {{Ghent Eye-tracking COrpus}} of sentence reading for 
{{Chinese-English}} bilinguals}, author = {Sui, Longjiao and Dirix, Nicolas and Woumans, Evy and Duyck, Wouter}, year = 2022, journal = {Behavior Research Methods}, volume = {55}, number = {6}, pages = {2743--2763}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-022-01931-3} }"}, {"Name": "GazeBase - Reading Task", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": "Reading spread over three years; each participant reads two items per session", "# Participants": "322", "Age range": "18-47", "Age mean\u00b1SD": "21.89\u00b14.22", "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Not all participants read all texts", "Reading instructions": "Maximum of 60 seconds to read texts silently", "# Items": "18.0", "Items per subject": "18", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Different passages from the same poem", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "\"The Hunting of the Snark\" by Lewis Carroll", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "no", "Reading measures": "no", "Scripts": "unclear", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Desktop mount, chin and forehead rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "55", "Eye-to-camera distance (cm)": null, "Monitor": "474\u00d7 297mm ViewSonic (ViewSonic Corporation, Brea, California, USA) 
monitor", "Resolution": "1680 \u00d7 1050", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://figshare.com/articles/dataset/GazeBase_Data_Repository/12912257?file=27039812"], ["paper", "https://www.nature.com/articles/s41597-021-00959-y"]], "_bibtex": "@article{Griffith2021gazebase, title = {{{GazeBase}}, a large-scale, multi-stimulus, longitudinal eye movement dataset}, author = {Griffith, Henry and Lohr, Dillon and Abdulin, Evgeny and Komogortsev, Oleg}, year = 2021, journal = {Scientific Data}, volume = {8}, number = {1}, pages = {1--9}, publisher = {{Springer Science and Business Media LLC}}, issn = {2052-4463}, doi = {10.1038/s41597-021-00959-y} }"}, {"Name": "GazeBase-VR", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": "Five different ET tasks: (1) a vergence task, (2) a horizontal smooth pursuit task, (3) a video-viewing task, (4) a self-paced reading task, and (5) a random oblique saccade task;", "# Participants": "407", "Age range": "18-58", "Age mean\u00b1SD": "20.95\u00b14.0", "Native language": "English", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Age was recorded at the beginning of the data collection", "Reading instructions": "Participants are informed beforehand that there will be a reading comprehension question", "# Items": "4.0", "Items per subject": "1", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": "820", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "3360", "Stimulus description": "Article from National Geographic, natural reading page", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": 
"Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "National Geographic", "Comprehension questions": "Yes, after the text", "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "no", "Reading measures": "no", "Scripts": "unclear", "Data license": "CC-BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "ET-enabled virtual-reality (VR) headset", "Mount": null, "Sampling frequency (Hz)": "250", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": "~1", "Font color": "Black", "Spacing": null, "Background color": "Light grey", "_links": [["data", "https://figshare.com/articles/dataset/GazeBaseVR_Data_Repository/21308391?file=38844024"], ["paper", "https://www.nature.com/articles/s41597-023-02075-5"]], "_bibtex": "@article{lohrGazeBaseVRLargescaleLongitudinal2023, title = {{{GazeBaseVR}}, a large-scale, longitudinal, binocular eye-tracking dataset collected in virtual reality}, author = {Lohr, Dillon and Aziz, Samantha and Friedman, Lee and Komogortsev, Oleg V.}, year = 2023, journal = {Scientific Data}, volume = {10}, number = {1}, pages = {177}, publisher = {Nature Publishing Group}, issn = {2052-4463}, doi = {10.1038/s41597-023-02075-5}, urldate = {2025-07-23}, copyright = {2023 The Author(s)}, langid = {english}, keywords = {Computer science,Electrical and electronic engineering} }"}, {"Name": "Grammatical/Ungrammatical", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Eye tracking data reading grammatical and ungrammatical sentences English and German", "# Participants": "98", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English, German", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": "2 eye 
tracking experiments and 4 self paced readings, numbers cover both ET experiments. First Experiment: 47 (English) participants, second 51 (German) ", "Reading instructions": null, "# Items": "92.0", "Items per subject": "76", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Constructed grammatical and ungrammatical sentences; grammatical sentences had 3 verbs, while the ungrammatical sentences had the second verb missing. 60 distractor sentences, 16 stimulus sentences in 2 conditions, for both languages", "Stimulus language": "German, English", "Stimulus language family": "Germanic, Germanic", "Stimulus naturalness": "Constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink I (English), IView-X (German) ", "Mount": "Head mount (English), chin rest (German)", "Sampling frequency (Hz)": "250 (English), 240 (German)", "Eye-to-screen distance (cm)": "50 (English), 55 (German)", "Eye-to-camera distance (cm)": null, "Monitor": "19-inch colour monitor (English), 17-inch colour monitor (German) ", "Resolution": "1024 \u00d7 768 (English), 1024 \u00d7 768 (German) ", "Text presentation": null, "Font": "Courier New", "Font size": null, "Monospaced": "yes", "Character per visual angle": "2.56 (English), 3.84 (German)", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2010-lcp-vasishth-suckow-lewis-kern"], ["paper", 
"https://www.tandfonline.com/doi/full/10.1080/01690960903310587"]], "_bibtex": "@article{vasishth2010ShorttermForgettingSentence, title = {Short-term forgetting in sentence comprehension: {{Crosslinguistic}} evidence from verb-final structures}, shorttitle = {Short-term forgetting in sentence comprehension}, author = {Vasishth, Shravan and Suckow, Katja and Lewis, Richard L. and Kern, Sabine}, year = 2010, journal = {Language and Cognitive Processes}, volume = {25}, number = {4}, pages = {533--567}, publisher = {Routledge}, issn = {0169-0965}, doi = {10.1080/01690960903310587}, urldate = {2025-12-20}, keywords = {Sentence comprehension,Ungrammatical embeddings,Working memory} }"}, {"Name": "Hong Kong Corpus", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "96", "Age range": null, "Age mean\u00b1SD": "26.0\u00b13.64", "Native language": "Mandarin", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": "University students", "Reading instructions": null, "# Items": "307.0", "Items per subject": "307", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": "10'117", "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "5250 (single-line); 4967 (multi-line)", "Stimulus description": "300 single-line sentences and 7 multi-line passages; from newspaper articles", "Stimulus language": "Mandarin (simp. 
chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Both single sentences and text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": "Word frequency, word length", "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Tower mount, chin rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": "18-inch ViewSonic CRT monitor (refresh rate 85Hz)", "Resolution": "1024 \u00d7 768 ", "Text presentation": "Single-line and multi-line", "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/7uq3j/"], ["paper", "https://www.nature.com/articles/s41597-023-02813-9"]], "_bibtex": "@article{Wu2023HKC, title = {Hong {{Kong}} corpus of {{Chinese}} sentence and passage reading}, author = {Wu, Yushu and Kit, Chunyu}, year = 2023, journal = {Scientific Data}, volume = {10}, number = {1}, pages = {1--13}, publisher = {{Springer Science and Business Media LLC}}, issn = {2052-4463}, doi = {10.1038/s41597-023-02813-9} }"}, {"Name": "IITB-HGC", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "IITB-Hallucination Gaze Corpus", "Dataset characteristics": null, "# Participants": "5", "Age range": "21-25", "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": "English proficiency, minimum of 100 score in TOEFL", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Participants are asked to assess faithfulness of a claim given its context", "# Items": "500.0", "Items per subject": "500", "Mean\u00b1SD words per item": "105", "Mean\u00b1SD chars 
per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "52'00", "Stimulus description": "500 claim-context pairs from the FactCC dataset (hallucination detection task)", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": null, "Stimulus length category": null, "Stimulus source": "FactCC dataset Kryscinski et al. (2020)", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "no", "Data license": "MIT", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus ", "Mount": "Head supported and chin rest", "Sampling frequency (Hz)": "2000", "Eye-to-screen distance (cm)": "70", "Eye-to-camera distance (cm)": null, "Monitor": "24-inch BenQ XL2420Z Widescreen LED Backlit TN Monitor, which had display dimensions of 569x337.8mm, resulting in a display area of 531.36x298.89mm (vertical refresh rate 144 Hz)", "Resolution": "1920 \u00d7 1080", "Text presentation": null, "Font": "Arial", "Font size": "20", "Monospaced": null, "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": "Light grey", "_links": [["data", "https://huggingface.co/datasets/cfilt/IITB-HGC/blob/main/README.md"], ["paper", "https://aclanthology.org/2023.findings-emnlp.764/"]], "_bibtex": "@inproceedings{maharaj_eyes_2023, title = {Eyes show the way: {{Modelling}} gaze behaviour for hallucination detection}, shorttitle = {Eyes {{Show}} the {{Way}}}, booktitle = {Findings of the {{Association}} for {{Computational Linguistics}}: {{EMNLP}} 2023}, author = {Maharaj, Kishan and Saxena, Ashita and Kumar, Raja and Mishra, Abhijit and Bhattacharyya, Pushpak}, year = 2023, pages = {11424--11438}, publisher = {Association for Computational 
Linguistics}, doi = {10.18653/v1/2023.findings-emnlp.764}, urldate = {2025-07-16} }"}, {"Name": "InteRead", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "An Eye Tracking Dataset of Interrupted Reading", "Dataset characteristics": "A self-paced reading task of an excerpt from an English fictional text, deliberately interrupted to simulate naturalistic scenarios. Excerpt from Arthur Conan Doyle\u2019s \u201cThe Adventure of the Speckled Band\u201d (written in British English and published in 1892). The excerpt consisted of 28 pages", "# Participants": "50", "Age range": "20-47", "Age mean\u00b1SD": "27.51\u00b15.55", "Native language": "English and others", "Inclusion criteria": "English proficiency", "Exclusion criteria": "Diagnosed attention or reading disorders", "Other characteristics": null, "Reading instructions": null, "# Items": "28.0", "Items per subject": "28", "Mean\u00b1SD words per item": "154\u00b122.3 ", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": "5,247", "Total # chars all items": null, "Total # words all items": "5,247", "Stimulus description": "Excerpt of fiction book, with interruptions", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": " Arthur Conan Doyle\u2019s \u201cThe Adventure of the Speckled Band\u201d", "Comprehension questions": "Yes, after reading all texts", "Text annotation": "Number of sentences in a page; the part-of-speech tags and dependency relations for a token; the total number of tokens and token types, i.e, the number of unique tokens, found in a page; the token length (in chars); the type-token ratio, the proportion of types to tokens in a given page; the content words, a boolean value indicating if a token is a content word; the token 
frequency in a natural logarithmic scale\u037e the abstractness/concreteness rating of a token, ranging from 1 (purely abstract) to 5 (purely concrete)", "Stimulus license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license (CC BY-NC-SA 4.0)", "Raw data": "yes", "Gaze events": "yes", "Reading measures": "no", "Scripts": "unclear", "Data license": "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International license (CC BY-NC-SA 4.0)", "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii Pro Spectrum", "Mount": "Head and chin rest", "Sampling frequency (Hz)": "1200", "Eye-to-screen distance (cm)": "57", "Eye-to-camera distance (cm)": null, "Monitor": "Tobii Pro Spectrum screen (EIZO FlexScan EV2451) with dimensions of 52.8\u00d729.7cm ", "Resolution": "1920 \u00d7 1080", "Text presentation": null, "Font": "Courier", "Font size": "16", "Monospaced": "yes", "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": "White", "_links": [["data", "https://osf.io/43j5f/"], ["paper", "https://aclanthology.org/2024.lrec-main.802/"]], "_bibtex": "@inproceedings{zermiani_interead_2024, title = {{{InteRead}}: {{An}} eye tracking dataset of interrupted reading}, shorttitle = {{{InteRead}}}, booktitle = {Proceedings of the 2024 {{Joint International Conference}} on {{Computational Linguistics}}, {{Language Resources}} and {{Evaluation}} ({{LREC-COLING}} 2024)}, author = {Zermiani, Francesca and Dhar, Prajit and Sood, Ekta and K{\\\"o}gel, Fabian and Bulling, Andreas and Wirzberger, Maria}, year = 2024, pages = {9154--9169}, publisher = {{ELRA and ICCL}}, }"}, {"Name": "Irony Processing", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Adults with ADHD and neurotypical read stories including either literal or ironic statements. 
Working memory tests scores available", "# Participants": "107", "Age range": "18-35", "Age mean\u00b1SD": "25.40", "Native language": "Norwegian", "Inclusion criteria": "Native Speaker", "Exclusion criteria": "Learning Disorder, dyslexia, ASD", "Other characteristics": "2 groups: typically-developed (TD) readers without ADHD (N = 55, mean age = 23.11, SD = 3.92), and readers with an official ADHD diagnosis (N = 52; mean age = 27.84, SD = 5.58).", "Reading instructions": null, "# Items": "48.0", "Items per subject": "24", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": "721.24\u00b147.56", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "24 story pairs, with 3 regions of interest: context region, used to disambiguate the target phrase, target phrase region, and spillover region; additionally, 12 filler stories. 
", "Stimulus language": "Norwegian", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes, after each texts", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": "Desktop mount", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/zygmd/files/osfstorage?view_only=70aee8d72cbe4cd189237a7453fb3b24"], ["paper", "https://journals.sagepub.com/doi/10.1177/10870547251333819"]], "_bibtex": "@article{kyriacouIronyProcessingAdults2025, title = {Irony processing in adults with {{ADHD}}: {{Evidence}} from eye-tracking and executive attention tasks}, shorttitle = {Irony {{Processing}} in {{Adults}} with {{ADHD}}}, author = {Kyriacou, Marianna and Rummelhoff, Cecilie and K{\\\"o}der, Franziska}, year = 2025, journal = {Journal of Attention Disorders}, pages = {10870547251333819}, publisher = {SAGE Publications Inc}, issn = {1087-0547}, doi = {10.1177/10870547251333819}, urldate = {2025-05-03} }"}, {"Name": "MECO L1", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Multilingual Eye-movement Corpus L1", "Dataset characteristics": null, "# Participants": "535", "Age range": "18-45", "Age mean\u00b1SD": null, "Native language": "German, Dutch, English, Greek, Hebrew, Italian, Russian, Spanish, Turkish, Korean, Norwegian, Finnish, Estonian", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other 
characteristics": null, "Reading instructions": null, "# Items": "12.0", "Items per subject": "12", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2028", "Stimulus description": "Wikipedia-style texts on various topics not requiring an academic background", "Stimulus language": "German, Dutch, English, Greek, Hebrew, Italian, Russian, Spanish, Turkish, Korean, Norwegian, Finnish, Estonian", "Stimulus language family": "Germanic, Germanic, Germanic, Hellenic, Semitic, Romance, Slavic, Romance, Turkic, Koreanic, Germanic, Finno-Ugric, Finno-Ugric", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo, EyeLink 1000, EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/3527a/"], ["paper", "https://link.springer.com/article/10.3758/s13428-021-01772-6"]], "_bibtex": "@article{Siegelman2022mecol1wave1, title = {Expanding horizons of cross-linguistic research on reading: {{The Multilingual Eye-movement Corpus}} ({{MECO}})}, author = {Siegelman, Noam and Schroeder, Sascha and Acart{\\\"u}rk, Cengiz and Ahn, Hee-Don and Alexeeva, Svetlana and Amenta, Simona and Bertram, Raymond 
and Bonandrini, Rolando and Brysbaert, Marc and Chernova, Daria and Fonseca, Sara Maria Da and Dirix, Nicolas and Duyck, Wouter and Fella, Argyro and Frost, Ram and Gattei, Carolina A. and Kalaitzi, Areti and Kwon, Nayoung and L{\\~o}o, Kaidi and Marelli, Marco and Papadopoulos, Timothy C. and Protopapas, Athanassios and Savo, Satu and Shalom, Diego E. and Slioussar, Natalia and Stein, Roni and Sui, Longjiao and Taboh, Anal{\\'\\i} and T{\\o}nnesen, Veronica and Usal, Kerem Alp and Kuperman, Victor}, year = 2022, journal = {Behavior Research Methods}, volume = {54}, number = {6}, pages = {2843--2863}, publisher = {{Springer Science and Business Media LLC}}, doi = {10.3758/s13428-021-01772-6} }"}, {"Name": "MECO L1 2nd Wave", "Data accessibility": "Free", "pymovements": "No", "Full name": "Multilingual Eye-movement Corpus L1 2nd Wave", "Dataset characteristics": null, "# Participants": "654", "Age range": "18-58", "Age mean\u00b1SD": null, "Native language": "Basque, Brazilian Portuguese, Danish, English, German, German (Swiss), Hindi, Icelandic, Mandarin, Norwegian, Russian, Serbian, Spanish, Turkish", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "12.0", "Items per subject": "12", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2028", "Stimulus description": "Wikipedia-style texts on various topics not requiring an academic background", "Stimulus language": "Basque, Brazilian Portuguese, Mandarin (simp. chars.), Mandarin (trad. 
chars.), Danish, English, German, Hindi, Icelandic, Norwegian, Russian, Serbian, Spanish, Turkish", "Stimulus language family": "Basque, Romance, Sinitic, Sinitic, Germanic, Germanic, Germanic, Indo-Aryan, Germanic, Germanic, Slavic, Slavic, Romance, Turkic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo, EyeLink II, EyeLink 1000, EyeLink 1000 Plus ", "Mount": null, "Sampling frequency (Hz)": "1000; Serbian: 500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/3527a/"], ["paper", "https://www.nature.com/articles/s41597-025-05453-3"]], "_bibtex": "@article{siegelman2025mecol1wave2, title = {Wave 2 of the {{Multilingual Eye-Movement Corpus}} ({{MECO}}): {{New}} text reading data across languages}, shorttitle = {Wave 2 of the {{Multilingual Eye-Movement Corpus}} ({{MECO}})}, author = {Siegelman, Noam and Schroeder, Sascha and Bao, Yaqian Borogjoon and Acart{\\\"u}rk, Cengiz and Agrawal, Niket and Bolliger, Lena S. and Brasser, Jan and {Campos-Rojas}, C{\\'e}sar and Drieghe, Denis and Filipovi{\\'c} {\\DJ}ur{\\dj}evi{\\'c}, Du{\\v s}ica and Goldina, Sofya and Ib{\\'a}{\\~n}ez Orellana, Romualdo and J{\\\"a}ger, Lena A. and J{\\'o}hannesson, {\\'O}mar I. and Khare, Anurag and Kharlamov, Nik and Knudsen, Hanne B. S. and Kristj{\\'a}nsson, {\\'A}rni and Lee, Charlotte E. and Lee, Jun Ren and Leite, Marina P. T. 
and Mancini, Simona and Mihajlovi{\\'c}, Nata{\\v s}a and Mi{\\v s}i{\\'c}, Ksenija and Orekhova, Miloslava and Parshina, Olga and Popovi{\\'c} Stija{\\v c}i{\\'c}, Milica and Protopapas, Athanassios and Reich, David R. and Rimzhim, Anurag and {Rothe-Neves}, Rui and S{\\'a}, Thais M. M. and {Santana-Covarrubias}, Andrea and Sekerina, Irina and Sigurdardottir, Heida M. and Smirnova, Anna and Srivastava, Priyanka and Teixeira, Elisangela N. and Ugrinic, Ivana and Usal, Kerem Alp and Vakulya, Karolina and Verma, Ark and Vieira, Jo{\\~a}o M. M. and Wu, Denise H. and Xue, Jin and Zdravkovi{\\'c}, Sun{\\v c}ica and Zhuo, Junjing and Ziaka, Laoura and Kuperman, Victor}, year = 2025, journal = {Scientific Data}, volume = {12}, number = {1}, pages = {1183}, publisher = {Nature Publishing Group}, issn = {2052-4463}, doi = {10.1038/s41597-025-05453-3}, urldate = {2025-08-04}, copyright = {2025 The Author(s)}, langid = {english}, keywords = {Databases,Human behaviour} }"}, {"Name": "MECO L2", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Multilingual Eye-movement Corpus L2", "Dataset characteristics": null, "# Participants": "543", "Age range": null, "Age mean\u00b1SD": "23.4", "Native language": "Dutch, English, Greek, German, Hebrew, Italian, Russian, Spanish, Turkish, Norwegian, Estonian, Finnish", "Inclusion criteria": "Non-native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "12.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1653", "Stimulus description": "Encyclopedic texts originally designed for English language testing", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", 
"Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo, EyeLink 1000, EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/q9h43/"], ["paper", "https://www.cambridge.org/core/journals/studies-in-second-language-acquisition/article/text-reading-in-english-as-a-second-language-evidence-from-the-multilingual-eyemovements-corpus/31CE1F8A8D33F93EE31B75AF26F76DB5"]], "_bibtex": "@article{kuperman-2023-mecol2-wave1, title = {Text reading in {{English}} as a second language: {{Evidence}} from the multilingual eye-movements corpus}, author = {Kuperman, Victor and Siegelman, Noam and Schroeder, Sascha and Acart{\\\"u}rk, Cengiz and Alexeeva, Svetlana and Amenta, Simona and Bertram, Raymond and Bonandrini, Rolando and Brysbaert, Marc and Chernova, Daria and {al.}, et}, year = 2023, journal = {Studies in Second Language Acquisition}, volume = {45}, number = {1}, pages = {3--37}, publisher = {Cambridge University Press}, doi = {10.1017/S0272263121000954} }"}, {"Name": "MECO L2 2nd Wave", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Multilingual Eye-movement Corpus L2 2nd Wave", "Dataset characteristics": null, "# Participants": "660", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Basque, Brazilian Portuguese, Danish,German, English, Hindi, Icelandic, Mandarin, Norwegian, Russian, Serbian, Spanish, 
Turkish", "Inclusion criteria": "Non-native speakers", "Exclusion criteria": "Uncharacteristically high English fluency for non-native speakers", "Other characteristics": null, "Reading instructions": null, "# Items": "12.0", "Items per subject": "12", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1653", "Stimulus description": "Encyclopedic texts originally designed for English language testing", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "ACCUPLACER Reading test and the English as Second Language Reading Skills Test", "Comprehension questions": "Yes, in between texts", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink Portable Duo, EyeLink II, EyeLink 1000, EyeLink 1000 Plus ", "Mount": "For lab-specific setups see supplementary materials", "Sampling frequency (Hz)": "1,000; Serbian: 500", "Eye-to-screen distance (cm)": "https://www.cambridge.org/core/journals/studies-in-second-language-acquisition/article/new-data-on-text-reading-in-english-as-a-second-language/C479CDB95EEC27DF56289529774D91E9#supplementary-materials", "Eye-to-camera distance (cm)": "https://www.cambridge.org/core/journals/studies-in-second-language-acquisition/article/new-data-on-text-reading-in-english-as-a-second-language/C479CDB95EEC27DF56289529774D91E9#supplementary-materials", "Monitor": 
"https://www.cambridge.org/core/journals/studies-in-second-language-acquisition/article/new-data-on-text-reading-in-english-as-a-second-language/C479CDB95EEC27DF56289529774D91E9#supplementary-materials", "Resolution": null, "Text presentation": null, "Font": "Consolas", "Font size": "20-22", "Monospaced": "yes", "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/q9h43/"], ["paper", "https://www.cambridge.org/core/journals/studies-in-second-language-acquisition/article/new-data-on-text-reading-in-english-as-a-second-language/C479CDB95EEC27DF56289529774D91E9"]], "_bibtex": "@article{kuperman2025mecol2wave2, title = {New data on text reading in {{English}} as a second language: {{The Wave}} 2 expansion of the {{Multilingual Eye-Movement Corpus}} ({{MECO}})}, shorttitle = {New data on text reading in {{English}} as a second language}, author = {Kuperman, Victor and Schroeder, Sascha and Acart{\\\"u}rk, Cengiz and Agrawal, Niket and Alexandre, Dominick M. and Bolliger, Lena S. and Brasser, Jan and {Campos-Rojas}, C{\\'e}sar and Drieghe, Denis and {\\DJ}ur{\\dj}evi{\\'c}, Du{\\v s}ica Filipovi{\\'c} and {de Freitas}, Luiz Vinicius Gadelha and Goldina, Sofya and Orellana, Romualdo Ib{\\'a}{\\~n}ez and J{\\\"a}ger, Lena A. and J{\\'o}hannesson, {\\'O}mar I. and Khare, Anurag and Kharlamov, Nik and Knudsen, Hanne B. S. and Kristj{\\'a}nsson, {\\'A}rni and Lee, Charlotte E. and Lee, Jun Ren and Leite, Marina P. T. and Mancini, Simona and Mihajlovi{\\'c}, Nata{\\v s}a and Mi{\\v s}i{\\'c}, Ksenija and Orekhova, Miloslava and Parshina, Olga and Stija{\\v c}i{\\'c}, Milica Popovi{\\'c} and Protopapas, Athanassios and Reich, David R. and Rimzhim, Anurag and {Rothe-Neves}, Rui and S{\\'a}, Thais M. M. and Covarrubias, Andrea Santana and Sekerina, Irina and Sigurdardottir, Heida M. and Smirnova, Anna and Srivastava, Priyanka and Teixeira, Elisangela N. 
and Ugrinic, Ivana and Usal, Kerem Alp and Vakulya, Karolina and Vieira, Jo{\\~a}o M. M. and Verma, Ark and Wu, Denise H. and Xue, Jin and Zdravkovi{\\'c}, Sun{\\v c}ica and Zhuo, Junjing and Ziaka, Laoura and Siegelman, Noam}, year = 2025, journal = {Studies in Second Language Acquisition}, pages = {1--19}, issn = {0272-2631, 1470-1545}, doi = {10.1017/S0272263125000105}, urldate = {2025-07-16}, langid = {english}, keywords = {comprehension,eye-tracking,fluency,reading,second language} }"}, {"Name": "MECO Mongolian", "Data accessibility": "Free", "pymovements": null, "Full name": "Multilingual Eye-movement Corpus L1 Mongolian", "Dataset characteristics": null, "# Participants": "66", "Age range": "18-27", "Age mean\u00b1SD": "20.82\u00b12.15", "Native language": "Mongolian", "Inclusion criteria": "Native speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Read the passages silently for comprehension and press the space bar once they finished reading each passage", "# Items": "12.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2592", "Stimulus description": "Wikipedia-style texts on various topics not requiring an academic background", "Stimulus language": "Mongolian", "Stimulus language family": "Mongolian", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes, for all texts", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Tower mount, chin rest and head restraint", 
"Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "54", "Eye-to-camera distance (cm)": null, "Monitor": "20-inch Lenovo L2021 monitor", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/3j9ut/files/osfstorage"], ["paper", "https://www.nature.com/articles/s41597-025-04771-w"]], "_bibtex": "@article{baoEyeMovementDatabase2025, title = {The eye movement database of passage reading in vertically written traditional {{Mongolian}}}, author = {Bao, Yaqian Borogjoon and Li, Xingshan and Kuperman, Victor}, year = 2025, journal = {Scientific Data}, volume = {12}, number = {1}, pages = {499}, publisher = {Nature Publishing Group}, issn = {2052-4463}, doi = {10.1038/s41597-025-04771-w}, urldate = {2025-07-16}, copyright = {2025 The Author(s)}, langid = {english}, keywords = {Databases,Human behaviour} }"}, {"Name": "MQA-RC", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "28", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": "Data collection has 2 parts. In the first part, 16 documents were read where the models usually gave incorrect answers to the comprehension questions. 23 participants read these documents in 3 different conditions: regular QA, where participants had access to the plot, the question and 5 answer options; open-ended answer generation, where participants saw the plot and the question, but had to generate their own responses; and QA by memory, where participants could first read the plot, and then answer the question (with 5 possible answers), without having the plot available. 
In the second part of the collection, 16 documents were chosen where the models usually gave the correct answers. These documents were read by 5 new participants who had access to the plots and answer options when answering the questions. ", "Reading instructions": null, "# Items": "32.0", "Items per subject": "16", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Movie plots", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "MovieQA dataset", "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": "The full dataset can be requested by filling out an EULA license agreement and send the agreement to cai-office@vis.uni-stuttgart.de.", "Eye-tracker": "Tobii", "Mount": null, "Sampling frequency (Hz)": "600", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://collaborative-ai.org/research/datasets/MQA-RC/"], ["paper", "https://aclanthology.org/2020.conll-1.2/"]], "_bibtex": "@inproceedings{sood2020interpreting, title = {Interpreting attention models with human visual attention in machine reading comprehension}, booktitle = {Proceedings of the 24th {{Conference}} on {{Computational Natural Language Learning}}}, author = {Sood, Ekta and 
Tannert, Simon and Frassinelli, Diego and Bulling, Andreas and Vu, Ngoc Thang}, year = 2020, pages = {12--25}, publisher = {Association for Computational Linguistics}, doi = {10.18653/v1/2020.conll-1.2} }"}, {"Name": "Mental Simulation Corpus", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Participants answered questions that allow for studying their mental simulation during reading.", "# Participants": "102", "Age range": null, "Age mean\u00b1SD": "23", "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "3.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "10800", "Stimulus description": "Literary short stories", "Stimulus language": "Dutch", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/qgx26/"], ["paper", "https://www.tandfonline.com/doi/full/10.1080/23273798.2018.1552007"]], "_bibtex": 
"@article{mak2019mentalsimulation, title = {Mental simulation during literary reading: {{Individual}} differences revealed with eye-tracking}, author = {Mak, Marloes and Willems, Roel M.}, year = 2019, journal = {Language, Cognition and Neuroscience}, volume = {34}, number = {4}, pages = {511--535}, publisher = {Routledge}, doi = {10.1080/23273798.2018.1552007} }"}, {"Name": "Morphological Structure I", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Two experiments described in the paper, this entry is about experiment 1. Each subject read half of the total number of sentences (i.e., 60); they read the other half of the sentences in a Chinese translation and the sentences were counterbalanced across subjects.", "# Participants": "48", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Uighur", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": "Undergraduates", "Reading instructions": null, "# Items": "120.0", "Items per subject": "120", "Mean\u00b1SD words per item": "9.2\u00b11.2", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": "7.5\u00b13", "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1106", "Stimulus description": null, "Stimulus language": "Uighur, Mandarin (simp. 
chars.)", "Stimulus language family": "Turkic, Sinitic", "Stimulus naturalness": null, "Stimulus length category": null, "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": "Word length, launch site, morphological complexity, and word frequency on FLPs", "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Chin rest", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "50", "Eye-to-camera distance (cm)": null, "Monitor": "19-in. ViewSonic G90f CRT monitor (frame rate 100 Hz)", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": "Bold", "Font size": "20", "Monospaced": null, "Character per visual angle": "2.17", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2014-cogn-yan-zhou-shu-yusupu-miao-kruegel-kliegl"], ["paper", "https://www.sciencedirect.com/science/article/pii/S001002771400047X?via%3Dihub"]], "_bibtex": "@article{yan2014EyeMovementsGuided, title = {Eye movements guided by morphological structure: {{Evidence}} from the {{Uighur}} language}, shorttitle = {Eye movements guided by morphological structure}, author = {Yan, Ming and Zhou, Wei and Shu, Hua and Yusupu, Rizwangul and Miao, Dongxia and Kr{\\\"u}gel, Andr{\\'e} and Kliegl, Reinhold}, year = 2014, journal = {Cognition}, volume = {132}, number = {2}, pages = {181--215}, issn = {0010-0277}, doi = {10.1016/j.cognition.2014.03.008}, urldate = {2025-12-20}, keywords = {Eye movements,Landing position,Morphological structure,Uighur} }"}, {"Name": "Morphological Structure II", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Two experiments described in the paper, this entry is about experiment 2.", "# Participants": "30", "Age range": null, "Age mean\u00b1SD": 
null, "Native language": "Uighur", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": "Undergraduates", "Reading instructions": null, "# Items": "86.0", "Items per subject": "86", "Mean\u00b1SD words per item": "7.3\u00b11.6", "Mean\u00b1SD chars per item": "48.1\u00b111.7", "Mean\u00b1SD chars per word": "7.5\u00b13", "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Sentences constructed around 86 pairs of target words (84 nouns and 2 verbs) with different morphological structure", "Stimulus language": "Uighur, Mandarin (simp. chars.)", "Stimulus language family": "Turkic, Sinitic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": "Word length, launch site, morphological complexity, and word frequency on FLPs", "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": "Chin rest", "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": "50", "Eye-to-camera distance (cm)": null, "Monitor": "19-in. 
ViewSonic G90f CRT monitor (frame rate 100 Hz)", "Resolution": "1024 \u00d7 768", "Text presentation": null, "Font": null, "Font size": "20", "Monospaced": null, "Character per visual angle": "2.38", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2014-cogn-yan-zhou-shu-yusupu-miao-kruegel-kliegl"], ["paper", "https://www.sciencedirect.com/science/article/pii/S001002771400047X?via%3Dihub"]], "_bibtex": "@article{yan2014EyeMovementsGuided, title = {Eye movements guided by morphological structure: {{Evidence}} from the {{Uighur}} language}, shorttitle = {Eye movements guided by morphological structure}, author = {Yan, Ming and Zhou, Wei and Shu, Hua and Yusupu, Rizwangul and Miao, Dongxia and Kr{\\\"u}gel, Andr{\\'e} and Kliegl, Reinhold}, year = 2014, journal = {Cognition}, volume = {132}, number = {2}, pages = {181--215}, issn = {0010-0277}, doi = {10.1016/j.cognition.2014.03.008}, urldate = {2025-12-20}, keywords = {Eye movements,Landing position,Morphological structure,Uighur} }"}, {"Name": "Multi Choice Answering", "Data accessibility": "Not available", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "71", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": "Non native Speaker of English", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Multiple-choice reading comprehension test in English, where on 1 screen the task is presented to fill in the best fitting word, as well as the corresponding text passage and 5 possible words to fill 
the blank", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "Released items of the Foreign (English) Language Exam 2018", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii TX300 screen-based", "Mount": null, "Sampling frequency (Hz)": "300", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://dergipark.org.tr/en/pub/epod/issue/72844/1107597"]], "_bibtex": "@article{corbaci2022LatentGrowthModeling, title = {Latent growth modeling of item process data derived from eye-tracking technology: {{An}} experimental study investigating reading behavior of examinees when answering a multiple-choice test item}, shorttitle = {Latent {{Growth Modeling}} of {{Item Process Data Derived From Eye-tracking Technology}}}, author = {{\\c C}orbac{\\i}, Erg{\\\"u}n Cihat and Kahraman, Nil{\\\"u}fer}, year = 2022, journal = {Journal of Measurement and Evaluation in Education and Psychology}, volume = {13}, number = {3}, pages = {194--211}, publisher = {{Association for Measurement and Evaluation in Education and Psychology}}, issn = {1309-6575}, doi = {10.21031/epod.1107597}, urldate = {2026-01-05}, langid = {english} }"}, {"Name": "Not Batting an Eye", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "L2 English speakers read sentences with idioms used literally and figuratively", "# Participants": "50", "Age range": null, "Age 
mean\u00b1SD": "29.5\u00b13.99", "Native language": "Norwegian", "Inclusion criteria": "English as L2", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Read the sentences silently as fast as possible without compromising comprehension, and to press ENTER when they finished reading each sentence to proceed to the next trial", "# Items": "40.0", "Items per subject": "40", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Sentences with idiomatic expression used in literal or figurative senses; 10 idioms, 10 literal, 20 fillers", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": "Yes, after 70 % of filler sentences", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": "CC BY 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": "Desktop, chin and forehead rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": "Courier New", "Font size": "16", "Monospaced": null, "Character per visual angle": null, "Font color": "Black", "Spacing": null, "Background color": "White", "_links": [["data", "https://www.mdpi.com/article/10.3390/languages9010032/s1"], ["paper", "https://www.mdpi.com/2226-471X/9/1/32"]], "_bibtex": "@article{kyriacou_not_2024, title = {Not {{Batting}} an {{Eye}}: {{Figurative}} meanings of {{L2}} idioms do not interfere with literal 
uses}, shorttitle = {Not {{Batting}} an {{Eye}}}, author = {Kyriacou, Marianna and K{\\\"o}der, Franziska}, year = 2024, journal = {Languages}, volume = {9}, number = {1}, pages = {32}, issn = {2226-471X}, doi = {10.3390/languages9010032}, urldate = {2025-07-16}, langid = {english}, keywords = {eye-tracking reading,idioms,literal idiom uses,literal meaning,second language (L2)} }"}, {"Name": "OASST-ETC", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "24", "Age range": null, "Age mean\u00b1SD": "31.66\u00b16.13", "Native language": null, "Inclusion criteria": "High proficiency in English", "Exclusion criteria": "Reported neurological or psychiatric histories or medications.", "Other characteristics": null, "Reading instructions": "Participant was asked to evaluate 30 prompts (from a set of 45) with 2 responses each, with the option to skip up to 15 prompts if they felt unqualified to assess them", "# Items": "360.0", "Items per subject": "30-45", "Mean\u00b1SD words per item": "57.82\u00b134.94", "Mean\u00b1SD chars per item": "278.11", "Mean\u00b1SD chars per word": "4.81\u00b10.78", "Mean\u00b1SD chars seen per participant": "278.11 * 37 = 10\u2019290.23", "Mean\u00b1SD words seen per participant": "2\u2019139.34", "Total # chars all items": "100\u2019121.11", "Total # words all items": "20\u2019815", "Stimulus description": "Prompt plus 2 answers used for RLHF; 360 prompts split into 8 subsets of 45", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": null, "Stimulus source": "OpenAssistant Conversations dataset (OASST1)", "Comprehension questions": "No", "Text annotation": "Average num of words, chars", "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": null, "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": 
"GP3 HD Eye Tracker", "Mount": null, "Sampling frequency (Hz)": "60", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": "27-inch monitor with dimensions 23.53 inches (width) by 13.24 inches (height).", "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/Telefonica-Scientific-Research/oasstetc/tree/main"], ["paper", "https://dl.acm.org/doi/abs/10.1145/3725840"]], "_bibtex": "@article{lopez-cardona2025OASSTETCDatasetAlignment, title = {{{OASST-ETC}} dataset: {{Alignment}} signals from eye-tracking analysis of {{LLM}} responses}, shorttitle = {{{OASST-ETC Dataset}}}, author = {{Lopez-Cardona}, Angela and Idesis, Sebastian and {Barreda-{\\'A}ngeles}, Miguel and Abadal, Sergi and Arapakis, Ioannis}, year = 2025, journal = {Proceedings of the ACM on Human-Computer Interaction}, volume = {9}, number = {3}, pages = {1--29}, doi = {10.1145/3725840}, urldate = {2025-12-20} }"}, {"Name": "OneStop Eye Movements", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": "Consists of four sub-corpora, each using a different reading regime: ordinary reading for comprehension, information seeking, repeated reading, information seeking in repeated reading.", "# Participants": "360", "Age range": null, "Age mean\u00b1SD": "22.8\u00b15.6", "Native language": "English", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "30.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "19,425 (advanced) 15,737 
(elementary) 19,221 (QA)", "Stimulus description": "Guardian articles including 3 multiple choice reading comprehension questions annotated according to the STARC scheme. Each article comes in 3 difficulty levels: elementary, intermediate and advanced. ", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "https://huggingface.co/datasets/malmaud/onestop_qa", "Comprehension questions": "Yes", "Text annotation": "Word length, frequency and surprisal (GPT2), part-of-speech tags, syntactic dependency trees", "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/lacclab/OneStop-Eye-Movements"], ["paper", "https://osf.io/preprints/psyarxiv/kgxv5_v2"]], "_bibtex": "@article{berzak2025OneStop360ParticipantEnglish, title = {{{OneStop}}: {{A}} 360-participant {{English}} eye tracking dataset with different reading regimes}, shorttitle = {{{OneStop}}}, author = {Berzak, Yevgeni and Malmaud, Jonathan and Shubi, Omer and Meiri, Yoav and Lion, Ella and Levy, Roger}, year = 2025, journal = {Scientific Data}, volume = {12}, number = {1}, pages = {1995}, publisher = {Nature Publishing Group}, issn = {2052-4463}, doi = {10.1038/s41597-025-06272-2}, urldate = {2026-01-22}, copyright = {2025 The Author(s)}, langid = {english}, keywords = {Human behaviour} }"}, {"Name": "PSC", "Data accessibility": "Free", 
"pymovements": null, "Full name": "Potsdam Sentence Corpus", "Dataset characteristics": "Includes human predictability norms", "# Participants": "222", "Age range": "16-84", "Age mean\u00b1SD": null, "Native language": "German", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "144.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Sentences are constructed naturally around a target word", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": "The data seems to be scattered across multiple repositories. 
It is not entirely clear what is available.", "Eye-tracker": "EyeLink I, EyeLink II", "Mount": null, "Sampling frequency (Hz)": "250 / 500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2012-vwr-heister-wuerzner-kliegl"], ["data", "https://gitup.uni-potsdam.de/pmr2/2017-pbr-hohenstein-matuschek-kliegl"], ["data", "https://gitup.uni-potsdam.de/pmr2/2007-jepgen-kliegl"], ["data", "https://gitup.uni-potsdam.de/pmr2/2007-br-dambacher-kliegl"], ["paper", "https://www.tandfonline.com/doi/abs/10.1080/09541440340000213"]], "_bibtex": "@article{kliegl2004psc, title = {Length, frequency, and predictability effects of words on eye movements in reading}, author = {Kliegl, Reinhold and Grabner, Ellen and Rolfs, Martin and Engbert, Ralf}, year = 2004, journal = {European Journal of Cognitive Psychology}, volume = {16}, number = {1-2}, pages = {262--284}, publisher = {Routledge}, doi = {10.1080/09541440340000213} }"}, {"Name": "PSC II", "Data accessibility": "Free", "pymovements": null, "Full name": "Potsdam Sentence Corpus II", "Dataset characteristics": null, "# Participants": "273", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "144.0", "Items per subject": "144", "Mean\u00b1SD words per item": "8.54\u00b11.44", "Mean\u00b1SD chars per item": "54.58\u00b110.67", "Mean\u00b1SD chars per word": "5.26\u00b12.59", "Mean\u00b1SD chars seen per participant": "7860", "Mean\u00b1SD words seen per participant": "1230", "Total # chars all items": "7860", "Total # words all items": "1230", "Stimulus description": "Sentences from Potsdam 
Sentence Corpus.", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": "Newspapers", "Comprehension questions": null, "Text annotation": "Word frequency, incremental cloze predictability", "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2012-vwr-heister-wuerzner-kliegl/-/tree/main?ref_type=heads"], ["paper", "https://gitup.uni-potsdam.de/pmr2/2012-vwr-heister-wuerzner-kliegl/-/tree/main?ref_type=heads"]], "_bibtex": "@incollection{heister2012AnalysingLargeDatasets, title = {Analysing large datasets of eye movements during reading}, booktitle = {Visual {{Word Recognition Volume}} 2}, author = {Heister, Julian and W{\\\"u}rzner, Kay-Michael and Kliegl, Reinhold}, year = 2012, publisher = {Psychology Press} }"}, {"Name": "PSC with EEG", "Data accessibility": "Unclear", "pymovements": null, "Full name": null, "Dataset characteristics": "30 participants read PSC sentences with ET and EEG tracking", "# Participants": "30", "Age range": "17-37", "Age mean\u00b1SD": "23", "Native language": "German", "Inclusion criteria": "Native Speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "144.0", "Items per subject": "144", "Mean\u00b1SD words per item": "7.9", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": 
null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": "1138", "Total # chars all items": null, "Total # words all items": "1138", "Stimulus description": "Data of 4 groups are compared in this study. A group of 24 high-school students and a group of 32 older readers read the Potsdam Sentence Corpus (PSC; Kliegl et al., 2004, 2006) with easy questions. The young adults averaged 17.6 years of age (SD = 0.6, range: 16\u201318 years), and the older adults averaged 70.6 years of age (SD = 4.0, range: 65\u201384 years). An age-matched group of 30 high-school students and 25 old readers read the PSC with frequent, difficult questions. The hard-question young adults averaged 18.5 years of age (SD = 0.9, range: 17\u201320 years) and the hard-question old adults averaged 68.0 years of age (SD = 3.3, range: 65\u201376 years).", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": "PSC", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "IView-X", "Mount": "Table mount, chin and forehead rest", "Sampling frequency (Hz)": "240", "Eye-to-screen distance (cm)": "85", "Eye-to-camera distance (cm)": null, "Monitor": "17-in. 
(43.18-cm) monitor (SyncMaster 171T TFT [Samsung Group, Seoul, South Korea], 60 Hz vertical refresh)", "Resolution": "800 \u00d7 600", "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2011-jepgen-dimigen-sommer-hohlfeld-jacobs-kliegl"], ["paper", "https://gitup.uni-potsdam.de/pmr2/2011-jepgen-dimigen-sommer-hohlfeld-jacobs-kliegl/-/blob/main/Dimigen.et.al.2011.JEPGEN.2011.Eyetracking+EEG.pdf?ref_type=heads"]], "_bibtex": "@article{dimigen2011CoregistrationEyeMovements, title = {Coregistration of eye movements and {{EEG}} in natural reading: {{Analyses}} and review}, shorttitle = {Coregistration of eye movements and {{EEG}} in natural reading}, author = {Dimigen, Olaf and Sommer, Werner and Hohlfeld, Annette and Jacobs, Arthur M. and Kliegl, Reinhold}, year = 2011, journal = {Journal of Experimental Psychology. 
General}, volume = {140}, number = {4}, pages = {552--572}, issn = {1939-2222}, doi = {10.1037/a0023885}, langid = {english}, pmid = {21744985}, keywords = {Adolescent,Adult,Electroencephalography,Evoked Potentials Visual,Eye Movements,Female,Fixation Ocular,Humans,Male,Reading,Visual Perception,Young Adult} }"}, {"Name": "PSR", "Data accessibility": "Free", "pymovements": "No", "Full name": "Persian Sentence Reading Corpus of Eye Movements", "Dataset characteristics": null, "# Participants": "60", "Age range": null, "Age mean\u00b1SD": "29.60\u00b14.03", "Native language": "Persian, Azari", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "10 participants were monolingual Persian speakers, while 50 were bilinguals of Azari", "Reading instructions": null, "# Items": "99.0", "Items per subject": "99", "Mean\u00b1SD words per item": "10.27\u00b11.01", "Mean\u00b1SD chars per item": "55.76\u00b16.74", "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "60'900", "Stimulus description": "Naturalistic sentences, taken from the Bijankhan corpus", "Stimulus language": "Persian", "Stimulus language family": "Iranian", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": "Bijankhan corpus", "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": "Head and chin rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "73", "Eye-to-camera distance (cm)": "66", "Monitor": "24-inch monitor", "Resolution": " 1024 \u00d7 768", "Text presentation": null, "Font": "Courier New", "Font size": "18", 
"Monospaced": "yes", "Character per visual angle": "2.38", "Font color": "Black", "Spacing": null, "Background color": "White", "_links": [["data", "https://osf.io/4w362/"], ["paper", "https://pmc.ncbi.nlm.nih.gov/articles/PMC11634938/#Abs1"]], "_bibtex": "@article{tekbudak2024PSRCorpusPersian, title = {The {{PSR}} corpus: {{A Persian}} sentence reading corpus of eye movements}, shorttitle = {The {{PSR}} corpus}, author = {Tekbudak, Zohre Soleymani and Purmohammad, Mehdi and {\\\"O}zkan, Ay{\\c s}eg{\\\"u}l and Acart{\\\"u}rk, Cengiz}, year = 2024, journal = {Behavior Research Methods}, volume = {57}, number = {1}, pages = {14}, issn = {1554-3528}, doi = {10.3758/s13428-024-02517-x}, urldate = {2025-12-20}, langid = {english}, keywords = {Eye movements,Oculomotor control in reading,Persian,Right-to-left script reading,Silent reading} }"}, {"Name": "Parafoveal-on-foveal", "Data accessibility": "Unclear", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "111", "Age range": "16-76", "Age mean\u00b1SD": null, "Native language": "German", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Data of 4 groups are compared in this study. A group of 24 high-school students and a group of 32 older readers read the Potsdam sentence corpus (PSC; Kliegl et al., 2004, 2006) with easy questions.1 The young adults averaged 17.6 years of age (SD = 0.6, range: 16\u201318 years), and the older adults averaged 70.6 years of age (SD = 4.0, range: 65\u201384 years). An age-matched group of 30 high-school students and 25 old readers read the PSC with frequent, difficult questions. 
The hard-question young adults averaged 18.5 years of age (SD = 0.9, range: 17\u201320 years) and the hard-question old adults averaged 68.0 years of age (SD = 3.3, range: 65\u201376 years).", "Reading instructions": null, "# Items": "144.0", "Items per subject": "144", "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1138", "Stimulus description": null, "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": "PSC", "Comprehension questions": "Yes, depending on condition (hard or easy questions, and at different frequencies)", "Text annotation": "Same as PSC", "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II, EyeLink I (old adults easy question group)", "Mount": null, "Sampling frequency (Hz)": "500, 250 (old adults, easy question group)", "Eye-to-screen distance (cm)": "60", "Eye-to-camera distance (cm)": null, "Monitor": "21-in. 
EYE-Q 650 monitor (frame rate 75 Hz)", "Resolution": "832 \u00d7 632", "Text presentation": null, "Font": "New Courier", "Font size": "12", "Monospaced": "yes", "Character per visual angle": "2.63", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2013-qjep-wotschack-kliegl"], ["paper", "https://journals.sagepub.com/doi/10.1080/17470218.2011.625094"]], "_bibtex": "@article{wotschack2013ReadingStrategyModulates, title = {Reading strategy modulates parafoveal-on-foveal effects in sentence reading}, author = {Wotschack, Christiane and Kliegl, Reinhold}, year = 2013, journal = {Quarterly Journal of Experimental Psychology}, volume = {66}, number = {3}, pages = {548--562}, publisher = {SAGE Publications}, issn = {1747-0218}, doi = {10.1080/17470218.2011.625094}, urldate = {2026-01-06}, langid = {english} }"}, {"Name": "Passage Reading", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Corpus constructed specifically to study parafoveal viewing", "# Participants": "48", "Age range": null, "Age mean\u00b1SD": "26.48\u00b114.83", "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "40.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Text passages constructed around target words which will either appear in parafovea or not", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, 
"Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/4qtnf/"], ["paper", "https://www.tandfonline.com/doi/full/10.1080/20445911.2017.1340303#abstract"]], "_bibtex": "@article{parker2017passage-reading, title = {Predictability effects during reading in the absence of parafoveal preview}, author = {Parker, Adam J. and Kirkby, Julie A. and Slattery, Timothy J.}, year = 2017, journal = {Journal of Cognitive Psychology}, volume = {29}, number = {8}, pages = {902--911}, publisher = {Routledge}, doi = {10.1080/20445911.2017.1340303} }"}, {"Name": "PoTeC", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Potsdam Textbook Corpus", "Dataset characteristics": "Corpus designed to study within-participant and across-participants expert reading", "# Participants": "75", "Age range": null, "Age mean\u00b1SD": "24.2\u00b14.2", "Native language": "German", "Inclusion criteria": "Must be in either 1st year of BSc with main subject Biology or Physics (=undergraduate), or already have at least a BSc degree in either Biology or Physics (=graduate); native speakers", "Exclusion criteria": "Participants have or pursue a degree in both subjects", "Other characteristics": "Native speakers", "Reading instructions": "read naturally", "# Items": "12.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per 
participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1896", "Stimulus description": "Texts from undergraduate textbooks on physics or biology", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes; 3 text questions and 3 background questions after each text", "Text annotation": "See \"Summary of the available features\" in the paper. Many features at different levels (e.g. surprisal, PoS-tags, education, ...)", "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Chin-and-forehead rest", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": "One page, header on separate page", "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": "White", "Spacing": null, "Background color": "Black", "_links": [["data", "https://github.com/DiLi-Lab/PoTeC"], ["paper", "https://arxiv.org/abs/2403.00506"]], "_bibtex": "@article{jakobi2025PoTeCGermanNaturalistic, title = {{{PoTeC}}: {{A German}} naturalistic eye-tracking-while-reading corpus}, shorttitle = {{{PoTeC}}}, author = {Jakobi, Deborah N. and Kern, Thomas and Reich, David R. 
and Haller, Patrick and J{\\\"a}ger, Lena A.}, year = 2025, journal = {Behavior Research Methods}, volume = {57}, number = {8}, pages = {211}, issn = {1554-3528}, doi = {10.3758/s13428-024-02536-8}, urldate = {2025-12-20}, langid = {english}, keywords = {Corpus,Eye-tracking,German,Reading} }"}, {"Name": "PopSci Corpus", "Data accessibility": "Not available", "pymovements": "No", "Full name": null, "Dataset characteristics": null, "# Participants": "17", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "16.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "20000", "Stimulus description": "Popular science texts on natural and applied science", "Stimulus language": "German", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", 
"https://www.researchgate.net/publication/245032535_PopSci_A_reading_corpus_of_popular_science_texts_with_rich_multi-level_annotations_A_case_study"]], "_bibtex": "@inproceedings{popsci, title = {{{PopSci}}: {{A}} reading corpus of popular science texts with rich multi-level annotations. {{A}} case study}, booktitle = {Book of {{Abstracts}} of the 17th {{European Conference}} on {{Eye Movements}}.}, author = {Wolfer, Sascha and {M{\\\"u}ller-Feldmeth}, Daniel and Konieczny, Lars and Held, Uli and Maksymski, K. and {Hansen-Schirra}, Silvia and Hansen, S. and Auer, Peter}, year = 2013, pages = {1--2}, publisher = {Bern Open Publishing} }"}, {"Name": "Potsdam-Allahabad Hindi Eyetracking Corpus", "Data accessibility": "Unclear", "pymovements": null, "Full name": null, "Dataset characteristics": "The same sentences were read in Hindi and Urdu script in 2 separate sessions.", "# Participants": "30", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "153.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2610", "Stimulus description": "Sentences are selected from the Hindi-Urdu treebank", "Stimulus language": "Hindi, Urdu", "Stimulus language family": "Indo-Aryan, Indo-Aryan", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "unclear", "Gaze events": "unclear", "Reading measures": "unclear", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "SMI iView X 
HED", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["paper", "https://www.mdpi.com/1995-8692/8/2/8"]], "_bibtex": "@article{Husain-Vasishth-Srinivasan-2014-hindi, title = {Integration and prediction difficulty in {{Hindi}} sentence comprehension: {{Evidence}} from an eye-tracking corpus}, author = {Husain, Samar and Vasishth, Shravan and Srinivasan, Narayanan}, year = 2014, journal = {Journal of Eye Movement Research}, volume = {8}, number = {2}, pages = {1--12}, doi = {10.16910/jemr.8.2.3} }"}, {"Name": "Provo Corpus", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "84", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "55.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2750", "Stimulus description": "Short texts from various different sources", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, 
"Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/sjefs/"], ["paper", "https://link.springer.com/article/10.3758/s13428-017-0908-4"]], "_bibtex": "@article{Luke2017provo, title = {The {{Provo Corpus}}: {{A}} large eye-tracking corpus with predictability norms}, author = {Luke, Steven G. and Christianson, Kiel}, year = 2017, journal = {Behavior Research Methods}, volume = {50}, number = {2}, pages = {826--833}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-017-0908-4} }"}, {"Name": "RSC", "Data accessibility": "Free", "pymovements": null, "Full name": "Russian Sentence Corpus", "Dataset characteristics": "Includes human predictability norms", "# Participants": "96", "Age range": null, "Age mean\u00b1SD": "24.0", "Native language": "Russian", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "144.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "1362", "Stimulus description": "Sentences are constructed naturally around a target word selected from the Russian National Corpus.", "Stimulus language": "Russian", "Stimulus language family": "Slavic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, 
"Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/x5q2r/"], ["paper", "https://link.springer.com/article/10.3758/s13428-018-1051-6"]], "_bibtex": "@article{Laurinavichyute2018RSC, title = {Russian sentence corpus: {{Benchmark}} measures of eye movements in reading in russian}, author = {Laurinavichyute, Anna K. and Sekerina, Irina A. and Alexeeva, Svetlana and Bagdasaryan, Kristine and Kliegl, Reinhold}, year = 2018, journal = {Behavior Research Methods}, volume = {51}, number = {3}, pages = {1161--1178}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-018-1051-6} }"}, {"Name": "RaCCooNS", "Data accessibility": "Free", "pymovements": null, "Full name": "Radboud Coregistration Corpus of Narrative Sentences", "Dataset characteristics": "Co-registration of EEG data", "# Participants": "37", "Age range": null, "Age mean\u00b1SD": "26.2", "Native language": "Dutch", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "200.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2783", "Stimulus description": "Narrative sentences", "Stimulus language": "Dutch", "Stimulus 
language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": null, "Data license": "CC BY-NC-SA 4.0", "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://data.ru.nl/collections/ru/cls/eeg_et_sentence_reading_dsc_556"], ["paper", "https://link.springer.com/article/10.1007/s10579-023-09684-x"]], "_bibtex": "@article{Frank2023, title = {An eye-tracking-with-{{EEG}} coregistration corpus of narrative sentences}, author = {Frank, Stefan L. 
and Aumeistere, Anna}, year = 2023, journal = {Language Resources and Evaluation}, pages = {1--17}, publisher = {{Springer Science and Business Media LLC}}, issn = {1574-0218}, doi = {10.1007/s10579-023-09684-x} }"}, {"Name": "RastrOS", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Includes human predictability norms.", "# Participants": "37", "Age range": null, "Age mean\u00b1SD": "22.2\u00b14.7", "Native language": "Portuguese", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "50.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2494", "Stimulus description": "Paragraphs from journalistic, literary and popular science texts", "Stimulus language": "Portuguese", "Stimulus language family": "Romance", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "- The L\u00e1cio-Web corpus (Aluisio et al., 2004) (11 paragraphs), a publicly available Portuguese corpus (free to download) compiled between 2002 and 2004; - literary texts from Public Domain Books, i.e., they are no longer under copyright (10 paragraphs); - and more recent texts from scientific dissemination websites and from news portals (the remaining 29 paragraphs) to account for new lexical items (words and terms).", "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen 
distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/9jxg3/"], ["paper", "https://link.springer.com/article/10.1007/s10579-022-09609-0"]], "_bibtex": "@article{Leal2022rastros, title = {{{RastrOS Project}}: {{Natural Language Processing}} contributions to the development of an eye-tracking corpus with predictability norms for {{Brazilian Portuguese}}}, author = {Leal, Sidney Evaldo and Lukasova, Katerina and {Carthery-Goulart}, Maria Teresa and Alu{\\'i}sio, Sandra Maria}, year = 2022, journal = {Language Resources and Evaluation}, volume = {56}, number = {4}, pages = {1333--1372}, publisher = {{Springer Science and Business Media LLC}}, issn = {1574-0218}, doi = {10.1007/s10579-022-09609-0} }"}, {"Name": "Reading Attention", "Data accessibility": "Unclear", "pymovements": null, "Full name": null, "Dataset characteristics": "The participants were asked to judge the relevance of the document for the query.", "# Participants": "29", "Age range": "17-28", "Age mean\u00b1SD": null, "Native language": "Chinese", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "60.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Each document belongs to one out of 15 queries. Each participant reads 15 documents, 1 for each query. Simplified Chinese script, Mandarin (inferred from stimulus example, information not explicitly provided).", "Stimulus language": "Mandarin (simp. 
chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "unclear", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Tobii X2-30", "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "http://www.thuir.cn/group/~YQLiu/datasets/CIKM18Li.zip"], ["paper", "https://dl.acm.org/doi/10.1145/3269206.3271764"]], "_bibtex": "@inproceedings{li2018reading-attention, title = {Understanding reading attention distribution during relevance judgement}, booktitle = {Proceedings of the 27th {{ACM International Conference}} on {{Information}} and {{Knowledge Management}}}, author = {Li, Xiangsheng and Liu, Yiqun and Mao, Jiaxin and He, Zexue and Zhang, Min and Ma, Shaoping}, year = 2018, series = {Cikm '18}, pages = {733--742}, publisher = {Association for Computing Machinery}, isbn = {978-1-4503-6014-2}, keywords = {attention,relevance judgement,user behavior analysis} }"}, {"Name": "Reading Brain", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "fMRi, Eyetracking, diffusion tensor imaging and behavior test data available", "# Participants": "51", "Age range": "18-40", "Age mean\u00b1SD": null, "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": "Scanning of structural (T1-weighted) and resting-state data, participants performed a reading task with 
simultaneous eye-tracking and fMRI scanning, and the session ended with a diffusion tensor imaging (DTI) scan. The second session consisted of only behavioral tests, including 5 standardized tests: the Attention Network Test (ANT), Gray's Silent Reading Test (GSRT), Letter-Number Sequencing (LNS), Peabody Picture Vocabulary Test (PPVT-4), and Tower of Hanoi (ToH), followed by a survey: Reading Background Questionnaire (RBQ), which includes familiarity rating for the topics of our 5 reading texts.", "Reading instructions": null, "# Items": "5.0", "Items per subject": "5", "Mean\u00b1SD words per item": "306.2\u00b12.8", "Mean\u00b1SD chars per item": "1533.8\u00b173.0", "Mean\u00b1SD chars per word": "5.010", "Mean\u00b1SD chars seen per participant": "7669", "Mean\u00b1SD words seen per participant": "1531", "Total # chars all items": "7669", "Total # words all items": "1531", "Stimulus description": "5 short texts, with STEM topics", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "http://blclab.org/wp-content/uploads/2019/08/Reading_Brain_Methods_L1Adults.pdf", "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": null, "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": "Long-range mount MRI-compatible", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "143", "Eye-to-camera distance (cm)": "120", "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": "0.88", "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://openneuro.org/datasets/ds003974/versions/3.0.0"], ["paper", 
"https://www.nature.com/articles/s41598-019-47176-7"]], "_bibtex": "@article{hsuNeurocognitiveSignaturesNaturalistic2019, title = {Neurocognitive signatures of naturalistic reading of scientific texts: {{A}} fixation-related {{fMRI}} study}, shorttitle = {Neurocognitive {{Signatures}} of {{Naturalistic Reading}} of {{Scientific Texts}}}, author = {Hsu, Chun-Ting and Clariana, Roy and Schloss, Benjamin and Li, Ping}, year = 2019, journal = {Scientific Reports}, volume = {9}, number = {1}, pages = {10678}, publisher = {Nature Publishing Group}, issn = {2045-2322}, doi = {10.1038/s41598-019-47176-7}, urldate = {2025-07-26}, copyright = {2019 The Author(s)}, langid = {english}, keywords = {Human behaviour,Language,Reading} }"}, {"Name": "Reading Brain L2", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "fMRi, eyetracking, diffusion tensor imaging and behavior test data available; includes a reading task with simultaneous eye-tracking and fMRI scanning, diffusion tensor imaging (DTI) scan, behavioral tests, including 5 standardized tests: the Attention Network Test (ANT), Gray's Silent Reading Test (GSRT), Letter-Number Sequencing (LNS), Peabody Picture Vocabulary Test (PPVT-4), and Tower of Hanoi (ToH).", "# Participants": "56", "Age range": null, "Age mean\u00b1SD": null, "Native language": "Mandarin", "Inclusion criteria": "Bilingual, English, Mandarin", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "5.0", "Items per subject": "5", "Mean\u00b1SD words per item": "306.2\u00b12.8", "Mean\u00b1SD chars per item": "1533.8\u00b173.0", "Mean\u00b1SD chars per word": "5.010", "Mean\u00b1SD chars seen per participant": "7669", "Mean\u00b1SD words seen per participant": "1531", "Total # chars all items": "7669", "Total # words all items": "1531", "Stimulus description": "5 short texts, with STEM topics", "Stimulus language": "English", "Stimulus language family": 
"Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "http://blclab.org/wp-content/uploads/2019/08/Reading_Brain_Methods_L1Adults.pdf", "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": null, "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://openneuro.org/datasets/ds003988/versions/1.0.0"], ["paper", "https://www.sciencedirect.com/science/article/pii/S0911604417300623?via%3Dihub"], ["paper", "https://doi.org/10.1007/s11145-017-9781-x"], ["paper", "https://doi.org/10.1038/s41598-019-47176-7"]], "_bibtex": "@dataset{ds003988:1.0.0, author = {Ping Li and Chun-Ting Hsu and Ben Schloss and Anya Yu and Lindsey Ma and Marissa Scotto and Friederike Seyfried and Chanyuan Gu}, title = {\"The Reading Brain Project L2 Adults\"}, year = {2022}, doi = {doi:10.18112/openneuro.ds003988.v1.0.0}, publisher = {OpenNeuro} }"}, {"Name": "SB-SAT", "Data accessibility": "Free", "pymovements": "Yes", "Full name": "Stony Brook Scholastic Assessment Test", "Dataset characteristics": "Includes subjective difficulty ratings of each text", "# Participants": "95", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": "Undergraduate students", "Reading instructions": null, "# Items": "4.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per 
word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "SAT (Scholastic Assessment Test) passages taken from practice tests for reading comprehension", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/ahnchive/SB-SAT/tree/master?tab=readme-ov-file"], ["paper", "https://dl.acm.org/doi/abs/10.1145/3379156.3391335"]], "_bibtex": "@inproceedings{ahn2020PredictingReadingComprehension, title = {Towards predicting reading comprehension from gaze behavior}, booktitle = {{{ACM Symposium}} on {{Eye Tracking Research}} and {{Applications}}}, author = {Ahn, Seoyoung and Kelton, Conor and Balasubramanian, Aruna and Zelinsky, Greg}, year = 2020, series = {{{ETRA}} '20 short papers}, pages = {1--5}, publisher = {Association for Computing Machinery}, doi = {10.1145/3379156.3391335}, articleno = {32}, isbn = {978-1-4503-7134-6}, keywords = {Eye tracking,Machine learning,Reading dataset,Text comprehension prediction} }"}, {"Name": "Story Reading", "Data accessibility": "Restricted", "pymovements": "No", "Full name": null, "Dataset characteristics": "This 
dataset is mentioned in papers: Time Course and Hazard Function: A Distributional Analysis of Fixation Duration in Reading, Mixed Responses: Why Readers Spend Less Time at Unfavorable Landing Positions", "# Participants": "42", "Age range": null, "Age mean\u00b1SD": null, "Native language": "English, Chinese, Japanese, Korean", "Inclusion criteria": "Native Speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Novels", "Stimulus language": "English, Chinese, Japanese, Korean", "Stimulus language family": "Germanic, Sinitic, Japonic, Koreanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": "Only fixation durations are available", "Eye-tracker": "EyeLink I (Korean study), EyeLink II (all other studies)", "Mount": "Head mount", "Sampling frequency (Hz)": "250 (EyeLink I), 500 (EyeLink II)", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitup.uni-potsdam.de/pmr2/2009-jemr-feng"], ["paper", "https://www.mdpi.com/1995-8692/3/2/8"]], "_bibtex": "@article{feng2009TimeCourseHazard, title = {Time course and hazard 
function: {{A}} distributional analysis of fixation duration in reading}, shorttitle = {Time {{Course}} and {{Hazard Function}}}, author = {Feng, Gary}, year = 2009, journal = {Journal of Eye Movement Research}, volume = {3}, number = {2}, pages = {1--23}, publisher = {publisher}, issn = {1995-8692}, doi = {10.16910/jemr.3.2.3}, urldate = {2025-12-20}, copyright = {http://creativecommons.org/licenses/by/3.0/}, langid = {english}, keywords = {distribution,fixation duration,reading,statistical modeling} }"}, {"Name": "Swedish Dyslexia", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": null, "# Participants": "185", "Age range": "9-10", "Age mean\u00b1SD": null, "Native language": "Swedish", "Inclusion criteria": "High-risk (HR) group required that subjects (1) had Swedish as first language; (2) performed in the lower 5th percentile of the full cohort on two standardized tests of word decoding; and, (3) experienced persistent problems in learning to read according to an independent assessment completed by the classroom teacher, control group of low-risk (LR) subjects with average or above average word reading skills were pairwise matched to the HR subjects on sex, first language, school class, and non-verbal ability ", "Exclusion criteria": "Intellectual disability", "Other characteristics": null, "Reading instructions": "Read the text silently and to answer three questions about its content afterwards", "# Items": "1.0", "Items per subject": "1", "Mean\u00b1SD words per item": "46", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": "46", "Total # chars all items": null, "Total # words all items": "46", "Stimulus description": "1 text on 1 page consisting of the sentences", "Stimulus language": "Swedish", "Stimulus language family": "Germanic", "Stimulus naturalness": null, "Stimulus length category": null, 
"Stimulus source": null, "Comprehension questions": "Yes, after text", "Text annotation": "Type token ratio, word variation ratio", "Stimulus license": null, "Raw data": "yes", "Gaze events": "no", "Reading measures": "no", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "Ober-2TM", "Mount": "Goggle-based chin and forehead rest", "Sampling frequency (Hz)": "100", "Eye-to-screen distance (cm)": "45 viewing distant (paper to eye?)", "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": "One page", "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": "White", "_links": [["data", "https://figshare.com/collections/Screening_for_Dyslexia_Using_Eye_Tracking_During_Reading/3521379"], ["paper", "https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0165508"]], "_bibtex": "@article{benfattoScreeningDyslexiaUsing2016, title = {Screening for dyslexia using eye tracking during reading}, author = {Benfatto, Mattias Nilsson and Seimyr, Gustaf {\\\"O}qvist and Ygge, Jan and Pansell, Tony and Rydberg, Agneta and Jacobson, Christer}, year = 2016, journal = {PLOS ONE}, volume = {11}, number = {12}, pages = {e0165508}, publisher = {Public Library of Science}, issn = {1932-6203}, doi = {10.1371/journal.pone.0165508}, urldate = {2025-07-16}, langid = {english}, keywords = {_tablet,Algorithms,Attention,Children,Dyslexia,Eye movements,Eyes,Language,Schools} }"}, {"Name": "TECO", "Data accessibility": "Free", "pymovements": null, "Full name": "Tsukuba Eye-tracking Corpus", "Dataset characteristics": "Flesch-Kincaid Grade Levels are available for all texts, range from 6-16", "# Participants": "41", "Age range": "18-25", "Age mean\u00b1SD": "21", "Native language": "Japanese", "Inclusion criteria": null, "Exclusion criteria": "More than 1 year in a English speaking country, below 75 
% accuracy in reading comprehension test", "Other characteristics": null, "Reading instructions": null, "# Items": "30.0", "Items per subject": "30", "Mean\u00b1SD words per item": "335.07\u00b135.88", "Mean\u00b1SD chars per item": "1648.54", "Mean\u00b1SD chars per word": "4.92\u00b12.54", "Mean\u00b1SD chars seen per participant": "49'456", "Mean\u00b1SD words seen per participant": "10'000", "Total # chars all items": "49'456", "Total # words all items": ">10'000", "Stimulus description": "Passages from Eiken Test, with three different difficulty levels (B2-A2)", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": null, "Stimulus length category": "Text passages", "Stimulus source": "Eiken test", "Comprehension questions": "Yes, in between texts", "Text annotation": "Flesch-Kincaid level per text, word frequency, word position within the passage, line-edge position (i.e., line-initial or line-final, treated as 2 distinct variables), and the presence of punctuation.", "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000", "Mount": "Desktop mount, chin rest and forehead", "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": "70", "Eye-to-camera distance (cm)": null, "Monitor": "Dell 19-inch computer monitor", "Resolution": "1280 \u00d7 1024", "Text presentation": null, "Font": "Consolas", "Font size": "18", "Monospaced": "yes", "Character per visual angle": "2.44", "Font color": "Black", "Spacing": null, "Background color": "Light grey", "_links": [["data", "https://osf.io/nxrm8/files/osfstorage"], ["paper", "https://www.sciencedirect.com/science/article/pii/S2772766124000296"]], "_bibtex": "@article{nahatame_teco_2024, title = {{{TECO}}: {{An}} eye-tracking corpus of {{Japanese L2 English}} learners' text reading}, shorttitle = {{{TECO}}}, 
author = {Nahatame, Shingo and Ogiso, Tomoko and Kimura, Yukino and Ushiro, Yuji}, year = 2024, journal = {Research Methods in Applied Linguistics}, volume = {3}, number = {2}, pages = {100123}, issn = {2772-7661}, doi = {10.1016/j.rmal.2024.100123}, urldate = {2025-07-16}, keywords = {Corpus,Eye tracking,Reading,Second language} }"}, {"Name": "TURead", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Includes human predictability norms", "# Participants": "196", "Age range": null, "Age mean\u00b1SD": "22.7\u00b12.6", "Native language": null, "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Silent and aloud reading", "# Items": "192.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Sentences selected from existing Turkish corpora based on target words; 37 stimuli consist of 2 or 3 sentences.", "Stimulus language": "Turkish", "Stimulus language family": "Turkic", "Stimulus naturalness": "Partially constructed", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": null, "Data license": null, "Other available data": null, "Notes on availability": "Only the measurements for the target words are available", "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "1000", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per 
visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/w53cz/overview"], ["paper", "http://link.springer.com/article/10.3758/s13428-023-02120-6"]], "_bibtex": "@article{Acartrk2023, title = {{{TURead}}: {{An}} eye movement dataset of {{Turkish}} reading}, author = {Acart{\\\"u}rk, Cengiz and {\\\"O}zkan, Ay{\\c s}eg{\\\"u}l and Pek{\\c c}etin, Tu{\\u g}{\\c c}e Nur and Ormano{\\u g}lu, Zuhal and K{\\i}rk{\\i}c{\\i}, Bilal}, year = 2023, journal = {Behavior Research Methods}, pages = {1--14}, publisher = {{Springer Science and Business Media LLC}}, issn = {1554-3528}, doi = {10.3758/s13428-023-02120-6} }"}, {"Name": "Task Effects in Human Reading", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "The experiment included 2 conditions: preview and no preview. In the preview condition, participants first read the question, then they read the text, and then they saw the question again with 4 answer choices and had to select 1 answer. 
In the no preview condition, the question was not presented at the beginning of the trial, only after the text had been read.", "# Participants": "22", "Age range": null, "Age mean\u00b1SD": null, "Native language": null, "Inclusion criteria": "Native speaker", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "20.0", "Items per subject": "20", "Mean\u00b1SD words per item": "323", "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": "6460", "Total # chars all items": null, "Total # words all items": "6460", "Stimulus description": "Newspaper texts", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "Deepmind question answering corpus (10 from CNN section, 10 Daily Mails Section)", "Comprehension questions": "Yes for all texts", "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": "Not all data is availble ", "Eye-tracker": "EyeLink 1000", "Mount": null, "Sampling frequency (Hz)": "2000", "Eye-to-screen distance (cm)": "60", "Eye-to-camera distance (cm)": null, "Monitor": "24 inch LCD screen", "Resolution": null, "Text presentation": null, "Font": "Lucida Sans Typewriter", "Font size": "20", "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://gitlab.com/m-hahn/task-effects-neural-networks/-/blob/main/study2/analysis/experiment_data/processed_data/aggregated_data_csv/data-full.csv?ref_type=heads"], ["paper", "https://www.sciencedirect.com/science/article/abs/pii/S0010027722002773"]], "_bibtex": "@article{hahn_modeling_2023, title = 
{Modeling task effects in human reading with neural network-based attention}, author = {Hahn, Michael and Keller, Frank}, year = 2023, journal = {Cognition}, volume = {230}, pages = {105289}, issn = {0010-0277}, doi = {10.1016/j.cognition.2022.105289}, urldate = {2025-07-16}, keywords = {Computational modeling,Reading,Task effects} }"}, {"Name": "The Little Prince Corpus", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Parallel corpus of Cantonese and Mandarin; 2 parts: Naturalistic reading with the only task being comprehension questions and task specific reading (e.g. finding a specific information in the text). ", "# Participants": "60", "Age range": null, "Age mean\u00b1SD": "Cantonese: 21.7, Mandarin: 28.8", "Native language": "Cantonese, Mandarin", "Inclusion criteria": null, "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": null, "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": null, "Stimulus description": "Translations of \"the little Prince\" in simp. chars. Chinese characters and Cantonese. ", "Stimulus language": "Mandarin (simp. chars.), Cantonese (trad. 
chars.)", "Stimulus language family": "Sinitic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": "Translations of \"the little prince\" by Antoine de Saint-Exup\u00e9ry, no translation specifications in paper", "Comprehension questions": "Yes, in between texts", "Text annotation": "Word segmentation, PoS, syntactic distances inlcuding dependency depth , linear distance to head and root; word frequency, syllable number", "Stimulus license": null, "Raw data": "no", "Gaze events": "no", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": "Only average measurements for all participants are available, not for individuals", "Eye-tracker": null, "Mount": null, "Sampling frequency (Hz)": null, "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/CN-Eyetk/MCFIX"], ["paper", "https://aclanthology.org/2023.vardial-1.12/"]], "_bibtex": "@inproceedings{li-etal-2023-little-prince, title = {Comparing and predicting eye-tracking data of {{Mandarin}} and {{Cantonese}}}, booktitle = {Tenth {{Workshop}} on {{NLP}} for {{Similar Languages}}, {{Varieties}} and {{Dialects}} ({{VarDial}} 2023)}, author = {Li, Junlin and Peng, Bo and Hsu, Yu-yin and Chersoni, Emmanuele}, year = 2023, pages = {121--132}, publisher = {Association for Computational Linguistics}, doi = {10.18653/v1/2023.vardial-1.12} }"}, {"Name": "UCL Corpus", "Data accessibility": "Free", "pymovements": "Yes", "Full name": null, "Dataset characteristics": "Eye-tracking and self-paced-reading data.", "# Participants": "43", "Age range": null, "Age mean\u00b1SD": "25.8", "Native language": "English", "Inclusion criteria": "Native 
speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": null, "# Items": "205.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "2339", "Stimulus description": "Sentences selected from free online novels.", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "no", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "no", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink II", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://static-content.springer.com/esm/art%3A10.3758%2Fs13428-012-0313-y/MediaObjects/13428_2012_313_MOESM1_ESM.zip"], ["paper", "https://link.springer.com/article/10.3758/s13428-012-0313-y"]], "_bibtex": "@article{frankReadingTimeData2013ucl, title = {Reading time data for evaluating broad-coverage models of {{English}} sentence processing}, author = {Frank, Stefan L. and Fernandez Monsalve, Irene and Thompson, Robin L. 
and Vigliocco, Gabriella}, year = 2013, journal = {Behavior Research Methods}, volume = {45}, number = {4}, pages = {1182--1190}, issn = {1554-3528}, doi = {10.3758/s13428-012-0313-y}, urldate = {2025-07-26}, langid = {english}, keywords = {Eye tracking,Information Processing,Language Processing,Model evaluation,Natural Language Processing (NLP),Open Reading Frames,Reading,Self-paced reading,Sentence comprehension,Word-reading time,Working Memory} }"}, {"Name": "WebQAmGaze", "Data accessibility": "Free", "pymovements": null, "Full name": null, "Dataset characteristics": "Recording during question answering, questions answer spans in the text were annotated.", "# Participants": "194", "Age range": null, "Age mean\u00b1SD": null, "Native language": "German, English, Spanish", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": "124 English, 51 Spanish, and 19 German", "Reading instructions": null, "# Items": "38.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "Long texts: 370; short texts: 3010", "Stimulus description": "Wikipedia-style texts: 2 long texts and 36 short texts", "Stimulus language": "German, English, Spanish", "Stimulus language family": "Germanic, Germanic, Romance", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Text passages", "Stimulus source": null, "Comprehension questions": "Yes", "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "unclear", "Reading measures": "yes", "Scripts": "yes", "Data license": "CC-By Attribution 4.0 International", "Other available data": null, "Notes on availability": null, "Eye-tracker": "Webcam", "Mount": null, "Sampling frequency (Hz)": "24.93 (mean)", "Eye-to-screen 
distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://github.com/tfnribeiro/WebQAmGaze"], ["paper", "https://arxiv.org/abs/2303.17876"]], "_bibtex": "@misc{ribeiro2023webqamgaze, title = {{{WebQAmGaze}}: {{A}} multilingual webcam eye-tracking-while-reading dataset}, author = {Ribeiro, Tiago and Brandl, Stephanie and S{\\o}gaard, Anders and Hollenstein, Nora}, year = 2023, eprint = {2303.17876}, primaryclass = {cs.CL}, archiveprefix = {arXiv}, url={https://arxiv.org/abs/2303.17876} }"}, {"Name": "ZuCo 1", "Data accessibility": "Free", "pymovements": "No", "Full name": "Zurich Cognitive Language Processing Corpus", "Dataset characteristics": "Co-registration of EEG data and each sentence was part of a block with a specific task.", "# Participants": "12", "Age range": null, "Age mean\u00b1SD": "38\u00b19.8", "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Each sentence was read in a block with 1 task.", "# Items": "1107.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "21631", "Stimulus description": "Wikipedia sentences and movie reviews", "Stimulus language": "English", "Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", 
"Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/q3zws/"], ["paper", "https://www.nature.com/articles/sdata2018291"]], "_bibtex": "@article{Hollenstein2018zuco1, title = {{{ZuCo}}, a simultaneous {{EEG}} and eye-tracking resource for natural sentence reading}, author = {Hollenstein, Nora and Rotsztejn, Jonathan and Troendle, Marius and Pedroni, Andreas and Zhang, Ce and Langer, Nicolas}, year = 2018, journal = {Scientific Data}, volume = {5}, number = {1}, pages = {1--13}, publisher = {{Springer Science and Business Media LLC}}, issn = {2052-4463}, doi = {10.1038/sdata.2018.291} }"}, {"Name": "ZuCo 2", "Data accessibility": "Free", "pymovements": "No", "Full name": "Zurich Cognitive Language Processing Corpus 2.0", "Dataset characteristics": "Co-registration of EEG data and each sentence was part of a block with a specific task.", "# Participants": "18", "Age range": null, "Age mean\u00b1SD": "34\u00b18.3", "Native language": "English", "Inclusion criteria": "Native speakers", "Exclusion criteria": null, "Other characteristics": null, "Reading instructions": "Each sentence was read in a block with 1 task.", "# Items": "739.0", "Items per subject": null, "Mean\u00b1SD words per item": null, "Mean\u00b1SD chars per item": null, "Mean\u00b1SD chars per word": null, "Mean\u00b1SD chars seen per participant": null, "Mean\u00b1SD words seen per participant": null, "Total # chars all items": null, "Total # words all items": "15138", "Stimulus description": "Wikipedia sentences", "Stimulus language": "English", 
"Stimulus language family": "Germanic", "Stimulus naturalness": "Naturalistic", "Stimulus length category": "Single sentences", "Stimulus source": null, "Comprehension questions": null, "Text annotation": null, "Stimulus license": null, "Raw data": "yes", "Gaze events": "yes", "Reading measures": "yes", "Scripts": "yes", "Data license": null, "Other available data": null, "Notes on availability": null, "Eye-tracker": "EyeLink 1000 Plus", "Mount": null, "Sampling frequency (Hz)": "500", "Eye-to-screen distance (cm)": null, "Eye-to-camera distance (cm)": null, "Monitor": null, "Resolution": null, "Text presentation": null, "Font": null, "Font size": null, "Monospaced": null, "Character per visual angle": null, "Font color": null, "Spacing": null, "Background color": null, "_links": [["data", "https://osf.io/q3zws/"], ["paper", "https://aclanthology.org/2020.lrec-1.18/"]], "_bibtex": "@inproceedings{hollenstein-etal-2020-zuco, title = {{{ZuCo}} 2.0: {{A}} dataset of physiological recordings during natural reading and annotation}, booktitle = {Proceedings of the {{Twelfth Language Resources}} and {{Evaluation Conference}}}, author = {Hollenstein, Nora and Troendle, Marius and Zhang, Ce and Langer, Nicolas}, year = 2020, pages = {138--146}, publisher = {European Language Resources Association}, isbn = {979-10-95546-34-4}, langid = {english} }"}];
const METADATA = {"Name": {"type": "text", "nullable": false, "null_count": 0}, "Data accessibility": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["Free", "Not available", "Restricted", "Unclear"]}, "pymovements": {"type": "boolean", "nullable": true, "null_count": 34, "unique_values": ["No", "Yes"]}, "Full name": {"type": "text", "nullable": true, "null_count": 49}, "Dataset characteristics": {"type": "url", "nullable": true, "null_count": 28}, "# Participants": {"type": "numeric", "nullable": false, "null_count": 0, "min": 5.0, "max": 1718.0}, "Age range": {"type": "text", "nullable": true, "null_count": 51}, "Age mean\u00b1SD": {"type": "list", "nullable": true, "null_count": 41}, "Native language": {"type": "list", "nullable": true, "null_count": 21, "unique_values": ["Arabic", "Azari", "Basque", "Brazilian Portuguese", "Cantonese", "Chinese", "Czech", "Danish", "Dutch", "English", "English and others", "Estonian", "Finnish", "French", "German", "German (Swiss)", "Greek", "Hebrew", "Hindi", "Icelandic", "Italian", "Japanese", "Korean", "Mandarin", "Mongolian", "Norwegian", "Persian", "Portugese", "Portuguese", "Russian", "Serbian", "Spanish", "Swedish", "Turkish", "Uighur"], "language_counts": {"English": 24, "Japanese": 4, "Mandarin": 9, "Arabic": 1, "Chinese": 6, "Spanish": 7, "Portuguese": 1, "Serbian": 4, "Dutch": 6, "French": 1, "Czech": 1, "German": 11, "English and others": 1, "Norwegian": 6, "Greek": 2, "Hebrew": 2, "Italian": 2, "Russian": 5, "Turkish": 4, "Korean": 2, "Finnish": 2, "Estonian": 2, "Basque": 2, "Brazilian Portuguese": 2, "Danish": 2, "German (Swiss)": 1, "Hindi": 2, "Icelandic": 2, "Mongolian": 1, "Uighur": 2, "Persian": 1, "Azari": 1, "Portugese": 1, "Swedish": 1, "Cantonese": 1}, "is_language_filter": true}, "Inclusion criteria": {"type": "list", "nullable": true, "null_count": 28}, "Exclusion criteria": {"type": "list", "nullable": true, "null_count": 70}, "Other characteristics": {"type": "list", 
"nullable": true, "null_count": 46}, "Reading instructions": {"type": "list", "nullable": true, "null_count": 61}, "# Items": {"type": "numeric", "nullable": true, "null_count": 7, "min": 1.0, "max": 7577.0}, "Items per subject": {"type": "list", "nullable": true, "null_count": 42}, "Mean\u00b1SD words per item": {"type": "list", "nullable": true, "null_count": 56}, "Mean\u00b1SD chars per item": {"type": "list", "nullable": true, "null_count": 60}, "Mean\u00b1SD chars per word": {"type": "list", "nullable": true, "null_count": 64}, "Mean\u00b1SD chars seen per participant": {"type": "list", "nullable": true, "null_count": 65}, "Mean\u00b1SD words seen per participant": {"type": "list", "nullable": true, "null_count": 62}, "Total # chars all items": {"type": "list", "nullable": true, "null_count": 65}, "Total # words all items": {"type": "list", "nullable": true, "null_count": 31}, "Stimulus description": {"type": "list", "nullable": true, "null_count": 4}, "Stimulus language": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["Basque", "Brazilian Portuguese", "Cantonese (trad. chars.)", "Chinese", "Czech", "Danish", "Dutch", "English", "Estonian", "Finnish", "French", "German", "Greek", "Hebrew", "Hindi", "Icelandic", "Italian", "Japanese", "Korean", "Mandarin", "Mandarin (simp. chars.)", "Mandarin (trad. chars.)", "Mongolian", "Norwegian", "Persian", "Portugese", "Russian", "Serbian", "Spanish", "Swedish", "Turkish", "Uighur", "Urdu"], "language_counts": {"Mandarin (simp. chars.)": 12, "English": 41, "Japanese": 2, "Mandarin": 1, "Mandarin (trad. 
chars.)": 2, "Serbian": 3, "Danish": 2, "Spanish": 4, "Dutch": 6, "Chinese": 2, "French": 1, "Czech": 1, "German": 12, "Norwegian": 3, "Greek": 1, "Hebrew": 1, "Italian": 1, "Russian": 3, "Turkish": 3, "Korean": 2, "Finnish": 1, "Estonian": 1, "Basque": 1, "Brazilian Portuguese": 1, "Hindi": 2, "Icelandic": 1, "Mongolian": 1, "Uighur": 2, "Persian": 1, "Urdu": 1, "Portugese": 1, "Swedish": 1, "Cantonese (trad. chars.)": 1}, "is_language_filter": true}, "Stimulus language family": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["Basque", "Finno-Ugric", "Germanic", "Hellenic", "Indo-Aryan", "Iranian", "Japonic", "Koreanic", "Mongolian", "Romance", "Semitic", "Sinitic", "Slavic", "Turkic"], "language_counts": {"Sinitic": 17, "Germanic": 66, "Japonic": 2, "Slavic": 7, "Romance": 8, "Hellenic": 1, "Semitic": 1, "Turkic": 5, "Koreanic": 2, "Finno-Ugric": 2, "Basque": 1, "Indo-Aryan": 3, "Mongolian": 1, "Iranian": 1}, "is_language_filter": true}, "Stimulus naturalness": {"type": "categorical", "nullable": true, "null_count": 4, "unique_values": ["Constructed", "Naturalistic", "Partially constructed"]}, "Stimulus length category": {"type": "categorical", "nullable": true, "null_count": 5, "unique_values": ["Both single sentences and text passages", "Single sentences", "Text passages"]}, "Stimulus source": {"type": "url", "nullable": true, "null_count": 48}, "Comprehension questions": {"type": "list", "nullable": true, "null_count": 42}, "Text annotation": {"type": "list", "nullable": true, "null_count": 63}, "Stimulus license": {"type": "text", "nullable": true, "null_count": 78}, "Raw data": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["no", "unclear", "yes"]}, "Gaze events": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["no", "unclear", "yes"]}, "Reading measures": {"type": "categorical", "nullable": false, "null_count": 0, "unique_values": ["no", "unclear", "yes"]}, "Scripts": 
{"type": "categorical", "nullable": true, "null_count": 6, "unique_values": ["no", "unclear", "yes"]}, "Data license": {"type": "text", "nullable": true, "null_count": 62}, "Other available data": {"type": "text", "nullable": true, "null_count": 78}, "Notes on availability": {"type": "url", "nullable": true, "null_count": 69}, "Eye-tracker": {"type": "categorical", "nullable": true, "null_count": 7, "unique_values": ["Dr Bouis Oculometer Eyetracker", "ET-enabled virtual-reality", "EyeLink", "EyeLink 1000", "EyeLink 1000 Plus", "EyeLink I", "EyeLink II", "EyeLink Portable Duo", "GP3 HD Eye Tracker", "Gazepoint GP3", "IView-X", "Ober-2TM", "SMI RED 250 remote", "SMI RED-m", "SMI iView X HED", "Tobii", "Tobii EyeTracking 4C", "Tobii Pro Spectrum", "Tobii TX 300", "Tobii TX300 screen-based", "Tobii X2-30", "Webcam"]}, "Mount": {"type": "list", "nullable": true, "null_count": 46}, "Sampling frequency (Hz)": {"type": "numeric", "nullable": true, "null_count": 11, "min": 60.0, "max": 2000.0}, "Eye-to-screen distance (cm)": {"type": "numeric", "nullable": true, "null_count": 49, "min": 43.0, "max": 143.0}, "Eye-to-camera distance (cm)": {"type": "numeric", "nullable": true, "null_count": 73, "min": 55.0, "max": 120.0}, "Monitor": {"type": "url", "nullable": true, "null_count": 51}, "Resolution": {"type": "list", "nullable": true, "null_count": 58}, "Text presentation": {"type": "list", "nullable": true, "null_count": 73}, "Font": {"type": "list", "nullable": true, "null_count": 59}, "Font size": {"type": "numeric", "nullable": true, "null_count": 63, "min": 12.0, "max": 28.0}, "Monospaced": {"type": "text", "nullable": true, "null_count": 68}, "Character per visual angle": {"type": "numeric", "nullable": true, "null_count": 59, "min": 0.75, "max": 3.33}, "Font color": {"type": "categorical", "nullable": true, "null_count": 65, "unique_values": ["Black", "White"]}, "Spacing": {"type": "text", "nullable": true, "null_count": 78}, "Background color": {"type": "list", 
"nullable": true, "null_count": 65}};
const COLUMN_TYPES = {"Name": "text", "Data accessibility": "categorical", "pymovements": "boolean", "Full name": "text", "Dataset characteristics": "url", "# Participants": "numeric", "Age range": "text", "Age mean\u00b1SD": "list", "Native language": "list", "Inclusion criteria": "list", "Exclusion criteria": "list", "Other characteristics": "list", "Reading instructions": "list", "# Items": "numeric", "Items per subject": "list", "Mean\u00b1SD words per item": "list", "Mean\u00b1SD chars per item": "list", "Mean\u00b1SD chars per word": "list", "Mean\u00b1SD chars seen per participant": "list", "Mean\u00b1SD words seen per participant": "list", "Total # chars all items": "list", "Total # words all items": "list", "Stimulus description": "list", "Stimulus language": "categorical", "Stimulus language family": "categorical", "Stimulus naturalness": "categorical", "Stimulus length category": "categorical", "Stimulus source": "url", "Comprehension questions": "list", "Text annotation": "list", "Stimulus license": "text", "Raw data": "categorical", "Gaze events": "categorical", "Reading measures": "categorical", "Scripts": "categorical", "Data license": "text", "Other available data": "text", "Notes on availability": "url", "Eye-tracker": "categorical", "Mount": "list", "Sampling frequency (Hz)": "numeric", "Eye-to-screen distance (cm)": "numeric", "Eye-to-camera distance (cm)": "numeric", "Monitor": "url", "Resolution": "list", "Text presentation": "list", "Font": "list", "Font size": "numeric", "Monospaced": "text", "Character per visual angle": "numeric", "Font color": "categorical", "Spacing": "text", "Background color": "list"};
const COLUMNS = ["Name", "Data accessibility", "pymovements", "Full name", "Dataset characteristics", "# Participants", "Age range", "Age mean\u00b1SD", "Native language", "Inclusion criteria", "Exclusion criteria", "Other characteristics", "Reading instructions", "# Items", "Items per subject", "Mean\u00b1SD words per item", "Mean\u00b1SD chars per item", "Mean\u00b1SD chars per word", "Mean\u00b1SD chars seen per participant", "Mean\u00b1SD words seen per participant", "Total # chars all items", "Total # words all items", "Stimulus description", "Stimulus language", "Stimulus language family", "Stimulus naturalness", "Stimulus length category", "Stimulus source", "Comprehension questions", "Text annotation", "Stimulus license", "Raw data", "Gaze events", "Reading measures", "Scripts", "Data license", "Other available data", "Notes on availability", "Eye-tracker", "Mount", "Sampling frequency (Hz)", "Eye-to-screen distance (cm)", "Eye-to-camera distance (cm)", "Monitor", "Resolution", "Text presentation", "Font", "Font size", "Monospaced", "Character per visual angle", "Font color", "Spacing", "Background color"];
// Human-readable description for every column name listed in COLUMNS.
// Keys spell the plus-minus sign as \u00b1 so they do not depend on the file encoding.
const COLUMN_DESCRIPTIONS = {
  "Name": "The name of the dataset",
  "Data accessibility": "If and how the eye-tracking data is accessible. Restricted means that the data can be accessed but only by, e.g., writing an e-mail to the authors, signing up, or filling in a form.",
  "pymovements": "If the dataset is integrated into pymovements.",
  "Full name": "The full name of the dataset if the name is an abbreviation.",
  "Dataset characteristics": "Any characteristics of the dataset that could make it particularly interesting or special.",
  "# Participants": "Number of participants.",
  "Age range": "The age range min-max of all participants.",
  "Age mean\u00b1SD": "The mean and standard deviation of the age across all participants.",
  "Native language": "The native languages of the participants if known.",
  "Inclusion criteria": "Criteria applied to include participants, if known.",
  "Exclusion criteria": "Criteria applied to exclude participants, if known.",
  "Other characteristics": "Other notable characteristics of the participants.",
  "Reading instructions": "The instructions participants received before reading the text.",
  "# Items": "Number of items overall.",
  "Items per subject": "Number of items each participant read.",
  "Mean\u00b1SD words per item": "Mean and standard deviation of words per item.",
  "Mean\u00b1SD chars per item": "The mean and standard deviation of characters per item.",
  "Mean\u00b1SD chars per word": "The mean number of characters per word including the standard deviation if applicable",
  "Mean\u00b1SD chars seen per participant": "The mean and standard deviation of characters seen by each participant.",
  "Mean\u00b1SD words seen per participant": "The mean and standard deviation of words seen by each participant.",
  "Total # chars all items": "The total number of characters across all items.",
  "Total # words all items": "The total number of words across all items.",
  "Stimulus description": "A short description of the stimuli.",
  "Stimulus language": "The language of the stimulus.",
  "Stimulus language family": "The language family of the stimulus texts. The language family indicated for one language is chosen one or two levels up when illustrating the families as a tree such that multiple languages are summarized with that term but the category is not too broad. For example, Indo-European languages are categorized as Romance, Germanic, etc., to avoid the very broad category of Indo-European. Purely geographical categories (e.g., Western, Eastern) were skipped and the higher-ranking category was chosen.",
  "Stimulus naturalness": "Degree of naturalness of the stimuli. Naturalistic stimuli are existing texts that are only adapted when necessary, for example to make them self-contained. Partially constructed stimuli are created by combining elements, such as by selecting target words or by controlling certain characteristics across all stimuli but choosing the stimuli from existing sources. Constructed stimuli are made up specifically for the experiment, while still aiming to be naturalistic. Minimal pair stimuli are constructed as pairs with only minimal differences between each pair in order to study specific phenomena.",
  "Stimulus length category": "Broad categories for the length of the texts, e.g., sentences or passages.",
  "Stimulus source": "The link of the stimulus source. ",
  "Comprehension questions": "If there were comprehension questions.",
  "Text annotation": "If the stimulus texts are annotated or not and which annotation.",
  "Stimulus license": "How the stimulus texts are licensed. ",
  "Raw data": "If raw data is made available for this dataset.",
  "Gaze events": "If fixation data is made available for this dataset.",
  "Reading measures": "If reading measures are made available for this dataset.",
  "Scripts": "If scripts are made available for this dataset.",
  "Data license": "The license the data is shared under.",
  "Other available data": "If any other data is available, e.g. psychometric tests.",
  "Notes on availability": "If there is a special procedure, login, form etc. required to access your data.",
  "Eye-tracker": "The brand and type of eye-tracker used.",
  "Mount": "Eye-tracker mount and type of head support used for the participants.",
  // NOTE(review): the next three descriptions are the literal two-character text "" --
  // this looks like an export placeholder; kept unchanged, confirm and fill in at the data source.
  "Sampling frequency (Hz)": "\"\"",
  "Eye-to-screen distance (cm)": "\"\"",
  "Eye-to-camera distance (cm)": "\"\"",
  "Monitor": "The monitor used to present the stimuli.",
  "Resolution": "Resolution of the screen in pixels",
  "Text presentation": "The format and the way the text is presented on the screen.",
  "Font": "The font name.",
  "Font size": "The size of the font as reported.",
  "Monospaced": "Whether the font was monospaced or not.",
  "Character per visual angle": "How many characters per visual angle were read.",
  "Font color": "The color of the font the stimuli were presented in.",
  "Spacing": "The line spacing if there are multiple lines on each page.",
  "Background color": "The color of the background behind the stimuli."
};
// BibTeX entry for citing the survey itself.
// The umlaut is written as \\\" in this literal so that the runtime string contains the
// BibTeX accent command {\"a}; a bare \" is only a JS quote escape and would yield {"a}.
const BIBTEX = "@misc{datasetreview2026,\n author = {Jakobi, Deborah N. and Reich, David R. and Prasse, Paul and Hofmann, Jana and Bolliger, Lena S. and J{\\\"a}ger, Lena A.},\n title = {{Eye-Tracking-while-Reading: A Living Survey of Datasets with Open Library Support}},\n howpublished = {online},\n year = {2026},\n note = {under review}\n}\n";
// Category mapping: column name -> category id
// ('general', 'participants', 'stimuli', 'available-data', 'lab-setup', 'stimulus-layout').
// The plus-minus sign in column names is written as \u00b1, the same spelling COLUMNS uses,
// so the keys stay correct even if this file is read or saved with the wrong encoding.
const COLUMN_CATEGORIES = {
  // General Information
  'Name': 'general',
  'Data accessibility': 'general',
  'Link (data)': 'general',
  'Link (paper)': 'general',
  'pymovements': 'general',
  'Full name': 'general',
  'Characteristics': 'general',
  'Dataset characteristics': 'general',
  'Abstract': 'general',
  // Participants
  '# Participants': 'participants',
  'Age range': 'participants',
  'Age mean\u00b1SD': 'participants',
  'Native language': 'participants',
  'Inclusion criteria': 'participants',
  'Exclusion criteria': 'participants',
  'Other characteristics': 'participants',
  'Reading instructions': 'participants',
  // Stimuli
  '# Items': 'stimuli',
  'Items per subject': 'stimuli',
  'Mean\u00b1SD words per item': 'stimuli',
  'Mean\u00b1SD chars per item': 'stimuli',
  'Mean\u00b1SD chars per word': 'stimuli',
  'Mean\u00b1SD chars seen per participant': 'stimuli',
  'Mean\u00b1SD words seen per participant': 'stimuli',
  'Total # chars all items': 'stimuli',
  'Total # words all items': 'stimuli',
  'Stimulus description': 'stimuli',
  'Stimulus language': 'stimuli',
  'Stimulus language family': 'stimuli',
  'Stimulus type': 'stimuli',
  'Stimulus naturalness': 'stimuli',
  'Stimulus length category': 'stimuli',
  'Stimulus source': 'stimuli',
  'Comprehension questions': 'stimuli',
  'Text annotation': 'stimuli',
  'Stimulus license': 'stimuli',
  // Available data
  'Raw data': 'available-data',
  'Fixations': 'available-data',
  'Gaze events': 'available-data',
  'Reading measures': 'available-data',
  'Scripts': 'available-data',
  'Other available data': 'available-data',
  'Data license': 'available-data',
  'Notes on availability': 'available-data',
  // Lab setup
  'Eye-tracker': 'lab-setup',
  'Mount': 'lab-setup',
  'Sampling frequency (Hz)': 'lab-setup',
  'Eye-to-screen distance (cm)': 'lab-setup',
  'Eye-to-camera distance (cm)': 'lab-setup',
  'Monitor': 'lab-setup',
  // Stimulus layout
  'Font': 'stimulus-layout',
  'Font size': 'stimulus-layout',
  'Monospaced': 'stimulus-layout',
  'Character per visual angle': 'stimulus-layout',
  'Font color': 'stimulus-layout',
  'Background color': 'stimulus-layout',
  'Spacing': 'stimulus-layout',
  'Resolution': 'stimulus-layout',
  'Text presentation': 'stimulus-layout'
};
// Mutable page state.
// filteredData starts as a shallow copy of DATA, so it is a separate array
// that initially holds the same row objects.
let filteredData = Array.from(DATA);
let currentFilters = {};
// Track sort direction per column
let sortState = {};
// Fullscreen toggle - prefer the table container if present
// When something is already fullscreen, leave fullscreen. Otherwise request
// fullscreen on #tableContainer, falling back to the root element when the
// container does not exist. Each API method is feature-checked before use.
function toggleFullscreen() {
  if (document.fullscreenElement) {
    if (document.exitFullscreen) {
      document.exitFullscreen();
    }
    return;
  }

  const container = document.getElementById('tableContainer');
  const fullscreenTarget = container ? container : document.documentElement;
  if (fullscreenTarget.requestFullscreen) {
    fullscreenTarget.requestFullscreen();
  }
}
// Reflect fullscreen state on the button text
// On every fullscreenchange event: mirror the state as the `fullscreen-active`
// class on <body>, then relabel #fullscreenToggle if that button exists.
document.addEventListener('fullscreenchange', () => {
  const isFullscreen = Boolean(document.fullscreenElement);
  document.body.classList.toggle('fullscreen-active', isFullscreen);

  const toggleButton = document.getElementById('fullscreenToggle');
  if (!toggleButton) {
    return;
  }
  toggleButton.textContent = isFullscreen ? 'Exit Fullscreen' : 'Fullscreen';
});
// Toggle freeze first column
// Flips the `frozen-column` class on #tableContainer and relabels the
// #freezeColumnToggle button (when present) with the action the next call
// will perform. Returns without doing anything when the container is absent,
// so calling this on a page without the table does not throw.
function toggleFreezeColumn() {
  const tableContainer = document.getElementById('tableContainer');
  if (!tableContainer) {
    return;
  }

  // classList.toggle returns true when the class ends up present.
  const isFrozen = tableContainer.classList.toggle('frozen-column');

  const btn = document.getElementById('freezeColumnToggle');
  if (btn) {
    // Icons are written as code point escapes so they survive a re-encoding
    // of this file.
    // NOTE(review): the icon bytes in the previous literals were mis-encoded
    // and unreadable; open/closed padlock is assumed here. Confirm against
    // the button's initial markup.
    btn.textContent = isFrozen
      ? '\u{1F513} Unfreeze Name Column'
      : '\u{1F512} Freeze Name Column';
  }
}
// Initialize the table
// Renders the header, renders the body from the full DATA set, builds the
// filter controls, and then puts the `.controls` panel (if the page has one)
// into its collapsed state.
function initializeTable() {
  renderHeader();
  renderTable(DATA);
  createFilters();

  // Start with filters collapsed
  const controls = document.querySelector('.controls');
  if (!controls) {
    return;
  }
  controls.classList.add('collapsed');

  const toggleIcon = document.querySelector('.toggle-icon');
  if (toggleIcon) {
    // U+25B6 (right-pointing triangle). Written as an escape because the
    // literal glyph had been mis-encoded into unreadable bytes in this file.
    toggleIcon.textContent = '\u25B6';
  }
}
// Render table header with category row
// Rebuilds the <thead> of #dataTable from scratch as two rows:
//   1. a category row: one <th> per run of adjacent columns that share a
//      category, spanning that run and classed `header-<category>`;
//   2. the column row (#headerRow): one clickable <th> per entry of COLUMNS
//      that calls sortTable(col) when clicked.
// Columns missing from COLUMN_CATEGORIES are grouped under 'other' in the
// category row and get no `category-*` class in the column row.
function renderHeader() {
  const thead = document.querySelector('#dataTable thead');
  thead.innerHTML = '';

  // Collapse consecutive columns with the same category into
  // { category, count } runs; count becomes the colspan.
  const categoryGroups = [];
  COLUMNS.forEach(col => {
    const category = COLUMN_CATEGORIES[col] || 'other';
    const lastGroup = categoryGroups[categoryGroups.length - 1];
    if (lastGroup && lastGroup.category === category) {
      lastGroup.count++;
    } else {
      categoryGroups.push({ category: category, count: 1 });
    }
  });

  // Create category header row
  const categoryRow = document.createElement('tr');
  categoryRow.className = 'category-header-row';
  categoryGroups.forEach(group => {
    const th = document.createElement('th');
    th.setAttribute('colspan', group.count);
    // Add category-specific class for coloring
    th.classList.add('header-' + group.category);
    // Format category name: convert 'available-data' to 'Available Data'
    th.textContent = group.category
      .split('-')
      .map(word => word.charAt(0).toUpperCase() + word.slice(1))
      .join(' ');
    categoryRow.appendChild(th);
  });
  thead.appendChild(categoryRow);

  // Create column header row
  const headerRow = document.createElement('tr');
  headerRow.id = 'headerRow';
  COLUMNS.forEach(col => {
    const th = document.createElement('th');

    // Add category class for coloring
    const category = COLUMN_CATEGORIES[col] || '';
    if (category) {
      th.classList.add('category-' + category);
    }

    // Add description tooltip if available
    if (COLUMN_DESCRIPTIONS[col]) {
      th.setAttribute('title', COLUMN_DESCRIPTIONS[col]);
    }

    // The column name goes in as a text node, never as markup, so a name
    // containing `<` or `&` is displayed literally instead of being parsed.
    th.appendChild(document.createTextNode(col));

    // Add link for pymovements column
    if (col === 'pymovements') {
      const link = document.createElement('a');
      link.setAttribute('href', 'https://github.com/pymovements/pymovements');
      link.setAttribute('target', '_blank');
      link.setAttribute('rel', 'noopener');
      link.setAttribute('style', 'display: inline; margin-left: 4px; color: #3498db; text-decoration: none; font-size: 1.1em;');
      link.setAttribute('title', 'Visit pymovements GitHub');
      // NOTE(review): the icon bytes in the previous literal were mis-encoded
      // and unreadable; the link emoji (U+1F517) is assumed. Confirm.
      link.textContent = '\u{1F517}';
      // Without this, the click bubbles up to the <th> and following the
      // link would also re-sort the table.
      link.addEventListener('click', event => event.stopPropagation());
      th.appendChild(document.createTextNode(' '));
      th.appendChild(link);
    }

    // Sort indicator: U+21C5 (up/down arrows). Written as an escape because
    // the literal glyph had been mis-encoded and split across two lines.
    const indicator = document.createElement('span');
    indicator.className = 'sort-indicator';
    indicator.textContent = '\u21C5';
    th.appendChild(document.createTextNode(' '));
    th.appendChild(indicator);

    th.style.cursor = 'pointer';
    th.addEventListener('click', () => sortTable(col));
    headerRow.appendChild(th);
  });
  thead.appendChild(headerRow);
}
// Render table body
function renderTable(dataToRender) {
const tbody = document.getElementById('tableBody');
tbody.innerHTML = '';
if (dataToRender.length === 0) {
tbody.innerHTML = '<tr><td colspan="' + COLUMNS.length + '" class="empty-message">No datasets match your filters</td></tr>';
updateInfo(0);
return;
}
dataToRender.forEach(row => {
const tr = document.createElement('tr');
COLUMNS.forEach(col => {
const td = document.createElement('td');
// Add category class for coloring
const category = COLUMN_CATEGORIES[col] || '';
if (category) {
td.classList.add('category-' + category);
}
// Special handling for Name column with links
if (col === 'Name') {
const nameValue = row[col];
const links = row._links || [];
const bibtex = row._bibtex;
const td_element = td;
let cellHTML = '<strong>' + escapeHtml(nameValue) + '</strong>';
if (links.length > 0 || bibtex) {
cellHTML += '<div style="margin-top: 6px; font-size: 0.85em; display: flex; gap: 10px; align-items: center; flex-wrap: wrap;">';
links.forEach(link => {
const [type, url] = link;
const icon = type === 'data' ? 'π₯' : 'π';
const label = type === 'data' ? 'data' : 'paper';
cellHTML += `<a href="${url}" target="_blank" rel="noopener" title="${url}" style="display: inline-flex; align-items: center; gap: 6px; color: #3498db; font-weight: 600;">${icon} ${label}</a>`;
});
if (bibtex) {
cellHTML += `<a href="#" class="bibtex-link" data-bibtex="${escapeHtml(bibtex).replace(/"/g, '"')}" title="View BibTeX citation" style="display: inline-flex; align-items: center; gap: 6px; color: #3498db; font-weight: 600;">π bibtex</a>`;
}
cellHTML += '</div>';
}