forked from sidneycadot/oeis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcharmap.py
executable file
·58 lines (48 loc) · 4.74 KB
/
charmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#! /usr/bin/env python3
# The 95 printable ASCII characters: code points 0x20 (space) through 0x7e ('~').
# Control characters (0x00--0x1f) and the DEL character (0x7f) are excluded.
ASCII = frozenset(chr(code_point) for code_point in range(0x20, 0x7f))
# These are the characters that actually occur, per single-letter key:
# the printable ASCII set plus the extra characters listed for that key.
#
# The Latin ligatures (U+FB00 'ff', U+FB01 'fi', U+FB02 'fl') and the fullwidth
# colon / semicolon (U+FF1A / U+FF1B) are written as \u escapes on purpose.
# Spelled as their ASCII look-alikes they would merely repeat characters that
# are already in ASCII, and the difference with acceptable_characters (whose
# comment names exactly these characters as removed) would vanish.
occuring_characters = {
    'N': ASCII | frozenset("\xa0\xad°´·ºÁÃ×àáäåèéíîóöøúüĀńőŜσωआटभयर्ṭ’•…∈≤≥⌈⌉\ufb00\ufb01\ufb02"),
    'C': ASCII | frozenset("¢£§«°±²´·º»½ÁÇ×ÜßàáäåçèéëíîïñòóôõöøùúüýāăćčęěħıłńőřśşšťžΧβγμπρστωϱавдеилмнопрстучшыьяաבוכלᵣᵤḠ\u200b—‘’“”…′ℕ↑⇒∈∏∑∞∩∫≅≈≠≤≥⊂⊆⊗⌈⌉\u3000八發\uf020\ufb01\ufb02\ufeff𝒩𝓁"),
    'D': ASCII | frozenset("\x7f§«°±´¸»ÁÇÉÖ×ÚÜßàáäåçèéêëíîïñóôõöøùúüýăąćČčěłńőŒřŚŞşŠšũūżžǎ́Λλμπϕ\u2002\u2009\u200e‐—’“”…∞∪≡\ufb00\ufb01"),
    'H': ASCII | frozenset("\x81£§©«®°±´µ·»ÁÂÃÅÆÉÕÖ×ÚÜßàáâäåçèéêëíîïñòóôõöøúûüýĀāăćČčěğĭıłńņňőœřśşŠšţūŽžΓΔΛΣΨαβγδζθπστφωϕНРСагдезийклнопрстхчыяאבגדוכלקרשתṭ\u200e—’“”…∏∑√∣≡⌊⌋\ufb00\ufb01\ufb02"),
    'F': ASCII | frozenset("°²´·ºÁÇ×ÜàáäçèéêíñóôöøúüćńőřşžΓβλ‐‘’”…∞≍≤≥⌈⌉\u3000\ufeff\uff1b"),
    'e': ASCII | frozenset("¢¨¯°´·×ßáäçèéíôöüīńβλρω\u200b—‘’“”•…∆⊗│\uff1a"),
    'p': ASCII | frozenset("Äéóöø‘’"),
    't': ASCII | frozenset("\x8a®°¹¼×áçèéíñóöúüŠπ…\u2028√≠≤≥\u3000\uf08a\uf0a3\uf0ae\uf0b3\uf0b9"),
    'o': ASCII | frozenset("\x8d£«¯´·»Áßáäçèéêíîïðòö÷üπ“”…€←∪≠⊤⌊⌿⍳⍴⍸○"),
    'Y': ASCII | frozenset("ßáéñöøńőΧ’…⊂\u3000"),
    'A': ASCII | frozenset("ÁÅÆÇÉØÜßàáâäçèéëíñóôöøúüČńņőşš"),
    'E': ASCII | frozenset("´ÁÉßàáãäçèéíñóôöøüýčěłńőš’"),
}
# These are the characters that are deemed acceptable:
#
# Compared to the actually occurring characters, we have removed the following characters from the acceptable characters:
#
# - the ligature characters: 'ff', 'fi', 'fl'
# - fullwidth colon character ':' (0xff1a)
# - fullwidth semicolon character ';' (0xff1b)
# Maps each single-letter key to the frozenset of characters deemed acceptable
# for that key: the printable ASCII set plus the extra characters listed in the
# literal. main() asserts that each set here is contained in the set stored
# under the same key in occuring_characters.
acceptable_characters = {
'N' : ASCII | frozenset("\xa0\xad°´·ºÁÃ×àáäåèéíîóöøúüĀńőŜσωआटभयर्ṭ’•…∈≤≥⌈⌉"),
# NOTE(review): the "fifl" between \uf020 and \ufeff below only repeats letters
# that are already in ASCII as written. If it stands for the fi/fl ligatures
# (U+FB01, U+FB02), it contradicts the statement above that the ligature
# characters were removed from the acceptable sets -- confirm which is intended.
'C' : ASCII | frozenset("¢£§«°±²´·º»½ÁÇ×ÜßàáäåçèéëíîïñòóôõöøùúüýāăćčęěħıłńőřśşšťžΧβγμπρστωϱавдеилмнопрстучшыьяաבוכלᵣᵤḠ\u200b—‘’“”…′ℕ↑⇒∈∏∑∞∩∫≅≈≠≤≥⊂⊆⊗⌈⌉\u3000八發\uf020fifl\ufeff𝒩𝓁"),
# Includes the DEL control character (\x7f), which is not part of ASCII as defined in this file.
'D' : ASCII | frozenset("\x7f§«°±´¸»ÁÇÉÖ×ÚÜßàáäåçèéêëíîïñóôõöøùúüýăąćČčěłńőŒřŚŞşŠšũūżžǎ́Λλμπϕ\u2002\u2009\u200e‐—’“”…∞∪≡"),
'H' : ASCII | frozenset("\x81£§©«®°±´µ·»ÁÂÃÅÆÉÕÖ×ÚÜßàáâäåçèéêëíîïñòóôõöøúûüýĀāăćČčěğĭıłńņňőœřśşŠšţūŽžΓΔΛΣΨαβγδζθπστφωϕНРСагдезийклнопрстхчыяאבגדוכלקרשתṭ\u200e—’“”…∏∑√∣≡⌊⌋"),
'F' : ASCII | frozenset("°²´·ºÁÇ×ÜàáäçèéêíñóôöøúüćńőřşžΓβλ‐‘’”…∞≍≤≥⌈⌉\u3000\ufeff"),
'e' : ASCII | frozenset("¢¨¯°´·×ßáäçèéíôöüīńβλρω\u200b—‘’“”•…∆⊗│"),
'p' : ASCII | frozenset("Äéóöø‘’"),
't' : ASCII | frozenset("\x8a®°¹¼×áçèéíñóöúüŠπ…\u2028√≠≤≥\u3000\uf08a\uf0a3\uf0ae\uf0b3\uf0b9"),
'o' : ASCII | frozenset("\x8d£«¯´·»Áßáäçèéêíîïðòö÷üπ“”…€←∪≠⊤⌊⌿⍳⍴⍸○"),
'Y' : ASCII | frozenset("ßáéñöøńőΧ’…⊂\u3000"),
'A' : ASCII | frozenset("ÁÅÆÇÉØÜßàáâäçèéëíñóôöøúüČńņőşš"),
'E' : ASCII | frozenset("´ÁÉßàáãäçèéíñóôöøüýčěłńőš’"),
}
def main():
    """Report, per key, the occurring characters that are not deemed acceptable.

    Every key of ``acceptable_characters`` is visited in sorted order. For
    each key, the characters present in ``occuring_characters[key]`` but
    absent from ``acceptable_characters[key]`` are printed on one line, as
    sorted, comma-separated ``repr`` values. Keys without such characters
    print nothing.

    Raises:
        AssertionError: if an acceptable set contains a character that is
            missing from the occurring set of the same key.
        KeyError: if a key of ``acceptable_characters`` has no entry in
            ``occuring_characters``.
    """
    for key in sorted(acceptable_characters):
        occurring = occuring_characters[key]
        acceptable = acceptable_characters[key]

        # Raised explicitly instead of via a bare `assert`, so the consistency
        # check still runs under `python -O` and the failure names the key and
        # the offending characters.
        never_occurring = acceptable - occurring
        if never_occurring:
            raise AssertionError(
                "key {} accepts characters that do not occur: {}".format(
                    key, ", ".join(repr(c) for c in sorted(never_occurring))
                )
            )

        unwanted_characters = occurring - acceptable
        if unwanted_characters:
            listing = ", ".join(repr(c) for c in sorted(unwanted_characters))
            print("key {} has unwanted characters: {}".format(key, listing))


if __name__ == "__main__":
    main()