1717from collections .abc import Iterator
1818from time import strftime
1919from typing import NamedTuple
20- from urllib .parse import urlparse , urlunparse
21- from urllib .parse import ParseResult as Url
2220
23- HTACCESS_CUSTOM = 'FPY.LI.custom .htaccess'
21+ HTACCESS_MAIN = 'FPY.LI.htaccess'
2422HTACCESS_SHORT = 'FPY.LI.short.htaccess'
25- HTACCESS_FILES = (HTACCESS_CUSTOM , HTACCESS_SHORT )
23+ HTACCESS_FILES = (HTACCESS_MAIN , HTACCESS_SHORT )
2624BASE_DOMAIN = 'fpy.li'
2725
28- type ShortCode = str
26+ type ShortCode = bytes
27+ type Url = str
2928type RedirMap = dict [ShortCode , Url ]
3029type TargetMap = dict [Url , ShortCode ]
3130
@@ -40,32 +39,33 @@ def load_redirects() -> tuple[RedirMap, TargetMap]:
4039 with open (filename ) as fp :
4140 for line in fp :
4241 if line .startswith ('RedirectTemp' ):
43- _ , short , field2 , * _ = line .split ()
44- short = short [1 :] # Remove leading slash
45- long = urlparse (field2 )
42+ _ , field1 , long , * _ = line .split ()
43+ short = field1 .encode ('ascii' )[1 :] # Remove leading slash
4644 assert short not in redirects , f'{ filename } : duplicate redirect from { short } '
4745 # htaccess.custom is live since 2022, I can't change it to remove duplicate targets
48- if filename != HTACCESS_CUSTOM :
49- assert long not in targets , f'{ filename } : duplicate redirect to { long } '
46+ #if filename != HTACCESS_MAIN:
47+ #assert long not in targets, f'{filename}: duplicate redirect to {long}'
48+ if long in targets :
49+ print (f'{ filename } : duplicate redirect to { long } ' )
5050 redirects [short ] = long
5151 targets [long ] = short
5252
5353 return redirects , targets
5454
5555
# Alphabet for short codes, as bytes. Iterating it yields the integer
# value of each character, which is what `bytes(...)` expects below.
SDIGITS = b'23456789abcdefghjkmnpqrstvwxyz'


def gen_short(start_len: int = 1) -> Iterator[ShortCode]:
    """Generate every possible sequence of SDIGITS, starting with start_len.

    Codes come out in order of increasing length; within one length they
    follow the order of SDIGITS. The generator never terminates, so the
    caller decides when to stop consuming it.
    """
    for length in itertools.count(start_len):
        # product() over a bytes object yields tuples of ints;
        # bytes() packs each tuple back into a bytes short code.
        yield from map(bytes, itertools.product(SDIGITS, repeat=length))
6666
6767
68- def gen_unused_short (redirects : dict ) -> Iterator [str ]:
68+ def gen_unused_short (redirects : dict ) -> Iterator [ShortCode ]:
6969 """Generate next available short URL of len >= 2."""
7070 for short in gen_short (2 ):
7171 if short not in redirects :
@@ -80,29 +80,30 @@ def shorten(urls: list[str]) -> list[ShortPair]:
8080 timestamp = strftime ('%Y-%m-%d %H:%M:%S' )
8181 with open (HTACCESS_SHORT , 'a' ) as fp :
8282 for long in urls :
83- url = urlparse (long )
8483 assert BASE_DOMAIN not in long , f'{ long } is a { BASE_DOMAIN } URL'
85- if url in targets :
86- short = targets [url ]
84+ if long in targets :
85+ short = targets [long ]
8786 else :
8887 short = next (iter_short )
89- redirects [short ] = url
90- targets [url ] = short
88+ redirects [short ] = long
89+ targets [long ] = short
9190 if timestamp :
9291 fp .write (f'\n # appended: { timestamp } \n ' )
9392 timestamp = None
94- fp .write (f'RedirectTemp /{ short } { urlunparse ( url ) } \n ' )
95- pairs .append ((short , url ))
93+ fp .write (f'RedirectTemp /{ short . decode ( 'ascii' ) } { long } \n ' )
94+ pairs .append ((short , long ))
9695
9796 return pairs
9897
9998
def main() -> None:
    """Read URLs from filename arguments or stdin and print their short links.

    Each input line is stripped and passed to `shorten`. For every resulting
    pair, one line is printed: `BASE_DOMAIN/<code>`, a tab, then the long URL.
    """
    urls = [line.strip() for line in fileinput.input(encoding='utf-8')]
    # Unpack positionally instead of reading `.code` / `.url`: shorten()
    # appends plain (code, url) tuples, which have no named attributes.
    # Positional unpacking works for plain tuples and named tuples alike.
    for code, url in shorten(urls):
        short = code.decode('ascii')
        print(f'{BASE_DOMAIN}/{short}\t{url}')
105105
106106
if __name__ == '__main__':
    # NOTE(review): main() is disabled, so running this script only loads the
    # existing redirects (printing any duplicate-target warnings) and does not
    # shorten anything. Confirm whether this is intentional before merging.
    # main()
    load_redirects()
0 commit comments