|
| 1 | +--- |
| 2 | +layout: post |
| 3 | +title: Exploring Transformer Architecture (Work in Progress) |
| 4 | +published: false |
| 5 | +--- |
| 6 | + |
| 7 | + |
| 8 | +{% highlight python %} |
| 9 | +import string |
| 10 | +import tensorflow as tf |
| 11 | + |
| 12 | +input = tf.io.read_file("/Users/anu/PycharmProjects/TensorFlow2/shakespeare.txt") |
| 13 | +input = tf.strings.strip(input) |
| 14 | +input = tf.strings.regex_replace(input,' +', '') |
| 15 | +input = tf.strings.regex_replace(input,'\n', '') |
| 16 | +length = int(tf.strings.length(input)) |
| 17 | + |
| 18 | +table = tf.lookup.StaticHashTable( |
| 19 | + initializer=tf.lookup.KeyValueTensorInitializer( |
| 20 | + keys=tf.constant([c for c in string.printable]), |
| 21 | + values=tf.constant([ind for ind,x in enumerate([c for c in string.printable])]), |
| 22 | + ), |
| 23 | + default_value=tf.constant(-1), |
| 24 | + name="printable" |
| 25 | +) |
| 26 | + |
| 27 | +def random_sample(text): |
| 28 | + rand = tf.random.uniform(shape=[], minval=1, maxval=length - 200) |
| 29 | + start = int(rand) |
| 30 | + print(f'Start={int(rand)} Length={length} End={start + 200 + 1}') |
| 31 | + return tf.strings.substr(text,start, 200, unit='BYTE') |
| 32 | + |
| 33 | + |
| 34 | +def draw_random_sample(text): |
| 35 | + return random_sample(input) |
| 36 | + |
| 37 | +list = [] |
| 38 | + |
| 39 | +def map_fn(bytes): |
| 40 | + list.append(table.lookup(bytes)) |
| 41 | + return bytes |
| 42 | + |
| 43 | +sample = draw_random_sample(input) |
| 44 | + |
| 45 | +split_sample = tf.strings.bytes_split(sample) |
| 46 | +tf.map_fn(map_fn, tf.strings.bytes_split(split_sample)) |
| 47 | +print(tf.stack(list)) |
| 48 | + |
| 49 | + |
| 50 | +{% endhighlight %} |
0 commit comments