|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import sys |
| 4 | +import os |
| 5 | +import re |
| 6 | + |
# The DIL file to analyse is the first (and only required) command-line
# argument; without it, print the usage line and exit with status 1.
try:
    dilfile = sys.argv[1]
except IndexError:
    print("Usage: stageoverlap.py <dil>")
    sys.exit(1)

# Keys (space-joined cached predicate names) of the stages already reported.
seen_stages = set()
# Maps each predicate name to the number of the stage that first computed it.
computed_predicates = dict()
# Number of the most recently reported stage; 0 until the first one is seen.
stage_number = 0
| 17 | + |
def process_stage(stage, cached):
    """Report one stage and every predicate it computes again.

    Prints a header for the stage (its number, how many cached predicates it
    has, and their names) followed by one 'Recompute from N: <predicate>'
    line for each predicate that an earlier reported stage already computed.

    Args:
        stage: names of the predicates evaluated in this stage.
        cached: list of the names of this stage's cached predicates; joined
            with spaces they form the key used to detect repeated stages.

    Side effects:
        Updates the module-level ``seen_stages``, ``computed_predicates`` and
        ``stage_number``, and writes the report to standard output.
    """
    global stage_number
    stage_key = ' '.join(cached)
    # skip repeated stages (in case we're looking at DIL for several queries,
    # e.g. from a .qls)
    if stage_key in seen_stages:
        return
    # don't count the query-stage as seen, since we don't want to skip those
    if '#select' not in cached:
        seen_stages.add(stage_key)
    stage_number += 1
    print('STAGE {}:'.format(stage_number))
    print('{} cached predicate(s)'.format(len(cached)))
    print(stage_key)
    # A trailing '#' followed by a run of 'b'/'f' characters is the magic
    # suffix; it is disregarded when comparing predicates.
    magic_suffix = re.compile(r'#[bf]+$')
    for predicate in stage:
        predicate = magic_suffix.sub('', predicate)
        # TODO: maybe also strip the hash, i.e. a trailing '#[a-f0-9]+'?
        prior_stage = computed_predicates.get(predicate)
        if prior_stage is None:
            # First sighting: remember which stage computed it.
            computed_predicates[predicate] = stage_number
        elif not predicate.startswith(('@', 'project#')):
            # db-relations and some generated predicates are not reported
            print('Recompute from {}: {}'.format(prior_stage, predicate))
    print()
| 45 | + |
# Scan the DIL file line by line, collecting the predicates and cached
# (query) predicates of the current stage, and hand them to process_stage
# each time an END STAGE marker is reached.
with open(dilfile, 'r') as f:
    stage = []
    cached = []
    # True when the previous line was a bare 'query', meaning the cached
    # predicate's name is at the start of the next line.
    query = False
    for line in f:
        # Drop the line terminator up front so that the exact comparisons
        # below also match a last line with no trailing newline, and so that
        # extracted predicate names never carry a newline.
        line = line.rstrip('\n')
        # skip lines starting with a space, i.e. predicate bodies
        if line.startswith(' '):
            continue
        # get the part of the line containing no spaces occurring before the
        # first '('; this is the predicate name
        parenpos = line.find('(')
        if parenpos != -1:
            start = line.rfind(' ', 0, parenpos)
            predicate = line[start + 1:parenpos]
            if predicate.startswith('`'):
                # remove the leading and trailing backticks
                predicate = predicate[1:-1]
            stage.append(predicate)
            continue
        # query predicates, aka cached predicates, are written either as
        # 'query <predicatename> = ...' on one line, or split across 2+ lines
        if line.startswith('query '):
            cached.append(line.split(' ')[1])
            continue
        if line == 'query':
            query = True
            continue
        if query:
            cached.append(line.split(' ')[0])
            query = False
            continue
        if line == '/* ---------- END STAGE ---------- */':
            process_stage(stage, cached)
            stage = []
            cached = []
0 commit comments