import re

# the full range of ABC note names, from C,, up to b''; the leading '|' is only a
# placeholder so that the real notes get the indices 1..n
all_notes = "| C,, D,, E,, F,, G,, A,, B,, C, D, E, F, G, A, B, C D E F G A B c d e f g a b c' d' e' f' g' a' b' c'' d'' e'' f'' g'' a'' b''".split()
note2number = {n: i for (i, n) in enumerate(all_notes)}
note_pattern = re.compile(r"([ABCDEFG],*|[abcdefg]'*)")
repl_by_spaces = lambda m: ' ' * len(m.group(0))

# fragments that should not be searched for notes (fields, comments, strings, grace
# notes, ornaments); the (?xm) prefix enables verbose and multiline mode for all of
# the alternatives below
remove_non_notes = re.compile(r'(?xm)' + '|'.join([
    r'\%\%beginps(.|\s)+?\%\%endps',      # remove embedded postscript
    r'\%\%begintext(.|\s)+?\%\%endtext',  # remove text blocks
    r'\[\w:.*?\]',                        # remove inline fields
    r'^\w:.*?$',                          # remove normal fields
    r'%.*$',                              # remove comments
    r'\\"',                               # remove escaped " characters
    r'".*?"',                             # remove strings
    r'\{.*?\}',                           # remove grace notes
    r'!.+?!',                             # remove ornaments, e.g. !pralltriller!
    r'\+.+?\+',                           # remove ornaments, e.g. +pralltriller+
    ]))

def remove_non_note_fragments(abc):
    # blank out the non-note fragments: replacing them with spaces preserves character
    # offsets, while bar lines and repeat symbols are kept
    return remove_non_notes.sub(repl_by_spaces, abc.replace('\r', '\n'))

def get_intervals_from_abc(abc):
    abc = remove_non_note_fragments(abc)
    notes = [note2number[m] for m in note_pattern.findall(abc)]
    intervals = [i2 - i1 for (i1, i2) in zip(notes[:-1], notes[1:])]
    # encode each interval as a single character, with chr(74) == 'J' standing for a unison
    return ''.join([chr(74 + i) for i in intervals])
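# A quick sanity check of the encoding (values follow from the note table above):
# 'CEG' and its transposition 'FAc' both consist of two upward diatonic thirds, so they
# map to the same interval string, which is what makes the search transposition-invariant:
#   get_intervals_from_abc('CEG')  ->  'LL'
#   get_intervals_from_abc('FAc')  ->  'LL'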

def get_matches(abc, abc_search_intervals):
    abc = remove_non_note_fragments(abc)
    matches = list(note_pattern.finditer(abc))             # the matched notes in the abc
    notes = [note2number[m.group(0)] for m in matches]     # the note number for each note
    offsets = [m.span(0) for m in matches]                 # the start/end offset in the ABC code for each note
    intervals = [i2 - i1 for (i1, i2) in zip(notes[:-1], notes[1:])]  # the intervals between consecutive notes
    intervals = ''.join([chr(74 + i) for i in intervals])  # the intervals encoded as a string
    # find the search string among all the intervals
    for m in re.finditer(re.escape(abc_search_intervals), intervals):
        i = m.start(0)  # the offset in the interval string equals the index of the first matched note
        start, end = offsets[i][0], offsets[i + len(abc_search_intervals)][1]
        yield (start, end)  # start and end offset in the ABC code of the matched sequence of notes
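# For example, list(get_matches(' GE CE GE', 'LL')) should yield [(4, 8)]: the span
# of the notes C-E-G, i.e. the substring 'CE G' of the input.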

def abc_matches_iter(abc, search_string):
    search = get_intervals_from_abc(search_string)
    offset = 0               # the total character offset from the beginning of the abc text
    tunes = abc.split('X:')  # split the code up per tune

    for i, tune in enumerate(tunes):
        if i > 0:
            tune = 'X:' + tune  # add back the 'X:' that was removed during the split

        # find matches
        for start_offset_in_tune, end_offset_in_tune in get_matches(tune, search):
            # convert the offsets within the tune to global offsets
            start, end = (offset + start_offset_in_tune, offset + end_offset_in_tune)
            yield (start, end)

        offset += len(tune)

if __name__ == '__main__':
    search = 'CEG'
    abc = ''' GE CE GE'''

    for r in abc_matches_iter(abc, search):
        print(r)
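# with the inputs above this prints the (start, end) character span of the C-E-G notes;
# for a file with several tunes (each starting with an 'X:' header) the offsets are
# relative to the full text, so matches can be highlighted directly in the source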