1 """
2 Iterator based sre token scanner
3 """
4 import sre_parse, sre_compile, sre_constants
5 from sre_constants import BRANCH, SUBPATTERN
6 from re import VERBOSE, MULTILINE, DOTALL
7 import re
8
# Names exported when this module is star-imported.
9 __all__ = ['Scanner', 'pattern']
10
# Combined regex flags: re.VERBOSE | re.MULTILINE | re.DOTALL.
# NOTE(review): presumably the default flags for Scanner -- the signature
# that would consume this constant is not visible here; confirm.
11 FLAGS = (VERBOSE | MULTILINE | DOTALL)
14 self.actions = [None]
15
16 s = sre_parse.Pattern()
17 s.flags = flags
18 p = []
19 for idx, token in enumerate(lexicon):
20 phrase = token.pattern
21 try:
22 subpattern = sre_parse.SubPattern(s,
23 [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
24 except sre_constants.error:
25 raise
26 p.append(subpattern)
27 self.actions.append(token)
28
29 s.groups = len(p)+1
30 p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
31 self.scanner = sre_compile.compile(p)
32
33
def iterscan(self, string, idx=0, context=None):
    """
    Yield ``(result, end_idx)`` for each token matched in ``string``.

    Scanning starts at ``idx`` and uses ``self.scanner`` (a compiled
    pattern) to match consecutive tokens.  The handler for a match is
    looked up as ``self.actions[m.lastindex]``:

    * If the handler is ``None`` the match is consumed and nothing is
      yielded for it.
    * Otherwise the handler is called as ``handler(m, context)`` and must
      return a ``(result, next_pos)`` pair.  When ``next_pos`` is not
      ``None`` and differs from the end of the match, scanning resumes
      from ``next_pos`` instead, and that position is the ``end_idx``
      that gets yielded.

    Iteration stops when the pattern no longer matches at the current
    position, or when a match ends exactly where the previous one ended
    (no progress was made).

    :param string: the text to scan.
    :param idx: position in ``string`` at which to start scanning.
    :param context: opaque value passed through to every handler.
    """
    match = self.scanner.scanner(string, idx).match
    actions = self.actions
    lastend = idx
    while True:
        m = match()
        if m is None:
            break
        matchend = m.end()
        if lastend == matchend:
            # No forward progress (e.g. an empty match): stop instead of
            # looping forever on the same position.
            break
        action = actions[m.lastindex]
        if action is not None:
            rval, next_pos = action(m, context)
            if next_pos is not None and next_pos != matchend:
                # "Fast forward" the scanner: the handler consumed input
                # on its own, so restart matching from where it stopped.
                matchend = next_pos
                match = self.scanner.scanner(string, matchend).match
            yield rval, matchend
        lastend = matchend
58
64 return decorator
65