You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
12 KiB
328 lines
12 KiB
""" |
|
Functions for reversing a regular expression (used in reverse URL resolving). |
|
Used internally by Django and not intended for external use. |
|
|
|
This is not, and is not intended to be, a complete reg-exp decompiler. It |
|
should be good enough for a large class of URLS, however. |
|
""" |
|
|
|
# Mapping of an escape character to a representative of that class. So, e.g., |
|
# "\w" is replaced by "x" in a reverse URL. A value of None means to ignore |
|
# this sequence. Any missing key is mapped to itself. |
|
ESCAPE_MAPPINGS = { |
|
"A": None, |
|
"b": None, |
|
"B": None, |
|
"d": u"0", |
|
"D": u"x", |
|
"s": u" ", |
|
"S": u"x", |
|
"w": u"x", |
|
"W": u"!", |
|
"Z": None, |
|
} |
|
|
|
class Choice(list): |
|
""" |
|
Used to represent multiple possibilities at this point in a pattern string. |
|
We use a distinguished type, rather than a list, so that the usage in the |
|
code is clear. |
|
""" |
|
|
|
class Group(list): |
|
""" |
|
Used to represent a capturing group in the pattern string. |
|
""" |
|
|
|
class NonCapture(list): |
|
""" |
|
Used to represent a non-capturing group in the pattern string. |
|
""" |
|
|
|
def normalize(pattern): |
|
""" |
|
Given a reg-exp pattern, normalizes it to a list of forms that suffice for |
|
reverse matching. This does the following: |
|
|
|
(1) For any repeating sections, keeps the minimum number of occurrences |
|
permitted (this means zero for optional groups). |
|
(2) If an optional group includes parameters, include one occurrence of |
|
that group (along with the zero occurrence case from step (1)). |
|
(3) Select the first (essentially an arbitrary) element from any character |
|
class. Select an arbitrary character for any unordered class (e.g. '.' |
|
or '\w') in the pattern. |
|
(5) Ignore comments and any of the reg-exp flags that won't change |
|
what we construct ("iLmsu"). "(?x)" is an error, however. |
|
(6) Raise an error on all other non-capturing (?...) forms (e.g. |
|
look-ahead and look-behind matches) and any disjunctive ('|') |
|
constructs. |
|
|
|
Django's URLs for forward resolving are either all positional arguments or |
|
all keyword arguments. That is assumed here, as well. Although reverse |
|
resolving can be done using positional args when keyword args are |
|
specified, the two cannot be mixed in the same reverse() call. |
|
""" |
|
# Do a linear scan to work out the special features of this pattern. The |
|
# idea is that we scan once here and collect all the information we need to |
|
# make future decisions. |
|
result = [] |
|
non_capturing_groups = [] |
|
consume_next = True |
|
pattern_iter = next_char(iter(pattern)) |
|
num_args = 0 |
|
|
|
# A "while" loop is used here because later on we need to be able to peek |
|
# at the next character and possibly go around without consuming another |
|
# one at the top of the loop. |
|
try: |
|
ch, escaped = pattern_iter.next() |
|
except StopIteration: |
|
return zip([u''], [[]]) |
|
|
|
try: |
|
while True: |
|
if escaped: |
|
result.append(ch) |
|
elif ch == '.': |
|
# Replace "any character" with an arbitrary representative. |
|
result.append(u".") |
|
elif ch == '|': |
|
# FIXME: One day we'll should do this, but not in 1.0. |
|
raise NotImplementedError |
|
elif ch == "^": |
|
pass |
|
elif ch == '$': |
|
break |
|
elif ch == ')': |
|
# This can only be the end of a non-capturing group, since all |
|
# other unescaped parentheses are handled by the grouping |
|
# section later (and the full group is handled there). |
|
# |
|
# We regroup everything inside the capturing group so that it |
|
# can be quantified, if necessary. |
|
start = non_capturing_groups.pop() |
|
inner = NonCapture(result[start:]) |
|
result = result[:start] + [inner] |
|
elif ch == '[': |
|
# Replace ranges with the first character in the range. |
|
ch, escaped = pattern_iter.next() |
|
result.append(ch) |
|
ch, escaped = pattern_iter.next() |
|
while escaped or ch != ']': |
|
ch, escaped = pattern_iter.next() |
|
elif ch == '(': |
|
# Some kind of group. |
|
ch, escaped = pattern_iter.next() |
|
if ch != '?' or escaped: |
|
# A positional group |
|
name = "_%d" % num_args |
|
num_args += 1 |
|
result.append(Group(((u"%%(%s)s" % name), name))) |
|
walk_to_end(ch, pattern_iter) |
|
else: |
|
ch, escaped = pattern_iter.next() |
|
if ch in "iLmsu#": |
|
# All of these are ignorable. Walk to the end of the |
|
# group. |
|
walk_to_end(ch, pattern_iter) |
|
elif ch == ':': |
|
# Non-capturing group |
|
non_capturing_groups.append(len(result)) |
|
elif ch != 'P': |
|
# Anything else, other than a named group, is something |
|
# we cannot reverse. |
|
raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch) |
|
else: |
|
ch, escaped = pattern_iter.next() |
|
if ch != '<': |
|
raise ValueError("Non-reversible reg-exp portion: '(?P%s'" % ch) |
|
# We are in a named capturing group. Extra the name and |
|
# then skip to the end. |
|
name = [] |
|
ch, escaped = pattern_iter.next() |
|
while ch != '>': |
|
name.append(ch) |
|
ch, escaped = pattern_iter.next() |
|
param = ''.join(name) |
|
result.append(Group(((u"%%(%s)s" % param), param))) |
|
walk_to_end(ch, pattern_iter) |
|
elif ch in "*?+{": |
|
# Quanitifers affect the previous item in the result list. |
|
count, ch = get_quantifier(ch, pattern_iter) |
|
if ch: |
|
# We had to look ahead, but it wasn't need to compute the |
|
# quanitifer, so use this character next time around the |
|
# main loop. |
|
consume_next = False |
|
|
|
if count == 0: |
|
if contains(result[-1], Group): |
|
# If we are quantifying a capturing group (or |
|
# something containing such a group) and the minimum is |
|
# zero, we must also handle the case of one occurrence |
|
# being present. All the quantifiers (except {0,0}, |
|
# which we conveniently ignore) that have a 0 minimum |
|
# also allow a single occurrence. |
|
result[-1] = Choice([None, result[-1]]) |
|
else: |
|
result.pop() |
|
elif count > 1: |
|
result.extend([result[-1]] * (count - 1)) |
|
else: |
|
# Anything else is a literal. |
|
result.append(ch) |
|
|
|
if consume_next: |
|
ch, escaped = pattern_iter.next() |
|
else: |
|
consume_next = True |
|
except StopIteration: |
|
pass |
|
except NotImplementedError: |
|
# A case of using the disjunctive form. No results for you! |
|
return zip([u''], [[]]) |
|
|
|
return zip(*flatten_result(result)) |
|
|
|
def next_char(input_iter): |
|
""" |
|
An iterator that yields the next character from "pattern_iter", respecting |
|
escape sequences. An escaped character is replaced by a representative of |
|
its class (e.g. \w -> "x"). If the escaped character is one that is |
|
skipped, it is not returned (the next character is returned instead). |
|
|
|
Yields the next character, along with a boolean indicating whether it is a |
|
raw (unescaped) character or not. |
|
""" |
|
for ch in input_iter: |
|
if ch != '\\': |
|
yield ch, False |
|
continue |
|
ch = input_iter.next() |
|
representative = ESCAPE_MAPPINGS.get(ch, ch) |
|
if representative is None: |
|
continue |
|
yield representative, True |
|
|
|
def walk_to_end(ch, input_iter): |
|
""" |
|
The iterator is currently inside a capturing group. We want to walk to the |
|
close of this group, skipping over any nested groups and handling escaped |
|
parentheses correctly. |
|
""" |
|
if ch == '(': |
|
nesting = 1 |
|
else: |
|
nesting = 0 |
|
for ch, escaped in input_iter: |
|
if escaped: |
|
continue |
|
elif ch == '(': |
|
nesting += 1 |
|
elif ch == ')': |
|
if not nesting: |
|
return |
|
nesting -= 1 |
|
|
|
def get_quantifier(ch, input_iter): |
|
""" |
|
Parse a quantifier from the input, where "ch" is the first character in the |
|
quantifier. |
|
|
|
Returns the minimum number of occurences permitted by the quantifier and |
|
either None or the next character from the input_iter if the next character |
|
is not part of the quantifier. |
|
""" |
|
if ch in '*?+': |
|
try: |
|
ch2, escaped = input_iter.next() |
|
except StopIteration: |
|
ch2 = None |
|
if ch2 == '?': |
|
ch2 = None |
|
if ch == '+': |
|
return 1, ch2 |
|
return 0, ch2 |
|
|
|
quant = [] |
|
while ch != '}': |
|
ch, escaped = input_iter.next() |
|
quant.append(ch) |
|
quant = quant[:-1] |
|
values = ''.join(quant).split(',') |
|
|
|
# Consume the trailing '?', if necessary. |
|
try: |
|
ch, escaped = input_iter.next() |
|
except StopIteration: |
|
ch = None |
|
if ch == '?': |
|
ch = None |
|
return int(values[0]), ch |
|
|
|
def contains(source, inst): |
|
""" |
|
Returns True if the "source" contains an instance of "inst". False, |
|
otherwise. |
|
""" |
|
if isinstance(source, inst): |
|
return True |
|
if isinstance(source, NonCapture): |
|
for elt in source: |
|
if contains(elt, inst): |
|
return True |
|
return False |
|
|
|
def flatten_result(source): |
|
""" |
|
Turns the given source sequence into a list of reg-exp possibilities and |
|
their arguments. Returns a list of strings and a list of argument lists. |
|
Each of the two lists will be of the same length. |
|
""" |
|
if source is None: |
|
return [u''], [[]] |
|
if isinstance(source, Group): |
|
if source[1] is None: |
|
params = [] |
|
else: |
|
params = [source[1]] |
|
return [source[0]], [params] |
|
result = [u''] |
|
result_args = [[]] |
|
pos = last = 0 |
|
for pos, elt in enumerate(source): |
|
if isinstance(elt, basestring): |
|
continue |
|
piece = u''.join(source[last:pos]) |
|
if isinstance(elt, Group): |
|
piece += elt[0] |
|
param = elt[1] |
|
else: |
|
param = None |
|
last = pos + 1 |
|
for i in range(len(result)): |
|
result[i] += piece |
|
if param: |
|
result_args[i].append(param) |
|
if isinstance(elt, (Choice, NonCapture)): |
|
if isinstance(elt, NonCapture): |
|
elt = [elt] |
|
inner_result, inner_args = [], [] |
|
for item in elt: |
|
res, args = flatten_result(item) |
|
inner_result.extend(res) |
|
inner_args.extend(args) |
|
new_result = [] |
|
new_args = [] |
|
for item, args in zip(result, result_args): |
|
for i_item, i_args in zip(inner_result, inner_args): |
|
new_result.append(item + i_item) |
|
new_args.append(args[:] + i_args) |
|
result = new_result |
|
result_args = new_args |
|
if pos >= last: |
|
piece = u''.join(source[last:]) |
|
for i in range(len(result)): |
|
result[i] += piece |
|
return result, result_args |
|
|
|
|