diff options
author | Terry Jan Reedy <tjreedy@udel.edu> | 2016-05-22 19:10:31 -0400 |
---|---|---|
committer | Terry Jan Reedy <tjreedy@udel.edu> | 2016-05-22 19:10:31 -0400 |
commit | 0d9220e162f1e5f8caa3d7ebaa54665776d361a1 (patch) | |
tree | a4fa9f7cafdfc93fde86b8ffd6088d739bb93e01 /Lib/idlelib/HyperParser.py | |
parent | dc4f7c09cc934524a763498250e30123a0d9d2c5 (diff) | |
download | cpython-0d9220e162f1e5f8caa3d7ebaa54665776d361a1.tar.gz cpython-0d9220e162f1e5f8caa3d7ebaa54665776d361a1.zip |
Issue #24225: Rename many idlelib/*.py and idlelib/idle_test/test_*.py files.
Diffstat (limited to 'Lib/idlelib/HyperParser.py')
-rw-r--r-- | Lib/idlelib/HyperParser.py | 313 |
1 files changed, 0 insertions, 313 deletions
diff --git a/Lib/idlelib/HyperParser.py b/Lib/idlelib/HyperParser.py deleted file mode 100644 index 77cb057ce21..00000000000 --- a/Lib/idlelib/HyperParser.py +++ /dev/null @@ -1,313 +0,0 @@ -"""Provide advanced parsing abilities for ParenMatch and other extensions. - -HyperParser uses PyParser. PyParser mostly gives information on the -proper indentation of code. HyperParser gives additional information on -the structure of code. -""" - -import string -from keyword import iskeyword -from idlelib import PyParse - - -# all ASCII chars that may be in an identifier -_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_") -# all ASCII chars that may be the first char of an identifier -_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_") - -# lookup table for whether 7-bit ASCII chars are valid in a Python identifier -_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)] -# lookup table for whether 7-bit ASCII chars are valid as the first -# char in a Python identifier -_IS_ASCII_ID_FIRST_CHAR = \ - [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)] - - -class HyperParser: - def __init__(self, editwin, index): - "To initialize, analyze the surroundings of the given index." - - self.editwin = editwin - self.text = text = editwin.text - - parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth) - - def index2line(index): - return int(float(index)) - lno = index2line(text.index(index)) - - if not editwin.context_use_ps1: - for context in editwin.num_context_lines: - startat = max(lno - context, 1) - startatindex = repr(startat) + ".0" - stopatindex = "%d.end" % lno - # We add the newline because PyParse requires a newline - # at end. We add a space so that index won't be at end - # of line, so that its status will be the same as the - # char before it, if should. - parser.set_str(text.get(startatindex, stopatindex)+' \n') - bod = parser.find_good_parse_start( - editwin._build_char_in_string_func(startatindex)) - if bod is not None or startat == 1: - break - parser.set_lo(bod or 0) - else: - r = text.tag_prevrange("console", index) - if r: - startatindex = r[1] - else: - startatindex = "1.0" - stopatindex = "%d.end" % lno - # We add the newline because PyParse requires it. We add a - # space so that index won't be at end of line, so that its - # status will be the same as the char before it, if should. - parser.set_str(text.get(startatindex, stopatindex)+' \n') - parser.set_lo(0) - - # We want what the parser has, minus the last newline and space. - self.rawtext = parser.str[:-2] - # Parser.str apparently preserves the statement we are in, so - # that stopatindex can be used to synchronize the string with - # the text box indices. - self.stopatindex = stopatindex - self.bracketing = parser.get_last_stmt_bracketing() - # find which pairs of bracketing are openers. These always - # correspond to a character of rawtext. - self.isopener = [i>0 and self.bracketing[i][1] > - self.bracketing[i-1][1] - for i in range(len(self.bracketing))] - - self.set_index(index) - - def set_index(self, index): - """Set the index to which the functions relate. - - The index must be in the same statement. - """ - indexinrawtext = (len(self.rawtext) - - len(self.text.get(index, self.stopatindex))) - if indexinrawtext < 0: - raise ValueError("Index %s precedes the analyzed statement" - % index) - self.indexinrawtext = indexinrawtext - # find the rightmost bracket to which index belongs - self.indexbracket = 0 - while (self.indexbracket < len(self.bracketing)-1 and - self.bracketing[self.indexbracket+1][0] < self.indexinrawtext): - self.indexbracket += 1 - if (self.indexbracket < len(self.bracketing)-1 and - self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and - not self.isopener[self.indexbracket+1]): - self.indexbracket += 1 - - def is_in_string(self): - """Is the index given to the HyperParser in a string?""" - # The bracket to which we belong should be an opener. - # If it's an opener, it has to have a character. - return (self.isopener[self.indexbracket] and - self.rawtext[self.bracketing[self.indexbracket][0]] - in ('"', "'")) - - def is_in_code(self): - """Is the index given to the HyperParser in normal code?""" - return (not self.isopener[self.indexbracket] or - self.rawtext[self.bracketing[self.indexbracket][0]] - not in ('#', '"', "'")) - - def get_surrounding_brackets(self, openers='([{', mustclose=False): - """Return bracket indexes or None. - - If the index given to the HyperParser is surrounded by a - bracket defined in openers (or at least has one before it), - return the indices of the opening bracket and the closing - bracket (or the end of line, whichever comes first). - - If it is not surrounded by brackets, or the end of line comes - before the closing bracket and mustclose is True, returns None. - """ - - bracketinglevel = self.bracketing[self.indexbracket][1] - before = self.indexbracket - while (not self.isopener[before] or - self.rawtext[self.bracketing[before][0]] not in openers or - self.bracketing[before][1] > bracketinglevel): - before -= 1 - if before < 0: - return None - bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) - after = self.indexbracket + 1 - while (after < len(self.bracketing) and - self.bracketing[after][1] >= bracketinglevel): - after += 1 - - beforeindex = self.text.index("%s-%dc" % - (self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) - if (after >= len(self.bracketing) or - self.bracketing[after][0] > len(self.rawtext)): - if mustclose: - return None - afterindex = self.stopatindex - else: - # We are after a real char, so it is a ')' and we give the - # index before it. - afterindex = self.text.index( - "%s-%dc" % (self.stopatindex, - len(self.rawtext)-(self.bracketing[after][0]-1))) - - return beforeindex, afterindex - - # the set of built-in identifiers which are also keywords, - # i.e. keyword.iskeyword() returns True for them - _ID_KEYWORDS = frozenset({"True", "False", "None"}) - - @classmethod - def _eat_identifier(cls, str, limit, pos): - """Given a string and pos, return the number of chars in the - identifier which ends at pos, or 0 if there is no such one. - - This ignores non-identifier eywords are not identifiers. - """ - is_ascii_id_char = _IS_ASCII_ID_CHAR - - # Start at the end (pos) and work backwards. - i = pos - - # Go backwards as long as the characters are valid ASCII - # identifier characters. This is an optimization, since it - # is faster in the common case where most of the characters - # are ASCII. - while i > limit and ( - ord(str[i - 1]) < 128 and - is_ascii_id_char[ord(str[i - 1])] - ): - i -= 1 - - # If the above loop ended due to reaching a non-ASCII - # character, continue going backwards using the most generic - # test for whether a string contains only valid identifier - # characters. - if i > limit and ord(str[i - 1]) >= 128: - while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier(): - i -= 4 - if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier(): - i -= 2 - if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier(): - i -= 1 - - # The identifier candidate starts here. If it isn't a valid - # identifier, don't eat anything. At this point that is only - # possible if the first character isn't a valid first - # character for an identifier. - if not str[i:pos].isidentifier(): - return 0 - elif i < pos: - # All characters in str[i:pos] are valid ASCII identifier - # characters, so it is enough to check that the first is - # valid as the first character of an identifier. - if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]: - return 0 - - # All keywords are valid identifiers, but should not be - # considered identifiers here, except for True, False and None. - if i < pos and ( - iskeyword(str[i:pos]) and - str[i:pos] not in cls._ID_KEYWORDS - ): - return 0 - - return pos - i - - # This string includes all chars that may be in a white space - _whitespace_chars = " \t\n\\" - - def get_expression(self): - """Return a string with the Python expression which ends at the - given index, which is empty if there is no real one. - """ - if not self.is_in_code(): - raise ValueError("get_expression should only be called" - "if index is inside a code.") - - rawtext = self.rawtext - bracketing = self.bracketing - - brck_index = self.indexbracket - brck_limit = bracketing[brck_index][0] - pos = self.indexinrawtext - - last_identifier_pos = pos - postdot_phase = True - - while 1: - # Eat whitespaces, comments, and if postdot_phase is False - a dot - while 1: - if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: - # Eat a whitespace - pos -= 1 - elif (not postdot_phase and - pos > brck_limit and rawtext[pos-1] == '.'): - # Eat a dot - pos -= 1 - postdot_phase = True - # The next line will fail if we are *inside* a comment, - # but we shouldn't be. - elif (pos == brck_limit and brck_index > 0 and - rawtext[bracketing[brck_index-1][0]] == '#'): - # Eat a comment - brck_index -= 2 - brck_limit = bracketing[brck_index][0] - pos = bracketing[brck_index+1][0] - else: - # If we didn't eat anything, quit. - break - - if not postdot_phase: - # We didn't find a dot, so the expression end at the - # last identifier pos. - break - - ret = self._eat_identifier(rawtext, brck_limit, pos) - if ret: - # There is an identifier to eat - pos = pos - ret - last_identifier_pos = pos - # Now, to continue the search, we must find a dot. - postdot_phase = False - # (the loop continues now) - - elif pos == brck_limit: - # We are at a bracketing limit. If it is a closing - # bracket, eat the bracket, otherwise, stop the search. - level = bracketing[brck_index][1] - while brck_index > 0 and bracketing[brck_index-1][1] > level: - brck_index -= 1 - if bracketing[brck_index][0] == brck_limit: - # We were not at the end of a closing bracket - break - pos = bracketing[brck_index][0] - brck_index -= 1 - brck_limit = bracketing[brck_index][0] - last_identifier_pos = pos - if rawtext[pos] in "([": - # [] and () may be used after an identifier, so we - # continue. postdot_phase is True, so we don't allow a dot. - pass - else: - # We can't continue after other types of brackets - if rawtext[pos] in "'\"": - # Scan a string prefix - while pos > 0 and rawtext[pos - 1] in "rRbBuU": - pos -= 1 - last_identifier_pos = pos - break - - else: - # We've found an operator or something. - break - - return rawtext[last_identifier_pos:self.indexinrawtext] - - -if __name__ == '__main__': - import unittest - unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2) |