# Source code for formatter2.tokens

import re
import logging
import functools

from . import tokenize_fork as tokenize
from .offsets import TOKEN_OFFSETS
from .types import TOKEN_TYPES

# Module-level logger; classes in this module derive child loggers from it.
logger = logging.getLogger(__name__)
# Matches the run of whitespace at the very start of a string. Written as a
# raw string so that ``\s`` reaches the regex engine instead of being an
# invalid string escape.
space_prefix_re = re.compile(r'^\s+')


class SmartList(list):
    '''A list with element-wise ``+``/``-`` and bulk assignment.

    Create it either from positional items (``SmartList(1, 2)``) or zero
    filled with ``SmartList(size=2)``. The resulting size must be 0 or at
    least 2; this is enforced with an ``assert``.

    The right hand operand of ``+``/``-`` can be a list/tuple (applied index
    by index) or a single ``int`` (applied to every element).
    '''

    def __init__(self, *items, **kwargs):
        '''
        :param items: the initial elements, takes precedence over ``size``
        :param size: keyword only, number of zeros to fill the list with
            when no items are given
        '''
        self.logger = logger.getChild(self.__class__.__name__)
        size = len(items) or kwargs.get('size', 0)
        self.size = size
        assert size == 0 or size > 1
        if items:
            self.extend(items)
        else:
            self.extend([0] * size)

    def copy(self):
        'Return a new :class:`SmartList` with the same elements'
        return SmartList(*self, size=self.size)

    def __iadd__(self, other, new=None):
        # In-place variant: the result is written into `self`. The `new`
        # parameter is accepted but ignored.
        return self.__add__(other, self)

    def __add__(self, other, new=None):
        '''Add `other` element-wise and return the result.

        :param other: list/tuple added index by index, or an int added to
            every element
        :param new: list that receives the result, defaults to a copy of
            `self`
        :raises TypeError: if `other` is neither a list/tuple nor an int
        '''
        if new is None:
            new = self.copy()

        if isinstance(other, (list, tuple)):
            for i, v in enumerate(other):
                new[i] += v

        elif isinstance(other, int):
            for i in range(len(self)):
                new[i] += other

        else:
            raise TypeError('Unable to add type %r to %r' % (type(other),
                                                             self))

        return new

    def __isub__(self, other):
        # In-place variant: the result is written into `self`
        return self.__sub__(other, self)

    def __sub__(self, other, new=None):
        '''Subtract `other` element-wise and return the result.

        :param other: list/tuple subtracted index by index, or an int
            subtracted from every element
        :param new: list that receives the result, defaults to a copy of
            `self`
        :raises TypeError: if `other` is neither a list/tuple nor an int
        '''
        if new is None:
            new = self.copy()

        if isinstance(other, (list, tuple)):
            # Walk over `other`, not `self`: subtracting our own values
            # would always result in zeros
            for i, v in enumerate(other):
                new[i] -= v

        elif isinstance(other, int):
            for i in range(len(self)):
                new[i] -= other

        else:
            raise TypeError('Unable to subtract type %r to %r' % (type(other),
                                                                  self))
        return new

    def set(self, value):
        '''Overwrite the elements in place.

        An int is written to every position, any other iterable is copied
        index by index.
        '''
        if isinstance(value, int):
            for i in range(len(self)):
                self[i] = value

        else:
            for i, v in enumerate(value):
                self[i] = v

    def __repr__(self):
        return '<%s%s>' % (
            self.__class__.__name__,
            list.__repr__(self),
        )

    def __str__(self):
        return '(%s)' % ','.join(map(str, self))


class Tokens(object):
    '''A lazy stream of :class:`Token` objects built from a ``readline``.

    The stream is stored in ``self.iterator``. :meth:`strip` and
    :meth:`format` each wrap it in another generator and return ``self`` so
    the calls can be chained. Calling the instance runs
    ``strip().format().to_str()``.
    '''

    @classmethod
    def from_fh(cls, fh):
        'Create an instance from an object with a ``readline`` method'
        return cls.from_readline(fh.readline)

    @classmethod
    def from_readline(cls, readline):
        'Create an instance from a ``readline`` callable'
        # Instantiate `cls` so subclasses get instances of themselves
        return cls(readline)

    def generate_tokens(self, readline):
        '''Yield a :class:`Token` for everything the tokenizer produces.

        Tabs within the leading whitespace of a line are replaced by 4
        spaces before the line is handed to the tokenizer.

        While iterating, the active offsets table is swapped for
        ``offset.children`` when a token has children and restored again as
        soon as one of the matching end tokens is seen.
        '''
        def _expand_tabs(match):
            return match.group(0).replace('\t', 4 * ' ')

        def _tab_to_space(readline):
            while True:
                try:
                    line = readline()
                except StopIteration:  # pragma: no cover
                    break
                yield space_prefix_re.sub(_expand_tabs, line)

        readline = functools.partial(next, _tab_to_space(readline))
        offsets = TOKEN_OFFSETS
        # Every entry is a tuple with the offsets table that was active
        # before descending and the list of end offsets that close the level
        stack = []
        logger = self.logger.getChild('generate_tokens')
        for token_data in tokenize.generate_tokens(readline):
            token = Token(offsets, *token_data)
            token.preprocess()
            offset = offsets.get(token, recurse=True)
            logger.debug('offset: %r', offset)
            if stack:
                logger.debug(
                    'stack: %r', [[y.token for y in x[1]] for x in stack])
            if offset.children:
                stack.append((
                    offsets,
                    [offsets.get(end, recurse=True) for end in offset.end],
                ))
                logger.debug('added %r to stack', token.token)
                offsets = offset.children
            elif stack and any(end == token for end in stack[-1][1]):
                logger.debug(
                    'removing %s from stack',
                    ', '.join(repr(x.token) for x in stack[-1][1]),
                )
                offsets = stack.pop()[0]

            yield token

    def __init__(self, readline):
        self.logger = logger.getChild(self.__class__.__name__)
        self.iterator = self.generate_tokens(readline)

    def __iter__(self):
        for item in self.iterator:
            yield item

    def to_str(self):
        '''Convert the tokens back to a string

        Trailing whitespace is removed from every line. Note that the
        returned string currently ends with ``'\\n\\n'``.
        '''
        data = tokenize.untokenize(self.iterator)
        # Strip all trailing newlines at the end but make sure we end with a
        # newline
        data = data.rstrip() + '\n\n'
        # Strip the trailing whitespace for all lines
        return '\n'.join(line.rstrip() for line in data.split('\n'))

    def strip(self):
        'Strip all whitespace so we can begin formatting'
        self.iterator = self._strip(self.iterator)
        return self

    def _strip(self, iterator):
        '''Collapse the gap between neighbouring tokens on the same row.

        The gap is removed completely if the left token has
        ``offset.post_collapse`` and the right token has
        ``offset.pre_collapse``. Otherwise a non-empty gap is reduced to a
        single column. Tokens are modified in place and yielded in order.
        '''
        # A bare next() on an empty iterator would raise StopIteration inside
        # this generator, which Python 3.7+ converts to a RuntimeError
        current = next(iterator, None)
        if current is None:
            return

        for next_ in iterator:
            if current.end_row == next_.begin_row:
                delta = next_.begin_col - current.end_col
                if next_.offset.pre_collapse and current.offset.post_collapse:
                    # We need to collapse both, nothing special here
                    pass
                elif delta:
                    # One of the two doesn't need collapsing so subtract 1
                    delta -= 1
                next_.col -= delta

            yield current
            current = next_

        yield current

    def format(self):
        '''
        Format the string, expects the extra whitespace to be removed already
        '''
        self.iterator = self._format(self.iterator)
        return self

    def _format(self, iterator):
        '''Shift every token by its own and the preceding tokens' offsets.

        ``offset.pre`` moves the token itself, the sum of ``pre`` and
        ``post`` of all earlier tokens on the same row is added on top of
        that. The sum is reset whenever ``begin_row`` changes.
        '''
        previous_offset = 0
        previous_line = None
        for token in iterator:
            if previous_line == token.begin_row:
                token.col += previous_offset
            else:
                previous_offset = 0
                previous_line = token.begin_row

            token.col += token.offset.pre
            previous_offset += token.offset.pre + token.offset.post
            yield token

    def __call__(self):
        return self.strip().format().to_str()


class Token(object):
    '''A single token with editable coordinates and its formatting offset.

    The rows and columns are stored in two 2-element
    :py:class:`~formatter2.tokens.SmartList` objects (begin and end) and are
    exposed through the ``row``, ``col``, ``begin``, ``end`` and
    ``begin_*``/``end_*`` properties.

    Iterating a token yields the 5 values ``tok_type, token, begin, end,
    line`` so it can be passed to the tokenize library.
    '''

    def __init__(self, offsets, tok_type, token, begin, end, line):
        '''
        :param offsets: mapping that is indexed with ``(tok_type, token)`` to
            find the offset for this token
        :param tok_type: the numeric token type
        :param token: the token string
        :param begin: ``(row, col)`` where the token starts
        :param end: ``(row, col)`` where the token ends
        :param line: the source line containing the token
        '''
        assert tok_type != TOKEN_TYPES.ERRORTOKEN, ('Cannot format erroneous '
                                                    'code')
        self.offset = offsets[tok_type, token]
        self.tok_type = tok_type
        self.token = token
        self._row = SmartList(size=2)
        self._col = SmartList(size=2)
        self.begin = begin
        self.end = end
        self.line = line

    def _get_line(self):
        return self._line

    def _set_line(self, line):
        # When replacing an existing line, move the end column by the
        # difference in length
        if hasattr(self, '_line'):
            self.end_col += len(line) - len(self._line)
        self._line = line

    @property
    def type(self):
        'The entry in ``TOKEN_TYPES`` for this token type'
        return TOKEN_TYPES[self.tok_type]

    def _get_begin_row(self):
        return self._row[0]

    def _set_begin_row(self, line):
        self._row[0] = line

    def _get_end_row(self):
        return self._row[1]

    def _set_end_row(self, line):
        self._row[1] = line

    def _get_begin_col(self):
        return self._col[0]

    def _set_begin_col(self, col):
        self._col[0] = col

    def _get_end_col(self):
        return self._col[1]

    def _set_end_col(self, col):
        self._col[1] = col

    def _get_col(self):
        return self._col

    def _set_col(self, col):
        self._col.set(col)

    def _get_row(self):
        return self._row

    def _set_row(self, row):
        self._row.set(row)

    def _get_begin(self):
        return SmartList(self.begin_row, self.begin_col)

    def _set_begin(self, begin):
        self.begin_row, self.begin_col = begin

    def _get_end(self):
        return SmartList(self.end_row, self.end_col)

    def _set_end(self, end):
        self.end_row, self.end_col = end

    col = property(_get_col, _set_col, doc='''The column.

    Setting this to a tuple will set the first to the begin and the latter to
    the end.

    This returns a :py:class:`~formatter2.tokens.SmartList` with the begin and
    end column.''')
    row = property(_get_row, _set_row, doc='''The row.

    Setting this to a tuple will set the first to the begin and the latter to
    the end.

    This returns a :py:class:`~formatter2.tokens.SmartList` with the begin and
    end row.''')
    begin = property(_get_begin, _set_begin, doc='''The begin coordinates.

    Setting this to a tuple will set the first to the row and the latter to
    the col.

    This returns a :py:class:`~formatter2.tokens.SmartList` with the row and
    the column.''')
    end = property(_get_end, _set_end, doc='''The end.

    Setting this to a tuple will set the first to the row and the latter to
    the col.

    This returns a :py:class:`~formatter2.tokens.SmartList` with the row and
    column.''')
    begin_col = property(_get_begin_col, _set_begin_col, doc='''The begin
    column.''')
    end_col = property(_get_end_col, _set_end_col, doc='''The end
    column.''')
    begin_row = property(_get_begin_row, _set_begin_row, doc='''The begin
    row.''')
    end_row = property(_get_end_row, _set_end_row, doc='''The end
    row.''')
    line = property(_get_line, _set_line, doc='''The line.

    Setting the line automatically updates the column as well.

    Returns a :func:`str` object with the line.''')

    def preprocess(self):
        '''Let the token type preprocess this token.

        See :func:`formatter2.types.TokenType.preprocess`
        '''
        self.type.preprocess(self)

    def __len__(self):
        # Got to love magic numbers :)
        # We're trying to emulate a token that is valid for the tokenize lib
        # so we need some magic
        return 5

    def __iter__(self):
        # Return something the tokenize library accepts
        yield self.tok_type
        yield self.token
        yield self.begin
        yield self.end
        yield self.line

    def __eq__(self, other):
        '''Compare this token to `other`, the meaning depends on its type.

        - :class:`Token`: see the note in the code below
        - ``int``: compared to the token type
        - object with ``token`` and ``type`` attributes: both must be equal
        - any other falsy value: never equal
        - 2-tuple: compared to ``(tok_type, token)``

        :raises TypeError: for anything else
        '''
        if isinstance(other, Token):
            # NOTE(review): this compares every element of `self.end` (row,
            # col) with the complete `other.begin` list, which looks like it
            # can never match. Confirm the intended semantics before
            # changing it.
            return any(end == other.begin for end in self.end)
        elif isinstance(other, int):
            return self.tok_type == other
        elif hasattr(other, 'token') and hasattr(other, 'type'):
            return self.token == other.token and self.type == other.type
        elif not other:
            return False
        elif isinstance(other, tuple) and len(other) == 2:
            # The parentheses are required, without them this returns an
            # always truthy (tok_type, bool) tuple
            return (self.tok_type, self.token) == other
        else:
            raise TypeError('Dont know how to compare %r to %r' % (
                self, other))

    # The ordering operators compare the end of this token with the begin of
    # the other token. A falsy `other` gives a fixed result.

    def __lt__(self, other):
        if other:
            return self.end < other.begin
        else:
            return False

    def __le__(self, other):
        if other:
            return self.end <= other.begin
        else:
            return True

    def __gt__(self, other):
        if other:
            return self.end > other.begin
        else:
            return False

    def __ge__(self, other):
        if other:
            return self.end >= other.begin
        else:
            return True

    def __sub__(self, other):
        '''Return the number of columns between the end of this token and
        the begin of `other`, which has to start on the row this one ends
        on.'''
        assert self.end_row == other.begin_row
        return other.begin_col - self.end_col

    def __str__(self):
        return repr((self.token, self.type.name, self.begin, self.end))

    def __repr__(self):
        # Mark stripped leading/trailing whitespace of the line with a `#`
        line = self.line
        if line.rstrip() != line:
            line = line.rstrip() + r'#'
        if line.lstrip() != line:
            line = r'#' + line.lstrip()

        if self.offset:
            pre = self.offset.pre
            post = self.offset.post
        else:
            pre = post = '?'

        return '<%s[%s]: %r (%d,%d)-(%d,%d):(%s,%s) %s>' % (
            self.__class__.__name__,
            self.type,
            self.token,
            self.begin[0],
            self.begin[1],
            self.end[0],
            self.end[1],
            pre,
            post,
            line,
        )