Source code for bibpy.lexers.name_lexer

# -*- coding: utf-8 -*-

"""Bib(la)tex lexer for names."""

from bibpy.lexers.base_lexer import BaseLexer


class NameLexer(BaseLexer):
    """Lexer that splits names into parts.

    A name is split at commas that occur outside of braces; each piece is
    emitted as a 'part' token whose value is a list of 'content' and
    'braced' tokens.

    Any whitespace is stripped.

    """

    def __init__(self):
        """Initialise the lexer."""
        super().__init__()
        self.reset('')
        self.mode = 'normal'

        # Only one lexing mode exists for names; how the mode table is
        # consulted is decided by BaseLexer (not visible in this module).
        self._modes = {
            'normal': self.lex_name,
        }

        # One pattern matching every delimiter lex_name cares about:
        # runs of whitespace, an opening brace, a closing brace or a comma.
        self._compile_regexes([
            ('ws_or_braces', (r'\s+|{|}|,', None))
        ])

    def reset(self, string):
        """Reset the internal state of the lexer.

        Delegates to the base class and then zeroes the comma counter.

        """
        super().reset(string)
        self._commas = 0

    @property
    def commas(self):
        """Return the number of commas found at brace-level zero."""
        return self._commas

    def lex_name(self):
        """Lex a name and return its tokens.

        This is a generator. It yields one 'part' token for every
        comma-separated section of the name, including the final section
        at the end of the input. The value of each 'part' token is the
        list of 'content' and 'braced' tokens collected for that section.

        Commas inside braces do not split the name and are not counted.
        ``raise_unbalanced`` is called when a closing brace takes the
        brace level below zero.

        """
        part = []             # Tokens collected for the current section
        content = ''          # Text accumulated from braced material
        was_command = False   # Set when '{' is directly followed by '\'

        while True:
            # NOTE(review): 'before' is presumably the text preceding the
            # next delimiter and 'token' the delimiter itself (empty when
            # the input is exhausted) - confirm against BaseLexer.until.
            before, token = self.until('ws_or_braces')

            if not token:
                # We hit the end of the string
                # NOTE(review): anything still held in 'content' is not
                # emitted here, and an unclosed '{' is not reported -
                # confirm that this is intended.
                if before:
                    part.append(self.make_token('content', before))

                yield self.make_token('part', part)
                break

            if token == '{':
                self.brace_level += 1
                content += before
                # NOTE(review): assumes current_char is the character
                # right after the brace - confirm against BaseLexer.
                was_command = self.current_char == '\\'
            elif token == '}':
                self.brace_level -= 1

                if was_command:
                    # Keep the text but do not emit a 'braced' token yet;
                    # it stays in 'content' for a later token to pick up.
                    was_command = False
                    content += before
                elif self.brace_level == 0:
                    # Outermost brace closed: emit what was collected
                    content += before
                    part.append(self.make_token('braced', content))
                    content = ''
                elif self.brace_level < 0:
                    self.raise_unbalanced()
            elif self.brace_level > 0:
                # Whitespace and commas inside braces are kept verbatim
                content += before + token
            elif token == ',':
                # A top-level comma ends the current section
                self._commas += 1

                if before:
                    part.append(self.make_token('content', before))

                yield self.make_token('part', part)
                part = []
                # NOTE(review): pending 'content' is dropped at this point
                content = ''
            else:
                # Token is whitespace at brace-level zero
                stripped = before.strip()

                if stripped:
                    # Prefix any pending braced text; when 'content' is
                    # empty this is just the stripped text on its own.
                    part.append(
                        self.make_token('content', content + stripped)
                    )
                    content = ''
Source code for bibpy.lexers.name_lexer

bibpy

Navigation

Related Topics