Source code for bibpy.lexers.name_lexer
# -*- coding: utf-8 -*-
"""Bib(la)tex lexer for names."""
from bibpy.lexers.base_lexer import BaseLexer
class NameLexer(BaseLexer):
    """Tokenise a bib(la)tex name string into comma-separated parts.

    The input is scanned for runs of whitespace, braces and commas.
    Whitespace outside braces only separates tokens and is never emitted
    itself; whitespace and commas inside braces are kept as part of the
    braced text.
    """

    def __init__(self):
        """Set up the single lexing mode and the delimiter pattern."""
        super().__init__()
        self.reset('')
        self.mode = 'normal'
        self._modes = {'normal': self.lex_name}

        # A single pattern drives the lexer: whitespace runs, an opening
        # brace, a closing brace, or a comma.
        delimiter_spec = ('ws_or_braces', (r'\s+|{|}|,', None))
        self._compile_regexes([delimiter_spec])

    def reset(self, string):
        """Reset the lexer for a new input string and zero the comma count."""
        super().reset(string)
        self._commas = 0

    @property
    def commas(self):
        """Return how many commas have been seen outside of braces.

        The value is an integer counter that starts at zero on every
        ``reset`` and is incremented by ``lex_name``.
        """
        return self._commas

    def lex_name(self):
        """Lex a name, yielding one 'part' token per comma-separated section.

        Each 'part' token is built from a list of 'content' and 'braced'
        tokens. A part is yielded at every comma found outside braces and
        once more when the end of the input is reached.
        ``self.raise_unbalanced()`` is called when a closing brace drops
        the brace level below zero.
        """
        pieces = []          # tokens collected for the part being built
        pending = ''         # text accumulated around/inside braces
        in_command = False   # True when the last '{' was followed by '\'

        while True:
            text, delim = self.until('ws_or_braces')

            if not delim:
                # End of input: emit whatever plain text is left, then the
                # final part.
                # NOTE(review): `pending` is not flushed here - confirm
                # that is intended.
                if text:
                    pieces.append(self.make_token('content', text))

                yield self.make_token('part', pieces)
                break

            if delim == '{':
                self.brace_level += 1
                pending += text
                in_command = self.current_char == '\\'
            elif delim == '}':
                self.brace_level -= 1

                if in_command:
                    # Closing a command group: keep accumulating instead of
                    # emitting a 'braced' token.
                    in_command = False
                    pending += text
                elif self.brace_level == 0:
                    # Outermost brace closed: emit the accumulated text.
                    pending += text
                    pieces.append(self.make_token('braced', pending))
                    pending = ''
                elif self.brace_level < 0:
                    self.raise_unbalanced()
            elif self.brace_level > 0:
                # Whitespace or comma inside braces is ordinary text.
                pending += text + delim
            elif delim == ',':
                # A comma outside braces terminates the current part.
                # NOTE(review): `pending` is discarded here, not emitted -
                # confirm that is intended.
                self._commas += 1

                if text:
                    pieces.append(self.make_token('content', text))

                yield self.make_token('part', pieces)
                pieces = []
                pending = ''
            else:
                # Whitespace outside braces: emit the preceding word, joined
                # to any accumulated text, and drop the whitespace itself.
                word = text.strip()

                if word:
                    pieces.append(self.make_token('content', pending + word))
                    pending = ''