# -*- coding: utf-8 -*-
"""Conversion functions for postprocessing of bib(la)tex fields."""
import bibpy
from bibpy.date import DateRange
import bibpy.error
import bibpy.lexers
import bibpy.name
import bibpy.parser
import calendar
import re
_MONTH_ABBREVIATIONS = [
'jan',
'feb',
'mar',
'apr',
'may',
'jun',
'jul',
'aug',
'sep',
'oct',
'nov',
'dec',
]
# TODO: Let users split on default names and custom ones
_SPLIT_NAMES = frozenset([
'author',
'afterword',
'bookauthor',
'commentator',
'editor',
'editora',
'editorb',
'editorc',
'foreword',
'holder',
'introduction',
'language',
'origpublisher',
'publisher',
'shortauthor',
'shorteditor',
'translator',
])
[docs]def postprocess_braces(value, **options):
"""Remove any braces from a string value."""
if bibpy.is_string(value):
return "".join([e for e in bibpy.parser.parse_braced_expr(value)
if e not in '{}'])
return value
[docs]def postprocess_namelist(field, names, **options):
"""Split a string of authors into a list."""
if not names:
return []
# First, split on zero brace-level 'and'
names = list(bibpy.lexers.lex_namelist(names))
# Second, if requested, parse each name
if field in options.get('split_names', []):
return [postprocess_name(field, name) for name in names]
return names
[docs]def postprocess_name(field, author, **options):
"""Attempts to split an author name into its components."""
if author and bibpy.is_string(author):
return bibpy.name.Name.fromstring(author)
else:
return author
[docs]def postprocess_keywords(field, keywords, **options):
"""Split a string of keywords into a list."""
if not keywords:
return []
return [keyword.strip() for keyword in keywords.split(';')
if keyword.strip()]
[docs]def postprocess_int(field, value, **options):
"""Convert a string to an integer."""
try:
return int(value)
except (ValueError, TypeError):
return value
[docs]def postprocess_date(field, datestring, **options):
"""Convert a string to a :py:func:`bibpy.date.DateRange`."""
if not datestring:
return DateRange.empty()
return DateRange.fromstring(datestring)
[docs]def get_month_name(i):
"""Return the name of a month from its one-based index ."""
return calendar.month_name[i]
[docs]def postprocess_month(field, month, **options):
"""Convert a month number to its name."""
try:
i = int(month)
if i not in range(1, 13):
return month
return get_month_name(i)
except (ValueError, IndexError):
month_name = month.lower()
if month_name in _MONTH_ABBREVIATIONS:
# The first element of calendar.month_name is an empty string
return get_month_name(_MONTH_ABBREVIATIONS.index(month_name) + 1)
return month
[docs]def postprocess_keylist(field, keylist, **options):
"""Split a comma-separated string of keys into a list."""
if not keylist:
return []
stripped_keys = [key.strip() for key in keylist.split(',')]
return [key for key in stripped_keys if key]
[docs]def postprocess_pages(field, pages, **options):
"""Convert a page range to a 2-element tuple."""
values = re.split(r'\-+', pages)
if len(values) == 2:
try:
return (int(values[0]), int(values[1]))
except (ValueError, TypeError):
return pages
else:
return pages
# A dictionary of bib fields as keys and their postprocessing functions as
# values
postprocess_functions = {
'address': postprocess_namelist,
'afterword': postprocess_namelist,
'annotator': postprocess_namelist,
'author': postprocess_namelist,
'bookauthor': postprocess_namelist,
'commentator': postprocess_namelist,
'date': postprocess_date,
'edition': postprocess_int,
'editor': postprocess_namelist,
'editora': postprocess_namelist,
'editorb': postprocess_namelist,
'editorc': postprocess_namelist,
'eventdate': postprocess_date,
'foreword': postprocess_namelist,
'holder': postprocess_namelist,
'institution': postprocess_namelist,
'introduction': postprocess_namelist,
'keywords': postprocess_keywords,
'language': postprocess_namelist,
'location': postprocess_namelist,
'month': postprocess_month,
'number': postprocess_int,
'organization': postprocess_namelist,
'origdate': postprocess_date,
'origlocation': postprocess_namelist,
'origpublisher': postprocess_namelist,
'pages': postprocess_pages,
'pagetotal': postprocess_int,
'publisher': postprocess_namelist,
'related': postprocess_keylist,
'school': postprocess_namelist,
'shortauthor': postprocess_namelist,
'shorteditor': postprocess_namelist,
'translator': postprocess_namelist,
'urldate': postprocess_date,
'xdata': postprocess_keylist,
'volume': postprocess_int,
'year': postprocess_int
}
[docs]def find_postprocess_fields(parameter, value):
"""Find the fields that need to be postprocessed for a given parameter."""
if type(parameter) is bool:
return value if parameter else []
else:
return parameter
[docs]def postprocess(entry, fields, **options):
"""Postprocess a subset of fields in a list of parsed entries."""
remove_braces = find_postprocess_fields(options.get('remove_braces',
False), entry.fields)
split_names = find_postprocess_fields(options.get('split_names', False),
_SPLIT_NAMES)
fields = find_postprocess_fields(fields, entry.fields)
postprocess_fields = set()
postprocess_fields.update(remove_braces, split_names, fields)
for field in postprocess_fields:
value = getattr(entry, field, None)
if value is not None and value != '':
if field in fields and field in postprocess_functions:
value = postprocess_functions[field](
field, value,
split_names=split_names
)
if remove_braces:
if type(value) is list:
value = [postprocess_braces(e, remove_braces=remove_braces,
split_names=split_names)
for e in value]
else:
value = postprocess_braces(value,
remove_braces=remove_braces,
split_names=split_names)
setattr(entry, field, value)