Source code for bibpy.postprocess

# -*- coding: utf-8 -*-

"""Conversion functions for postprocessing of bib(la)tex fields."""

import bibpy
from bibpy.date import DateRange
import bibpy.error
import bibpy.lexers
import bibpy.name
import bibpy.parser
import calendar
import re

_MONTH_ABBREVIATIONS = [
    'jan',
    'feb',
    'mar',
    'apr',
    'may',
    'jun',
    'jul',
    'aug',
    'sep',
    'oct',
    'nov',
    'dec',
]

# TODO: Let users split on default names and custom ones
_SPLIT_NAMES = frozenset([
    'author',
    'afterword',
    'bookauthor',
    'commentator',
    'editor',
    'editora',
    'editorb',
    'editorc',
    'foreword',
    'holder',
    'introduction',
    'language',
    'origpublisher',
    'publisher',
    'shortauthor',
    'shorteditor',
    'translator',
])


def postprocess_braces(value, **options):
    """Strip brace characters from a string value.

    Values that are not strings (as decided by ``bibpy.is_string``) are
    returned unchanged. ``options`` is accepted so the function can be called
    like the other postprocessing functions; it is not used here.
    """
    if not bibpy.is_string(value):
        return value

    pieces = bibpy.parser.parse_braced_expr(value)

    # Keep every parsed piece except those that are a substring of '{}'
    # (that is '{', '}', '{}' and the empty string)
    return "".join(piece for piece in pieces if piece not in '{}')


def postprocess_namelist(field, names, **options):
    """Split a string of names into a list.

    An empty or missing value gives an empty list. Each name is additionally
    passed through :py:func:`postprocess_name` when ``field`` is contained in
    the ``split_names`` option (default: no fields).
    """
    if not names:
        return []

    # Split on 'and' at brace level zero
    split = list(bibpy.lexers.lex_namelist(names))

    if field not in options.get('split_names', []):
        return split

    # Name parsing was requested for this field
    return [postprocess_name(field, name) for name in split]


def postprocess_name(field, author, **options):
    """Attempt to split a name into its components.

    Non-empty strings are handed to ``bibpy.name.Name.fromstring``; anything
    else (empty values, non-strings) is returned unchanged.
    """
    if not author or not bibpy.is_string(author):
        return author

    return bibpy.name.Name.fromstring(author)


def postprocess_keywords(field, keywords, **options):
    """Split a semicolon-separated string of keywords into a list.

    Surrounding whitespace is removed from each keyword and keywords that are
    empty after stripping are dropped. An empty or missing value gives an
    empty list.
    """
    if not keywords:
        return []

    stripped = (keyword.strip() for keyword in keywords.split(';'))

    return [keyword for keyword in stripped if keyword]


def postprocess_int(field, value, **options):
    """Convert a value to an integer, returning it unchanged on failure."""
    try:
        converted = int(value)
    except (ValueError, TypeError):
        # Not something int() accepts: hand the input back untouched
        return value
    else:
        return converted


def postprocess_date(field, datestring, **options):
    """Convert a string to a :py:func:`bibpy.date.DateRange`.

    An empty or missing value gives ``DateRange.empty()``; everything else is
    parsed with ``DateRange.fromstring``.
    """
    if datestring:
        return DateRange.fromstring(datestring)

    return DateRange.empty()


def get_month_name(i):
    """Return the name of a month given its one-based index."""
    month_names = calendar.month_name

    return month_names[i]


def postprocess_month(field, month, **options):
    """Convert a month number or three-letter abbreviation to its name.

    Args:
        field: Name of the field being converted (not used here).
        month: A value ``int()`` accepts (e.g. ``'3'`` or ``3``) or a
            three-letter abbreviation such as ``'mar'`` in any letter case.
        **options: Accepted for call compatibility; not used here.

    Returns:
        The month name from :py:func:`get_month_name`, or ``month`` unchanged
        when it is neither a number from 1 to 12 nor a known abbreviation.
        Values that cannot be interpreted at all (``None``, lists, ...) are
        also returned unchanged instead of raising.
    """
    try:
        i = int(month)
    except (ValueError, TypeError):
        # Not numeric. TypeError covers None and other non-string values,
        # which the other converters in this module also pass through.
        try:
            month_name = month.lower()
        except AttributeError:
            return month

        if month_name in _MONTH_ABBREVIATIONS:
            # The first element of calendar.month_name is an empty string
            return get_month_name(_MONTH_ABBREVIATIONS.index(month_name) + 1)

        return month

    if 1 <= i <= 12:
        return get_month_name(i)

    # Numeric but not a valid month number
    return month


def postprocess_keylist(field, keylist, **options):
    """Split a comma-separated string of keys into a list.

    Whitespace around each key is removed and empty keys are skipped. An
    empty or missing value gives an empty list.
    """
    if not keylist:
        return []

    keys = []

    for raw_key in keylist.split(','):
        key = raw_key.strip()

        if key:
            keys.append(key)

    return keys


def postprocess_pages(field, pages, **options):
    """Convert a page range to a 2-element tuple of integers.

    Args:
        field: Name of the field being converted (not used here).
        pages: A page range whose two ends are separated by one or more
            hyphens, e.g. ``'10-20'`` or ``'10--20'``.
        **options: Accepted for call compatibility; not used here.

    Returns:
        ``(first, last)`` as integers when ``pages`` consists of exactly two
        integer parts; otherwise ``pages`` unchanged. Values that are not
        strings (``None``, numbers, ...) are returned unchanged as well.
    """
    try:
        values = re.split(r'\-+', pages)
    except TypeError:
        # Not a string, so there is nothing to split
        return pages

    if len(values) != 2:
        return pages

    try:
        return (int(values[0]), int(values[1]))
    except (ValueError, TypeError):
        return pages


# A dictionary of bib fields as keys and their postprocessing functions as
# values. postprocess() looks a field up here and calls the function as
# func(field, value, split_names=...); fields that are not listed are not
# converted.
postprocess_functions = {
    'address':       postprocess_namelist,
    'afterword':     postprocess_namelist,
    'annotator':     postprocess_namelist,
    'author':        postprocess_namelist,
    'bookauthor':    postprocess_namelist,
    'commentator':   postprocess_namelist,
    'date':          postprocess_date,
    'edition':       postprocess_int,
    'editor':        postprocess_namelist,
    'editora':       postprocess_namelist,
    'editorb':       postprocess_namelist,
    'editorc':       postprocess_namelist,
    'eventdate':     postprocess_date,
    'foreword':      postprocess_namelist,
    'holder':        postprocess_namelist,
    'institution':   postprocess_namelist,
    'introduction':  postprocess_namelist,
    'keywords':      postprocess_keywords,
    'language':      postprocess_namelist,
    'location':      postprocess_namelist,
    'month':         postprocess_month,
    'number':        postprocess_int,
    'organization':  postprocess_namelist,
    'origdate':      postprocess_date,
    'origlocation':  postprocess_namelist,
    'origpublisher': postprocess_namelist,
    'pages':         postprocess_pages,
    'pagetotal':     postprocess_int,
    'publisher':     postprocess_namelist,
    'related':       postprocess_keylist,
    'school':        postprocess_namelist,
    'shortauthor':   postprocess_namelist,
    'shorteditor':   postprocess_namelist,
    'translator':    postprocess_namelist,
    'urldate':       postprocess_date,
    'xdata':         postprocess_keylist,
    'volume':        postprocess_int,
    'year':          postprocess_int
}


def find_postprocess_fields(parameter, value):
    """Find the fields that need to be postprocessed for a given parameter.

    A boolean ``parameter`` acts as a switch: ``True`` selects ``value`` and
    ``False`` selects an empty list. Any other ``parameter`` is taken to be
    the selection itself and is returned as is.
    """
    if not isinstance(parameter, bool):
        return parameter

    if parameter:
        return value

    return []


def postprocess(entry, fields, **options):
    """Postprocess a subset of the fields of a single parsed entry in place.

    Args:
        entry: Parsed entry that exposes a ``fields`` attribute and each
            field value as an attribute of the same name. Converted values
            are written back with ``setattr``.
        fields: ``True`` to convert every field in ``entry.fields``,
            ``False`` to convert none, or a collection of field names.
        **options: ``remove_braces`` and ``split_names`` (both default to
            ``False``) use the same bool-or-collection convention. For
            ``split_names``, ``True`` selects the fields in ``_SPLIT_NAMES``.

    Fields whose current value is ``None`` or the empty string are skipped.
    """
    remove_braces = find_postprocess_fields(
        options.get('remove_braces', False), entry.fields)
    split_names = find_postprocess_fields(
        options.get('split_names', False), _SPLIT_NAMES)
    fields = find_postprocess_fields(fields, entry.fields)

    # Visit every field that any of the three selections mentions
    postprocess_fields = set()
    postprocess_fields.update(remove_braces, split_names, fields)

    for field in postprocess_fields:
        value = getattr(entry, field, None)

        if value is not None and value != '':
            if field in fields and field in postprocess_functions:
                value = postprocess_functions[field](
                    field, value,
                    split_names=split_names
                )

            # Strip braces only from the fields that were selected for it.
            # Testing the whole selection for truthiness would strip braces
            # from every visited field as soon as one field was selected.
            if field in remove_braces:
                if isinstance(value, list):
                    value = [postprocess_braces(e, remove_braces=remove_braces,
                                                split_names=split_names)
                             for e in value]
                else:
                    value = postprocess_braces(value,
                                               remove_braces=remove_braces,
                                               split_names=split_names)

            setattr(entry, field, value)
Source code for bibpy.postprocess

bibpy

Navigation

Related Topics