# Source code for pythmpg.parse_jahn

"""
Parser for index symmetrization information in Jahn symbols.

Defines two functions intended to be called externally:
:func:`parse_jahn_symbol`, which parses Jahn symbols
into structured instruction blocks describing the needed
symmetrization or antisymmetrization over tensor axes, and
:func:`jahn_rank` to return the rank of the tensor.

Notes
-----
Jahn symbols passed to the parser should lack any preceding ``'a'``
or ``'e'`` characters.  Results are cached in the module-level
``jahn_dict`` to avoid redundant parsing of repeated symbols.
"""

# Cache of parse results, keyed by Jahn symbol.  parse_jahn_symbol
# stores each outcome here (None for a failed parse) so that a
# repeated symbol is never parsed twice.
jahn_dict = dict()

# Digit characters that may appear in a Jahn symbol
digits = {str(d) for d in range(1, 9)}
# Complete alphabet of a Jahn symbol: the digits plus 'V' and brackets
valid_chars = digits | set("V[]{}")

# Opening bracket -> kind of symmetrization it introduces
s_dict = {
    "[": "symm",
    "{": "asymm",
}

# Running index of the next unassigned tensor axis.
# Module-level state: reset by parse_jahn_symbol and advanced by
# list_to_tuple as the parse proceeds.
axis = 0


# define custom exception

[docs]
class ParsingError(Exception):
    """Raised by the parsing helpers when block text cannot be parsed."""



# Module-wide verbosity flag; parse_jahn_symbol overwrites it on each call
if_pr = False


# Diagnostic print helper: silent unless if_pr is set.
# Parsing errors do not go through this helper and are always printed.
def jprint(text):
    """Print ``text`` indented by two spaces, but only when ``if_pr`` is true."""
    if not if_pr:
        return
    print("  " + text)



[docs]
def parse_jahn_symbol(jahn, if_print=False):
    """
    Translate a Jahn symbol into nested symmetrization instructions.

    The symbol is first reduced to "block text" by replacing every
    ``'V'`` with the digit that follows it (or with ``'1'`` when no
    digit follows), and the block text is then handed to
    :func:`list_to_tuple`.  Every outcome, including a failure, is
    stored in ``jahn_dict`` and returned directly on later calls with
    the same symbol.

    Parameters
    ----------
    jahn : str
        Jahn symbol to parse, without any leading ``'a'`` or ``'e'``
        characters.
    if_print : bool, optional
        When ``True``, intermediate parsing steps are printed to
        stdout.  Default is ``False``.  Error messages are printed
        regardless of this flag.

    Returns
    -------
    instructions : tuple or None
        Nested tuple naming the tensor axes and the symmetrization
        operations applied to them, or ``None`` if the symbol holds
        illegal characters or the parser raises ``ParsingError``.
    """

    global axis, if_pr
    if_pr = if_print

    # Serve repeated symbols straight from the cache
    if jahn in jahn_dict:
        return jahn_dict[jahn]

    # Reject anything outside the permitted alphabet
    if set(jahn) - valid_chars:
        jprint(f"\n  {jahn}  ->")
        print(f"\n  PARSING ERROR: Illegal characters in Jahn symbol {jahn}")
        jahn_dict[jahn] = None
        return None

    # Build the block text, e.g. 'V[V2V2]V' -> '1[22]1':
    # 'V' + digit collapses to the digit, a lone 'V' becomes '1',
    # and every other character is copied through unchanged.
    pieces = []
    n_chars = len(jahn)
    pos = 0
    while pos < n_chars:
        ch = jahn[pos]
        pos += 1
        if ch != "V":
            pieces.append(ch)
        elif pos < n_chars and jahn[pos] in digits:
            pieces.append(jahn[pos])
            pos += 1  # the digit has been consumed together with its 'V'
        else:
            pieces.append("1")
    b_text = "".join(pieces)
    jprint(f"\n  {jahn}  ->  {b_text}")

    # The recursive parser advances the global axis counter from zero
    axis = 0
    try:
        instructions = list_to_tuple("nosymm", b_text, 0)
    except ParsingError as e:
        print(f"\n  PARSING ERROR for '{jahn}': {e}")
        instructions = None
    else:
        jprint(f"Operation specification for tensor of rank {axis}:")
        jprint(f"{instructions}")

    jahn_dict[jahn] = instructions
    return instructions



# ----------------------------------------
# Define rank function (called externally)
# ----------------------------------------



[docs]
def jahn_rank(instructions):
    """
    Determine the tensor rank from a parsed operation specification.

    The rank is one more than the largest axis index appearing in
    ``instructions``.  Indices are read from the string representation
    of ``instructions`` as whole runs of digits, so axis indices of 10
    and above are handled correctly (a tensor with axes ``0..11`` has
    rank 12).

    Parameters
    ----------
    instructions : tuple
        Nested instruction specification as returned by
        :func:`parse_jahn_symbol`.

    Returns
    -------
    rank : int
        Rank of the tensor.  Zero is returned when the string form of
        ``instructions`` contains no digits at all, which covers
        scalars and also ``None``.
    """
    import re  # function-scope import keeps this block self-contained

    # Match complete digit runs rather than single characters;
    # scanning character by character would split '11' into two 1s
    # and cap the reported rank at 10.
    indices = [int(token) for token in re.findall(r"\d+", str(instructions))]
    return 1 + max(indices) if indices else 0



# --------------------------------------------------------------
# The remaining functions are intended as private to this module
# --------------------------------------------------------------



[docs]
def list_to_tuple(job, b_text, depth):
    """
    Turn a block text string into an instruction block, recursively.

    The text is tokenised with :func:`parse_string`; each integer
    token claims that many consecutive axes from the global ``axis``
    counter, and each bracketed token is parsed by a recursive call.
    The result has the form ``(job, obj_list)``, where ``obj_list``
    holds axis indices and/or nested instruction blocks.

    Parameters
    ----------
    job : str
        Symmetrization mode for this block: ``'nosymm'``, ``'symm'``,
        or ``'asymm'``.
    b_text : str
        Block text to parse, e.g., ``'1[22]1'``.
    depth : int
        Current nesting level.  It sets the indentation of debug
        output and is compared against a fixed limit once the block's
        contents have been processed.

    Returns
    -------
    result : tuple
        ``(job, obj_list)``, except that a ``'nosymm'`` block whose
        only entry is itself a tuple returns that inner tuple.

    Raises
    ------
    ParsingError
        If the tokens of a symmetrizing block are not all identical,
        the token list of a symmetrizing block is empty, a bracketed
        token appears fewer than twice in a symmetrizing block, a
        token has an unexpected type, or ``depth`` exceeds 8.
    """

    global axis

    pad = depth * "  "
    b_list = parse_string(b_text)
    jprint(pad + f"String '{b_text}' parsed to block list: {b_list}")

    obj_list = []

    if job != "nosymm":
        # Symmetrizing / antisymmetrizing block: every token must be the
        # same integer or the same bracketed string
        distinct = set(b_list)
        if len(distinct) > 1:
            raise ParsingError(f"Symmetrized elements differ in {b_list}")
        if not distinct:
            raise ParsingError(f"Empty list {b_list}")

        token = b_list[0]
        count = len(b_list)
        if isinstance(token, int):
            if count > 1:
                # Several equal integers: symmetrize over whole blocks,
                # each block being a plain run of axes
                for _ in range(count):
                    obj_list.append(list_to_tuple("nosymm", str(token), depth + 1))
            else:
                # A single integer: symmetrize the axes inside one block
                obj_list.extend(range(axis, axis + token))
                axis += token  # advance the global axis counter
        elif isinstance(token, str):
            # Block symmetrization over already-symmetrized sub-blocks
            if count < 2:
                raise ParsingError(f"Expecting 2 or more sublocks; found {count}")
            inner_job = s_dict[token[0]]
            inner_text = token[1:-1]
            for _ in range(count):
                obj_list.append(list_to_tuple(inner_job, inner_text, depth + 1))
        else:
            raise ParsingError(f"Wrong type of object in b_list {b_list}")
    else:
        # Plain block: integers and bracketed sub-blocks may be mixed freely
        for token in b_list:
            if isinstance(token, int):
                obj_list.extend(range(axis, axis + token))
                axis += token  # advance the global axis counter
            elif isinstance(token, str):
                # The recursive call returns a tuple and advances axis itself
                inner_job = s_dict[token[0]]
                obj_list.append(list_to_tuple(inner_job, token[1:-1], depth + 1))
            else:
                raise ParsingError("Wrong type in list")

    jprint(pad + f"Block List parsed to object list:  {obj_list}")
    if depth > 8:
        raise ParsingError(f"Recursion depth exceeded for block '{b_text}'")

    # A 'nosymm' wrapper around a single sub-tuple adds nothing; unwrap it
    if job == "nosymm" and len(obj_list) == 1 and isinstance(obj_list[0], tuple):
        return obj_list[0]
    return (job, obj_list)



# ------------------------------------------
# Define helper functions for parsing
# ------------------------------------------



[docs]
def parse_string(b_string):
    """
    Split block text into a list of integers and bracketed sub-strings.

    Parameters
    ----------
    b_string : str
        Text in Jahn block-text format, e.g., ``'1[{2}{2}]2'``.

    Returns
    -------
    b_list : list
        One entry per top-level token: an ``int`` for each digit and a
        ``str`` (brackets included) for each bracketed group, e.g.,
        ``[1, '[{2}{2}]', 2]``.

    Raises
    ------
    ParsingError
        If a character is neither an allowed digit nor an opening
        bracket at a position where a token must start.
    """
    tokens = []
    pos = 0
    total = len(b_string)
    while pos < total:
        ch = b_string[pos]
        if ch in digits:
            # A digit is a complete token on its own
            tokens.append(int(ch))
            pos += 1
            continue
        if ch not in ("[", "{"):
            raise ParsingError(f"Function parse_string failed on {b_string}")
        # Bracketed group: take everything up to and including its match
        span = find_match(b_string[pos:])
        tokens.append(b_string[pos:pos + span])
        pos += span

    return tokens




[docs]
def find_match(text):
    """
    Locate the bracket that closes the one opening ``text``.

    Only brackets of the same kind as ``text[0]`` are counted when
    tracking nesting; brackets of the other kind are skipped over.

    Parameters
    ----------
    text : str
        String beginning with ``'['`` or ``'{'``.

    Returns
    -------
    ic : int
        Length of the balanced prefix, i.e. the index just past the
        matching closing bracket.

    Raises
    ------
    ParsingError
        If ``text`` runs out before the opening bracket is closed.
    """
    opener = text[0]
    closer = {"[": "]", "{": "}"}[opener]
    level = 1  # the opening bracket at index 0 is already counted
    for pos in range(1, len(text)):
        ch = text[pos]
        if ch == opener:
            level += 1
        elif ch == closer:
            level -= 1
            if level == 0:
                return pos + 1
    raise ParsingError(f"Closure failed on string '{text}'")