# Source code for pythmpg.parse_jahn
"""
Parser for index symmetrization information in Jahn symbols.
Defines two functions intended to be called externally:
:func:`parse_jahn_symbol`, which parses Jahn symbols
into structured instruction blocks describing the needed
symmetrization or antisymmetrization over tensor axes, and
:func:`jahn_rank` to return the rank of the tensor.
Notes
-----
Jahn symbols passed to the parser should lack any preceding ``'a'``
or ``'e'`` characters. Results are cached in the module-level
``jahn_dict`` to avoid redundant parsing of repeated symbols.
"""
# Cache of parse results, keyed by Jahn symbol, filled in by earlier
# calls so that a repeated symbol is never parsed twice
jahn_dict = dict()
# Digit characters that may appear in a Jahn symbol
digits = {str(d) for d in range(1, 9)}
# Full alphabet accepted in a Jahn symbol: 'V', both bracket kinds, digits
valid_chars = digits.union("V[]{}")
# Opening bracket -> kind of symmetrization it requests
s_dict = dict([("[", "symm"), ("{", "asymm")])
# Index of the next tensor axis to be assigned.
# Module-level counter that is advanced while parsing proceeds.
axis = 0
# define custom exception
[docs]
class ParsingError(Exception):
    """Raised by the parsing helpers when a Jahn symbol cannot be parsed."""
# Module-wide verbosity switch consulted by jprint
if_pr = False


# Diagnostic print helper gated by the if_pr switch.
# Parsing errors are reported with plain print() and so appear regardless.
def jprint(text):
    """Print ``text`` with a leading space, but only while ``if_pr`` is true."""
    if not if_pr:
        return
    print(" " + text)
[docs]
def parse_jahn_symbol(jahn, if_print=False):
    """
    Turn a Jahn symbol into a nested symmetrization instruction set.

    The symbol is first validated against the allowed alphabet, then every
    ``'V'`` (optionally followed by a digit) is collapsed into a single
    digit to form the block text, and finally the block text is handed to
    :func:`list_to_tuple`. The outcome, including a failed parse, is stored
    in ``jahn_dict`` and returned directly on later calls with the same
    symbol.

    Parameters
    ----------
    jahn : str
        Jahn symbol to parse, without any leading ``'a'`` or ``'e'``
        characters.
    if_print : bool, optional
        When ``True``, intermediate parsing steps are printed to stdout.
        Default is ``False``.

    Returns
    -------
    instructions : tuple or None
        Nested tuple naming the tensor axes and the symmetrization
        operations applied to them, or ``None`` when the symbol contains
        illegal characters or the parser raises :class:`ParsingError`.
    """
    global axis, if_pr
    if_pr = if_print

    # A symbol seen before is answered straight from the cache
    if jahn in jahn_dict:
        return jahn_dict[jahn]

    # Reject anything outside the permitted alphabet
    if set(jahn) - valid_chars:
        jprint(f"\n {jahn} ->")
        print(f"\n PARSING ERROR: Illegal characters in Jahn symbol {jahn}")
        jahn_dict[jahn] = None
        return None

    # Preliminary clean-up, e.g., 'V[V2V2]V' -> '1[22]1':
    # 'V' + digit becomes that digit, a bare 'V' becomes '1',
    # every other character is copied through unchanged
    pieces = []
    n_chars = len(jahn)
    pos = 0
    while pos < n_chars:
        ch = jahn[pos]
        if ch != "V":
            pieces.append(ch)
        elif pos + 1 < n_chars and jahn[pos + 1] in digits:
            pieces.append(jahn[pos + 1])
            pos += 1  # the digit has been consumed together with the 'V'
        else:
            pieces.append("1")
        pos += 1
    b_text = "".join(pieces)

    # Show the cleaned-up Jahn symbol as block text
    jprint(f"\n {jahn} -> {b_text}")

    # The axis counter restarts for every fresh parse; the recursive
    # procedure advances it and starts at nesting depth 0
    axis = 0
    try:
        instructions = list_to_tuple("nosymm", b_text, 0)
    except ParsingError as err:
        print(f"\n PARSING ERROR for '{jahn}': {err}")
        jahn_dict[jahn] = None
        return None
    else:
        jprint(f"Operation specification for tensor of rank {axis}:")
        jprint(f"{instructions}")
        jahn_dict[jahn] = instructions
        return instructions
# ----------------------------------------
# Define rank function (called externally)
# ----------------------------------------
[docs]
def jahn_rank(instructions):
    """
    Determine the tensor rank from a parsed operation specification.

    The rank is one more than the largest axis index that appears anywhere
    in the textual representation of ``instructions``. Whole integers are
    extracted (not individual digit characters), so axis indices of 10 or
    more are handled correctly.

    Parameters
    ----------
    instructions : tuple
        Nested instruction specification as returned by
        :func:`parse_jahn_symbol`.

    Returns
    -------
    rank : int
        Rank of the tensor (zero when no axis index is present, i.e. for
        scalars).
    """
    import re

    # Collect every complete integer in the string form of the instructions;
    # reading digit-by-digit would turn axis 11 into 1 and 1.
    axes = [int(token) for token in re.findall(r"\d+", str(instructions))]
    # Axes are numbered from zero, so the rank is the largest index plus one
    return 1 + max(axes) if axes else 0
# --------------------------------------------------------------
# The remaining functions are intended as private to this module
# --------------------------------------------------------------
[docs]
def list_to_tuple(job, b_text, depth):
    """
    Recursively parse a block text string into an instruction tuple.

    Constructs and returns an instruction block of the form
    ``(job, obj_list)``, where ``obj_list`` is a list of axes
    or sub-blocks, built by recursively processing any nested
    bracket groups found in ``b_text``. The global ``axis``
    counter is also incremented for each tensor axis encountered.

    Parameters
    ----------
    job : str
        Symmetrization mode for this block: ``'nosymm'``, ``'symm'``,
        or ``'asymm'``.
    b_text : str
        Block text string to be parsed, e.g., ``'1[22]1'``.
    depth : int
        Current recursion depth, used for indented debug printing
        and as a guard against runaway recursion.

    Returns
    -------
    result : tuple
        Parsed instruction block in the form of a tuple
        ``(job, obj_list)``, or the unwrapped inner tuple when the
        block reduces to a single sub-tuple under ``'nosymm'``.

    Raises
    ------
    ParsingError
        If ``depth`` exceeds 8, ``b_text`` contains elements of
        inconsistent type under a symmetrizing job, an empty list is
        found, fewer than two sub-blocks appear where multiple are
        required, or an unexpected object type is encountered.
    """
    global axis

    # Enforce the nesting limit on entry. Checking only after the recursive
    # calls have returned cannot stop deep nesting from exhausting the
    # interpreter stack (RecursionError is not a ParsingError).
    if depth > 8:
        raise ParsingError(f"Recursion depth exceeded for block '{b_text}'")

    b_list = parse_string(b_text)
    jprint(depth * " " + f"String '{b_text}' parsed to block list: {b_list}")
    obj_list = []

    if job == "nosymm":
        # No symmetrization at this level; ints and sub-blocks may be mixed
        for x in b_list:
            if isinstance(x, int):
                # An int stands for that many consecutive plain axes
                obj_list.extend(range(axis, axis + x))
                axis += x  # update global variable
            elif isinstance(x, str):
                # A bracketed sub-block: its opening bracket selects the job,
                # the text between the brackets is parsed one level deeper
                # (the recursive call also advances axis)
                sub_job = s_dict[x[0]]
                obj_list.append(list_to_tuple(sub_job, x[1:-1], depth + 1))
            else:
                raise ParsingError("Wrong type in list")
    else:
        # Symmetrize or antisymmetrize: every entry of b_list must be the
        # same int or the same string
        if not b_list:
            raise ParsingError(f"Empty list {b_list}")
        if len(set(b_list)) > 1:
            raise ParsingError(f"Symmetrized elements differ in {b_list}")
        x = b_list[0]
        n = len(b_list)
        if isinstance(x, int):
            if n == 1:  # n = 1 implies symmetrization within one block
                obj_list.extend(range(axis, axis + x))
                axis += x  # update global variable
            else:  # n > 1 implies block symmetrization
                for _ in range(n):
                    obj_list.append(list_to_tuple("nosymm", str(x), depth + 1))
        elif isinstance(x, str):
            # Block symmetrization of symmetrized sub-blocks
            if n < 2:
                raise ParsingError(f"Expecting 2 or more sublocks; found {n}")
            sub_job = s_dict[x[0]]
            # Each repetition is parsed afresh so it receives its own axes
            for _ in range(n):
                obj_list.append(list_to_tuple(sub_job, x[1:-1], depth + 1))
        else:
            raise ParsingError(f"Wrong type of object in b_list {b_list}")

    jprint(depth * " " + f"Block List parsed to object list: {obj_list}")

    if job == "nosymm" and len(obj_list) == 1 and isinstance(obj_list[0], tuple):
        return obj_list[0]  # In this trivial case, return sub-tuple
    return (job, obj_list)  # Return constructed tuple
# ------------------------------------------
# Define helper functions for parsing
# ------------------------------------------
[docs]
def parse_string(b_string):
    """
    Split a block text string into integer and bracket-group tokens.

    Parameters
    ----------
    b_string : str
        String in Jahn block-text format, e.g., ``'1[{2}{2}]2'``.

    Returns
    -------
    b_list : list
        Tokens in order of appearance: an ``int`` for each single digit
        and a ``str`` for each balanced bracketed sub-block (brackets
        included), e.g., ``[1, '[{2}{2}]', 2]``.

    Raises
    ------
    ParsingError
        If a character is neither an allowed digit nor an opening
        bracket at a position where a token must start.
    """
    tokens = []
    pos = 0
    total = len(b_string)
    while pos < total:
        ch = b_string[pos]
        if ch in digits:
            # A lone digit becomes an integer token
            tokens.append(int(ch))
            pos += 1
            continue
        if ch not in ("[", "{"):
            raise ParsingError(f"Function parse_string failed on {b_string}")
        # Take everything up to and including the matching closing bracket
        stop = pos + find_match(b_string[pos:])
        tokens.append(b_string[pos:stop])
        pos = stop
    return tokens
[docs]
def find_match(text):
"""
Find the closing bracket that matches the opening bracket at index 0.
Parameters
----------
text : str
String whose first character is either ``'['`` or ``'{'``.
Returns
-------
ic : int
Index of the character immediately after the matching closing
bracket (i.e., the length of the balanced sub-string).
Raises
------
ParsingError
If the string ends before a matching closing bracket is found.
"""
c_dict = {"[": "]", "{": "}"} # matching closing brackets
cl = text[0]
cr = c_dict[cl]
depth = 1
ic = 1
while depth > 0:
if ic >= len(text):
raise ParsingError(f"Closure failed on string '{text}'")
if text[ic] == cl:
depth += 1
if text[ic] == cr:
depth -= 1
ic += 1
return ic