Skip to content

Dimension reduction

`aut_processing(aut_in)`

Processes automorphism data from a list of strings and prepares it for further analysis.

Parameters: aut_in (list): A list of strings representing automorphism data, where the first line contains 'N' and the second line contains 'z'.

Returns: str or int: The processed 'z' value for GAP input if the network is small enough, otherwise returns -1 to indicate skipping.

Source code in dsdp-lumping/lumping.py

def aut_processing(aut_in):
    """
    Processes automorphism data from a list of strings and prepares it for further analysis.

    Parameters:
    aut_in (list): A list of strings representing automorphism data, where the first line
        contains 'N' and the second line contains 'z', e.g.
        ['N:=18;;', 'z:=[(5,6),(5,8),(13,15),(1,2),(1,3)];;', 'g:=Group(z);'].

    Returns:
    str or int: The processed 'z' line for GAP input if the network has fewer than
        30 nodes, otherwise -1 to indicate that the network should be skipped.

    Side effects:
    Appends the processed 'z' line to 'zaut_test.txt' inside lumpsout_path.
    """
    # Strip the GAP assignment wrapper ('N:=' ... ';;') to isolate the node count.
    nstring = aut_in[0].replace('N:=', '').replace(';;', '')
    network_n = int(ast.literal_eval(nstring))

    # Large networks are skipped; callers must check for the -1 sentinel.
    if network_n >= 30:
        return -1

    zout = aut_in[1]

    # Collapse the ';;' terminator to a single ';' when more than one ';' is present.
    if zout.count(';') > 1:
        zout = zout.replace(';;', ';')

    zautpath = os.path.join(lumpsout_path, 'zaut_test.txt')

    # The context manager closes the file even if the write fails.
    with open(zautpath, "a") as txt_file:
        txt_file.write(zout)

    return zout

`cycle_number(p, n)`

Calculates the total number of cycles in a permutation.

Parameters: p (gap object): A permutation object in GAP. n (int): Number of nodes in the graph.

Returns: int: The total cycle count after considering moved points.

Source code in dsdp-lumping/lumping.py

def cycle_number(p,n):
    """
    Calculates the total number of cycles in a permutation, counting fixed points
    as cycles of length one.

    Parameters:
    p (gap object): A permutation object in GAP.
    n (int): Number of nodes in the graph.

    Returns:
    int: The number of non-trivial cycles of p plus the (n - m) points that p
        does not move, where m is the number of moved points.
    """
    cs = gap.CycleStructurePerm(p)
    m = gap.NrMovedPoints(p)

    # CycleStructurePerm returns a list that may contain holes (unbound entries)
    # for cycle lengths that do not occur. Compacted drops the holes on the GAP
    # side, so Sum yields the number of cycles of length >= 2 (0 for the identity).
    nontrivial_cycles = int(gap.Sum(gap.Compacted(cs)))

    # Every point not moved by p is a cycle of length one.
    return nontrivial_cycles + n - int(m)

`delta_gen(log_extract, rho)`

Computes a value of 'delta' based on the graph parameters and the number of orbits.

Parameters: log_extract (dict): Extracted log data containing information about the graph. rho (int): Number of orbits in the automorphism group.

Returns: int: The computed 'delta' value.

Source code in dsdp-lumping/lumping.py

def delta_gen(log_extract,rho):
    """
    Derives the 'delta' statistic from the graph size and the orbit count.

    Parameters:
    log_extract (dict): Extracted log data; the 'vertices' and 'edges' entries are read.
    rho (int): Number of orbits in the automorphism group.

    Returns:
    int: vertices * log10(edges) / log10(rho), rounded to the nearest integer.
    """
    vertex_count = int(log_extract['vertices'])
    edge_count = int(log_extract['edges'])
    orbit_count = int(rho)

    # Scale the vertex count by the ratio of the two base-10 logarithms.
    return round(vertex_count * math.log10(edge_count) / math.log10(orbit_count))

`gen_row(stubpath)`

Generates a row of data for the output table based on the automorphism and log data.

Parameters: stubpath (str): The file path stub for the graph.

Returns: dict: A dictionary representing a row of data for the graph properties and automorphism group.

Source code in dsdp-lumping/lumping.py

def gen_row(stubpath):
    """
    Generates a row of data for the output table based on the automorphism and log data.

    Parameters:
    stubpath (str): The file path stub for the graph.

    Returns:
    dict: A dictionary representing a row of data for the graph properties and
        automorphism group. An empty dict is returned when the '.gap' or '.log'
        input is missing, or when aut_processing flags the network as too large.

    Side effects:
    Prints the stub name, overwrites 'rowdat.json' in lumpsout_path with the row,
    and appends the orbit list to 'orbitcolours.txt' in the lumping_output folder.
    """

    new_row = dict()

    stub = os.path.basename(stubpath)  # Extract the filename from the path
    print(stub)

    # Define directories for the automorphism and log files
    aut_directory = BASE_PATH / 'data' / 'processed' / 'gap_output'
    log_directory = BASE_PATH / 'data' / 'processed' / 'saucy_output'

    aut_filename = str(aut_directory / stub) + '.gap'
    log_filename = str(log_directory / stub) + '.log'

    # Both inputs are needed to build a row; without the log, log_extract
    # would be undefined further down.
    if not (Path(log_filename).is_file() and Path(aut_filename).is_file()):
        return new_row

    log_extract = read_log(log_filename)  # Read log data
    aut_in = read_am(aut_filename)        # Read automorphism data
    zaut = aut_processing(aut_in)

    # aut_processing returns -1 for networks it considers too large;
    # that sentinel is not a GAP expression and must not reach norbits.
    if zaut == -1:
        return new_row

    n_orbits, zorder, zorbits = norbits(zaut,int(log_extract['vertices']),'polya')

    # Create a new row for the output table
    new_row = {
        'graph_name'    : stub,
        'n_nodes'       : log_extract['vertices'],
        'M_edges'       : log_extract['edges'],
        'aut_grp_order' : zorder,
        'rho'           : n_orbits,
        'avg_support'   : log_extract['average support'],
        'tot_support'   : log_extract['total support'],
        'orbits'        : str(zorbits),
        'delta'         : delta_gen(log_extract,n_orbits)
        }

    datpath = lumpsout_path / 'rowdat.json'

    # Write the row data to a JSON file (overwritten on every call)
    with open(datpath,'w') as fp:
        json.dump(new_row,fp)

    coloursfolder = BASE_PATH / 'data' / 'processed' / 'lumping_output'
    colourspath = coloursfolder / 'orbitcolours.txt'

    # Append the orbits; the context manager closes the file even on error.
    with open(colourspath,"a") as coloursf:
        coloursf.write(str(zorbits))

    return new_row

`main()`

Main function to process graph data, compute automorphism group information, and generate an output table saved to a CSV file.

Source code in dsdp-lumping/lumping.py

def main():
    """
    Main function to process graph data, compute automorphism group information,
    and generate an output table saved to a CSV file.

    One row is produced per file found in the 'gap_output' directory, keyed by the
    file stem. Every key returned by gen_row becomes a CSV column. The table is
    written to 'lumps.csv' inside lumpsout_path.
    """

    outpath = lumpsout_path / 'lumps.csv' # Output path for CSV
    directory_path = BASE_PATH / 'data' / 'processed' / 'gap_output'

    stubs = [file.stem for file in directory_path.iterdir() if file.is_file()]

    # Process each stub and generate rows for the table
    inter_table = [gen_row(stub) for stub in stubs]

    # Convert the list of row dicts to a DataFrame and write it out
    output_table = pd.DataFrame(inter_table)
    output_table.to_csv(outpath,index=False)

`norbits(zin, N, rho_method)`

Counts the number of orbits and computes related group information using GAP.

Parameters: zin (str): GAP expression defining the group with generators. N (int): Number of nodes in the network. rho_method (str): Method for orbit calculation ('tuple' or 'polya').

Returns: tuple: - int: The number of orbits. - int: The order of the group. - list: The list of orbits (if applicable).

Source code in dsdp-lumping/lumping.py

def norbits(zin,N,rho_method):
    """
    Counts the number of orbits and computes related group information using GAP.

    Parameters:
    zin (str): GAP expression defining the group generators. It is expected to
        assign the GAP variable 'z' (e.g. 'z:=[(1,2),(3,4)];'), because the group
        is then built from 'z' on the GAP side.
    N (int): Number of nodes in the network.
    rho_method (str): Method for orbit calculation ('tuple' or 'polya').

    Returns:
    tuple:
        - int: The number of orbits.
        - int: The order of the group.
        - list: The list of orbits (if applicable).

    Raises:
    ValueError: If rho_method is neither 'tuple' nor 'polya'.
    """
    # Reject unknown methods before doing any GAP work; otherwise the function
    # would fail only at the return statement with unbound local variables.
    if rho_method not in ('tuple', 'polya'):
        raise ValueError(
            "rho_method must be 'tuple' or 'polya', got %r" % (rho_method,)
        )

    gap.eval(zin) # Evaluate the input using GAP (defines 'z' on the GAP side)
    G = gap.eval('zgroup:=GroupWithGenerators(z);')  # Create a group with generators

    # Depending on the method, compute orbits
    if rho_method == 'tuple':
        # NOTE(review): Tuples and Permuted must be available as Python names in
        # this module for this branch to run - confirm against the file's imports.
        zorbits = gap.OrbitsDomain(G,Tuples([0,1],3),Permuted)
        orbit_count = len(zorbits)
    else:
        orbit_count = int(polya_enum(G,N)) # Use Polya enumeration to count orbits
        zorbits = list(gap.Orbits(G))

    zorder = int(gap.Size(G)) # Get the order of the group
    return orbit_count, zorder, zorbits

`polya_enum(G, N)`

Performs Polya enumeration for a group.

Parameters: G (gap object): The group object in GAP. N (int): Number of nodes in the network.

Returns: float: The Polya enumeration result for the group.

Source code in dsdp-lumping/lumping.py

def polya_enum(G,N):
    """
    Performs Polya enumeration for a group.

    For each conjugacy class, the class size is multiplied by 2 raised to the
    number of cycles of a class representative (fixed points included). The
    sum over all classes is divided by the order of the group.

    Parameters:
    G (gap object): The group object in GAP.
    N (int): Number of nodes in the network.

    Returns:
    float: The Polya enumeration result for the group.
    """
    tot = 0

    # All elements of a conjugacy class share a cycle type, so one
    # representative per class weighted by the class size is sufficient.
    for conj_class in gap.ConjugacyClasses(G):
        rep = gap.Representative(conj_class)
        cn = int(cycle_number(rep,N))
        size = int(gap.Size(conj_class))
        # '**' is exponentiation; '^' is bitwise XOR in Python and binds more
        # loosely than '*', so 'size*2^cn' computed (size*2) XOR cn.
        tot += size * 2 ** cn

    order_g = int(gap.Order(G))

    # Working with Python ints keeps the sum exact; true division of ints
    # yields the float result the callers expect.
    return tot / order_g

`rationals_fix(rational)`

Converts a rational number in string format to a float.

Parameters: rational (str): A string representation of a rational number (e.g., '3/4').

Returns: float: The numerical value of the rational number.

Source code in dsdp-lumping/lumping.py

def rationals_fix(rational):
    """
    Converts a rational number in string format to a float.

    Parameters:
    rational (str): A string representation of a rational number (e.g., '3/4'),
        or of a plain number without a '/' (e.g., '5').

    Returns:
    float: The numerical value of the rational number.

    Raises:
    ValueError: If a part of the string cannot be parsed as a number.
    ZeroDivisionError: If the denominator is zero.
    """
    if '/' not in rational:
        return float(rational)

    # Split on the first '/' only; numerator and denominator are parsed separately.
    num, denom = rational.split('/',1)
    return int(num.strip()) / int(denom.strip())

`read_am(aut_filename)`

Reads in the automorphism data from a .gap file.

Parameters: aut_filename (str or Path): The path to the .gap file containing automorphism data.

Returns: list: A list of strings, where each string represents a line from the automorphism file.

Source code in dsdp-lumping/lumping.py

def read_am(aut_filename):
    """
    Loads the automorphism data stored in a .gap file.

    Parameters:
    aut_filename (str or Path): Location of the .gap file to read.

    Returns:
    list: One string per line of the file, with leading and trailing
        whitespace (including the newline) removed.
    """
    with open(aut_filename, 'r') as handle:
        # Iterating the handle yields the file line by line.
        return [raw_line.strip() for raw_line in handle]

`read_log(log_filename)`

Reads log data from a .log file generated by Saucy.

Parameters: log_filename (str or Path): The path to the .log file.

Returns: dict: A dictionary with extracted log data, using specific keywords as keys.

Source code in dsdp-lumping/lumping.py

def read_log(log_filename):
    """
    Reads log data from a .log file generated by Saucy.

    Only lines of the form 'key = value' whose key is one of the known keywords
    are kept; all other lines are ignored.

    Parameters:
    log_filename (str or Path): The path to the .log file.

    Returns:
    dict: A dictionary with extracted log data, using specific keywords as keys.
        Values are the stripped strings found after the first '=' on the line.
    """
    grp_kw = [ # Keywords to extract from log files
        'vertices',
        'edges',
        'up size',
        'levels',
        'nodes',
        'generators',
        'total support',
        'average support',
        'nodes per generator',
        'bad nodes',
        'cpu time (s)'
        ]

    extracted_data = {} # Dictionary to store extracted data

    # Open log file and parse the data
    with open(log_filename,'r') as file:
        for line in file:
            # Lines without '=' carry no key/value pair. Skipping them avoids
            # reusing the key from an earlier line (or an undefined one).
            if '=' not in line:
                continue

            key, value = line.split('=', 1)
            key = key.strip()

            if key in grp_kw:
                extracted_data[key] = value.strip() # Store key-value pairs
    return extracted_data