Module `lang_main.analysis.graphs`

Functions

def add_betweenness_centrality(graph: DiGraph | Graph, edge_weight_property: str | None = None, property_name: str = 'betweenness_centrality') ‑> None

Expand source code

def add_betweenness_centrality(
    graph: DiGraph | Graph,
    edge_weight_property: str | None = None,
    property_name: str = PROPERTY_NAME_BETWEENNESS_CENTRALITY,
) -> None:
    """Attach the normalised betweenness centrality to every node as a property.

    The graph is modified in place, nothing is returned.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph whose nodes receive the betweenness centrality as node property
    edge_weight_property : str | None, optional
        name of the edge property holding the weight information, passed on
        as ``weight`` to ``nx.betweenness_centrality``, by default None
    property_name : str, optional
        name of the node property under which the centrality is stored,
        by default PROPERTY_NAME_BETWEENNESS_CENTRALITY
    """
    centrality = nx.betweenness_centrality(  # type: ignore
        graph,
        normalized=True,
        weight=edge_weight_property,
    )
    centrality_by_node = cast(dict[str, float], centrality)
    nx.set_node_attributes(graph, centrality_by_node, name=property_name)

Adds the betweenness centrality as a property to each node of the given graph. The operation is performed inplace.

Parameters

graph : DiGraph | Graph: Graph with betweenness centrality as node property added inplace
edge_weight_property : str | None, optional: property of the edges which contains the weight information, not necessarily needed, by default 'None'
property_name : str, optional: target name for the property containing the betweenness centrality in nodes, by default PROPERTY_NAME_BETWEENNESS_CENTRALITY

def add_importance_metric(graph: DiGraph | Graph, property_name: str = 'importance', property_name_weighted_degree: str = 'degree_weighted', property_name_betweenness: str = 'betweenness_centrality') ‑> None

Expand source code

def add_importance_metric(
    graph: DiGraph | Graph,
    property_name: str = PROPERTY_NAME_IMPORTANCE,
    property_name_weighted_degree: str = PROPERTY_NAME_DEGREE_WEIGHTED,
    property_name_betweenness: str = PROPERTY_NAME_BETWEENNESS_CENTRALITY,
) -> None:
    """Attach a custom importance metric to every node as a property.

    The importance of a node is the product of its weighted degree and its
    betweenness centrality. Both values are read from already existing node
    properties. The graph is modified in place.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph whose nodes receive the importance metric as node property
    property_name : str, optional
        name of the node property under which the importance is stored,
        by default PROPERTY_NAME_IMPORTANCE
    property_name_weighted_degree : str, optional
        name of the node property holding the weighted degree,
        by default PROPERTY_NAME_DEGREE_WEIGHTED
    property_name_betweenness : str, optional
        name of the node property holding the betweenness centrality,
        by default PROPERTY_NAME_BETWEENNESS_CENTRALITY

    Raises
    ------
    NodePropertyNotContainedError
        if a node lacks the weighted degree or the betweenness centrality
        property
    """
    importance_by_node: dict[str, float] = {}
    for node in cast(Iterable[str], graph.nodes):
        attributes = cast(dict[str, float], graph.nodes[node])

        # both input metrics must already be present on the node,
        # the weighted degree is checked first
        if property_name_weighted_degree not in attributes:
            raise NodePropertyNotContainedError(
                'Node data does not contain weighted degree '
                f'with name {property_name_weighted_degree}.'
            )
        if property_name_betweenness not in attributes:
            raise NodePropertyNotContainedError(
                'Node data does not contain betweenness centrality '
                f'with name {property_name_betweenness}.'
            )

        weighted_degree = attributes[property_name_weighted_degree]
        betweenness = attributes[property_name_betweenness]
        importance_by_node[node] = weighted_degree * betweenness

    nx.set_node_attributes(graph, importance_by_node, name=property_name)

Adds a custom importance metric as property to each node of the given graph. Can be used to decide which nodes are of high importance and also to build node size mappings. Operation is performed inplace.

Parameters

graph : DiGraph | Graph: Graph with the importance metric as node property added inplace
property_name : str, optional: target name for the property containing the importance metric in nodes, by default PROPERTY_NAME_IMPORTANCE
property_name_weighted_degree : str, optional: name of the node property containing the weighted degree, by default PROPERTY_NAME_DEGREE_WEIGHTED
property_name_betweenness : str, optional: name of the node property containing the betweenness centrality, by default PROPERTY_NAME_BETWEENNESS_CENTRALITY

def add_weighted_degree(graph: DiGraph | Graph, edge_weight_property: str = 'weight', property_name: str = 'degree_weighted') ‑> None

Expand source code

def add_weighted_degree(
    graph: DiGraph | Graph,
    edge_weight_property: str = 'weight',
    property_name: str = PROPERTY_NAME_DEGREE_WEIGHTED,
) -> None:
    """Attach the weighted degree to every node as a property.

    The graph is modified in place, nothing is returned.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph whose nodes receive the weighted degree as node property
    edge_weight_property : str, optional
        name of the edge property holding the weight information,
        by default 'weight'
    property_name : str, optional
        name of the node property under which the weighted degree is stored,
        by default PROPERTY_NAME_DEGREE_WEIGHTED
    """
    weighted_degrees = dict(graph.degree(weight=edge_weight_property))  # type: ignore
    degree_by_node = cast(dict[str, float], weighted_degrees)
    nx.set_node_attributes(graph, degree_by_node, name=property_name)

Adds the weighted degree as a property to each node of the given graph. The operation is performed inplace.

Parameters

graph : DiGraph | Graph: Graph with weighted degree as node property added inplace
edge_weight_property : str, optional: property of the edges which contains the weight information, by default 'weight'
property_name : str, optional: target name for the property containing the weighted degree in nodes, by default PROPERTY_NAME_DEGREE_WEIGHTED

def convert_graph_to_cytoscape(graph: Graph | DiGraph) ‑> tuple[list[lang_main.types.CytoscapeData], lang_main.types.WeightData]

Expand source code

def convert_graph_to_cytoscape(
    graph: Graph | DiGraph,
) -> tuple[list[CytoscapeData], WeightData]:
    """Translate a graph into a list of Cytoscape elements.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph to convert

    Returns
    -------
    tuple[list[CytoscapeData], WeightData]
        all node entries followed by all edge entries, and the minimum and
        maximum edge weight found (both 0 if the graph has no edges)
    """
    # node entries: the node title serves as both id and label
    node_titles = cast(Iterable[NodeTitle], graph.nodes)
    elements: list[CytoscapeData] = [
        {'data': {'id': title, 'label': title}} for title in node_titles
    ]

    # edge entries: edges without a weight property count as weight 1
    weighted_edges = cast(
        Iterable[tuple[NodeTitle, NodeTitle, EdgeWeight]],
        graph.edges.data('weight', default=1),  # type: ignore
    )
    seen_weights: set[int] = set()
    for src, dst, edge_weight in weighted_edges:
        seen_weights.add(edge_weight)
        elements.append(
            {'data': {'source': src, 'target': dst, 'weight': edge_weight}}
        )

    # TODO: add internal behaviour (if edge added check for new min/max)
    weight_metadata: WeightData = {
        'min': min(seen_weights) if seen_weights else 0,
        'max': max(seen_weights) if seen_weights else 0,
    }

    return elements, weight_metadata

def convert_graph_to_undirected(graph: DiGraph, logging: bool = False, cast_int: bool = False) ‑> networkx.classes.graph.Graph

Expand source code

def convert_graph_to_undirected(
    graph: DiGraph,
    logging: bool = LOGGING_DEFAULT_GRAPHS,
    cast_int: bool = False,
) -> Graph:
    """Build an undirected graph by merging the two directions of every edge.

    The weights of the edges ``(u, v)`` and ``(v, u)`` are summed up to the
    weight of the undirected edge between ``u`` and ``v``. Self-loops keep
    their original weight.

    Parameters
    ----------
    graph : DiGraph
        directed graph to convert
    logging : bool, optional
        whether to log a success message and the properties of the new graph,
        by default LOGGING_DEFAULT_GRAPHS
    cast_int : bool, optional
        if True the adjacency matrix is read as ``np.uint32``; if False the
        weights are read as ``np.float32`` and summed as integers scaled by
        ``10**EDGE_WEIGHT_DECIMALS``, by default False

    Returns
    -------
    Graph
        undirected graph with summed edge weights
    """
    dtype = np.uint32 if cast_int else np.float32
    # get adjacency matrix
    adj_mat = typing.cast(DataFrame, nx.to_pandas_adjacency(G=graph, dtype=dtype))
    arr = typing.cast(npt.NDArray[np.float32 | np.uint32], adj_mat.to_numpy())
    if not cast_int:
        # shift the decimals into the integer range so the sum is done on
        # integers (presumably to avoid float accumulation errors)
        arr = arr * (10**EDGE_WEIGHT_DECIMALS)
        arr = np.round(arr, decimals=0)
        arr = arr.astype(np.uint32)
    # build undirected array: add the strictly lower triangle (transposed) to
    # the upper triangle; the diagonal is taken from the upper triangle only,
    # otherwise the weight of self-loops would be counted twice
    arr_upper = np.triu(arr)
    arr_lower = np.tril(arr, k=-1)
    arr_new = arr_upper + arr_lower.T
    if not cast_int:
        arr_new = (arr_new / 10**EDGE_WEIGHT_DECIMALS).astype(np.float32)
        arr_new = np.round(arr_new, decimals=EDGE_WEIGHT_DECIMALS)
    # assign new data and create graph
    adj_mat.loc[:] = arr_new  # type: ignore
    graph_undir = typing.cast(Graph, nx.from_pandas_adjacency(df=adj_mat))

    # info about graph: the metadata is only gathered for its log output,
    # so the computation is skipped when logging is disabled
    if logging:
        logger.info('Successfully converted graph to one with undirected edges.')
        _ = get_graph_metadata(graph=graph_undir, logging=logging)

    return graph_undir

def filter_graph_by_edge_weight(graph: TokenGraph, bound_lower: int | None, bound_upper: int | None, property: str = 'weight') ‑> TokenGraph

Expand source code

def filter_graph_by_edge_weight(
    graph: TokenGraph,
    bound_lower: int | None,
    bound_upper: int | None,
    property: str = 'weight',
) -> TokenGraph:
    """filters all edges which are within the provided bounds
    inclusive limits: bound_lower <= edge_weight <= bound_upper are retained

    Parameters
    ----------
    graph : TokenGraph
        graph to filter, it is not modified
    bound_lower : int | None
        lower bound for edge weights, edges with weight equal to this value are
        retained, None disables the lower bound
    bound_upper : int | None
        upper bound for edge weights, edges with weight equal to this value are
        retained, None disables the upper bound
    property : str, optional
        name of the edge property holding the weight, by default 'weight'

    Returns
    -------
    TokenGraph
        a copy of the graph with filtered edges
    """
    filtered_graph = graph.copy()

    # explicit None checks: a bound of 0 is a valid bound and must not be
    # mistaken for "no bound provided"
    if bound_lower is None and bound_upper is None:
        logger.warning('No bounds provided, returning original graph.')
        return filtered_graph

    # iterate over a snapshot of the edges because edges are removed on the way
    for node_1, node_2 in list(filtered_graph.edges):
        weight = typing.cast(int, filtered_graph[node_1][node_2][property])
        below_lower = bound_lower is not None and weight < bound_lower
        above_upper = bound_upper is not None and weight > bound_upper
        # single removal even if both conditions hold (bound_lower > bound_upper),
        # a second removal of the same edge would raise
        if below_lower or above_upper:
            filtered_graph.remove_edge(node_1, node_2)

    # refresh the undirected version and the metadata of the filtered copy
    filtered_graph.to_undirected(inplace=True, logging=False)
    filtered_graph.update_metadata(logging=False)

    return filtered_graph

Filters the edges by their weight: all edges whose weight lies within the provided bounds are retained. The limits are inclusive, i.e. edges with bound_lower <= edge_weight <= bound_upper are retained.

Parameters

bound_lower : int | None: lower bound for edge weights, edges with weight equal to this value are retained
bound_upper : int | None: upper bound for edge weights, edges with weight equal to this value are retained

Returns

TokenGraph: a copy of the graph with filtered edges

def filter_graph_by_node_degree(graph: TokenGraph, bound_lower: int | None, bound_upper: int | None) ‑> TokenGraph

Expand source code

def filter_graph_by_node_degree(
    graph: TokenGraph,
    bound_lower: int | None,
    bound_upper: int | None,
) -> TokenGraph:
    """filters all nodes which are within the provided bounds by their degree,
    inclusive limits: bound_lower <= node_degree <= bound_upper are retained

    The degree of each node is evaluated on the unfiltered graph, removals
    performed during filtering do not influence the degree of the other nodes.

    Parameters
    ----------
    graph : TokenGraph
        graph to filter, it is not modified
    bound_lower : int | None
        lower bound for node degree, nodes with degree equal to this value are
        retained, None disables the lower bound
    bound_upper : int | None
        upper bound for node degree, nodes with degree equal to this value are
        retained, None disables the upper bound

    Returns
    -------
    TokenGraph
        a copy of the graph with filtered nodes
    """
    filtered_graph = graph.copy()

    # explicit None checks: a bound of 0 is a valid bound and must not be
    # mistaken for "no bound provided"
    if bound_lower is None and bound_upper is None:
        logger.warning('No bounds provided, returning original graph.')
        return filtered_graph

    # snapshot of all degrees before any node is removed
    degree_by_node = typing.cast(dict[Any, int], dict(filtered_graph.degree))  # type: ignore

    for node, degree in degree_by_node.items():
        below_lower = bound_lower is not None and degree < bound_lower
        above_upper = bound_upper is not None and degree > bound_upper
        # single removal even if both conditions hold (bound_lower > bound_upper),
        # a second removal of the same node would raise
        if below_lower or above_upper:
            filtered_graph.remove_node(node)

    # refresh the undirected version and the metadata of the filtered copy
    filtered_graph.to_undirected(inplace=True, logging=False)
    filtered_graph.update_metadata(logging=False)

    return filtered_graph

Filters the nodes by their degree: all nodes whose degree lies within the provided bounds are retained. The limits are inclusive, i.e. nodes with bound_lower <= node_degree <= bound_upper are retained.

Parameters

bound_lower : int | None: lower bound for node degree, nodes with degree equal to this value are retained
bound_upper : int | None: upper bound for node degree, nodes with degree equal to this value are retained

Returns

TokenGraph: a copy of the graph with filtered nodes

def filter_graph_by_number_edges(graph: TokenGraph, limit: int | None, property: str = 'weight', descending: bool = True) ‑> TokenGraph

Expand source code

def filter_graph_by_number_edges(
    graph: TokenGraph,
    limit: int | None,
    property: str = 'weight',
    descending: bool = True,
) -> TokenGraph:
    """Keep only the first ``limit`` edges after sorting them by a property.

    Parameters
    ----------
    graph : TokenGraph
        graph to filter, it is not modified
    limit : int | None
        number of edges to keep, None keeps all edges
    property : str, optional
        name of the edge property used as sorting key, by default 'weight'
    descending : bool, optional
        whether edges are sorted with the largest property value first,
        by default True

    Returns
    -------
    TokenGraph
        a copy of the graph containing only the chosen edges
    """
    filtered = graph.copy()
    # edges as (source, target, property value) triples
    all_edges = set(filtered.edges(data=property))  # type: ignore
    ranked = sorted(all_edges, key=lambda edge: edge[2], reverse=descending)
    kept = set(ranked) if limit is None else set(ranked[:limit])
    filtered.remove_edges_from(all_edges - kept)

    return filtered

def get_graph_metadata(graph: Graph | DiGraph, logging: bool = False) ‑> dict[str, float]

Expand source code

def get_graph_metadata(
    graph: Graph | DiGraph,
    logging: bool = LOGGING_DEFAULT_GRAPHS,
) -> dict[str, float]:
    """Collect basic key figures of a graph.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph to inspect, every edge must carry a 'weight' property
    logging : bool, optional
        whether to log the collected figures, by default LOGGING_DEFAULT_GRAPHS

    Returns
    -------
    dict[str, float]
        mapping with the keys 'num_nodes', 'num_edges', 'min_edge_weight',
        'max_edge_weight', 'node_memory', 'edge_memory' and 'total_memory';
        for a graph without edges 'min_edge_weight' is 1_000_000 and
        'max_edge_weight' is 0
    """
    # nodes and edges
    num_nodes = len(graph.nodes)
    num_edges = len(graph.edges)

    # edge weights: take the real extrema of the weights instead of comparing
    # against fixed start values, which gave wrong results for weights above
    # 1_000_000 or below 0
    weights = [
        typing.cast(int, graph[node_1][node_2]['weight']) for node_1, node_2 in graph.edges
    ]
    if weights:
        min_edge_weight = min(weights)
        max_edge_weight = max(weights)
    else:
        # values previously reported for graphs without edges, kept so that
        # existing callers see no change
        min_edge_weight = 1_000_000
        max_edge_weight = 0

    # memory: sys.getsizeof is shallow, only the size of the edge tuples and
    # node objects themselves is counted
    edge_mem = sum(sys.getsizeof(edge) for edge in graph.edges)
    node_mem = sum(sys.getsizeof(node) for node in graph.nodes)
    total_mem = edge_mem + node_mem

    graph_info: dict[str, float] = {
        'num_nodes': num_nodes,
        'num_edges': num_edges,
        'min_edge_weight': min_edge_weight,
        'max_edge_weight': max_edge_weight,
        'node_memory': node_mem,
        'edge_memory': edge_mem,
        'total_memory': total_mem,
    }

    if logging:
        logger.info('Graph properties: %d Nodes, %d Edges', num_nodes, num_edges)
        logger.info('Node memory: %.2f KB', (node_mem / 1024))
        logger.info('Edge memory: %.2f KB', (edge_mem / 1024))
        logger.info('Total memory: %.2f KB', (total_mem / 1024))

    return graph_info

def normalise_array_linear(array: npt.NDArray[np.float32]) ‑> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]]

Expand source code

def normalise_array_linear(
    array: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:
    """Apply a linear min/max normalisation to the given array.

    Parameters
    ----------
    array : npt.NDArray[np.float32]
        array which shall be normalised

    Returns
    -------
    npt.NDArray[np.float32]
        min/max normalised array; an array of zeros with the same shape if
        all values of the input are identical
    """
    lowest = array.min()
    value_range = array.max() - lowest
    # identical values: avoid the division by zero
    if value_range == 0:
        return np.zeros(shape=array.shape, dtype=np.float32)

    normalised = (array - lowest) / value_range
    return normalised.astype(np.float32)

apply standard linear normalisation

Parameters

array : npt.NDArray[np.float32]: array which shall be normalised

Returns

npt.NDArray[np.float32]: min/max normalised array

def pipe_add_graph_metrics(*graphs: DiGraph | Graph) ‑> tuple[networkx.classes.digraph.DiGraph | networkx.classes.graph.Graph, ...]

Expand source code

def pipe_add_graph_metrics(
    *graphs: DiGraph | Graph,
) -> tuple[DiGraph | Graph, ...]:
    """Add all graph metrics to deep copies of the given graphs.

    For each graph the weighted degree, the betweenness centrality and the
    importance metric are added, in this order and with their default
    arguments. The graphs passed in are not modified.

    Parameters
    ----------
    *graphs : DiGraph | Graph
        graphs to enrich with metrics

    Returns
    -------
    tuple[DiGraph | Graph, ...]
        enriched copies in the order of the input graphs
    """
    # the importance metric depends on the other two and has to come last
    metric_steps = (
        add_weighted_degree,
        add_betweenness_centrality,
        add_importance_metric,
    )
    enriched: list[DiGraph | Graph] = []
    for original in graphs:
        working_copy = copy.deepcopy(original)
        for add_metric in metric_steps:
            add_metric(working_copy)
        enriched.append(working_copy)

    return tuple(enriched)

def pipe_rescale_graph_edge_weights(graph: TokenGraph) ‑> tuple[TokenGraph, networkx.classes.graph.Graph]

Expand source code

def pipe_rescale_graph_edge_weights(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Pipeline wrapper around ``TokenGraph.rescale_edge_weights``.

    The rescaling is performed on a copy, the graph passed in is left
    untouched.

    Parameters
    ----------
    graph : TokenGraph
        token graph pushed through pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        token graph (directed) and undirected version with rescaled edge weights
    """
    return graph.copy().rescale_edge_weights()

helper function to allow calls in pipelines

Parameters

graph : TokenGraph: token graph pushed through pipeline

Returns

tuple[TokenGraph, Graph]: token graph (directed) and undirected version with rescaled edge weights

def rescale_edge_weights(graph: Graph | DiGraph | TokenGraph, weight_property: str = 'weight') ‑> networkx.classes.graph.Graph | networkx.classes.digraph.DiGraph | TokenGraph

Expand source code

def rescale_edge_weights(
    graph: Graph | DiGraph | TokenGraph,
    weight_property: str = 'weight',
) -> Graph | DiGraph | TokenGraph:
    """Rescale the edge weights of a copy of the given graph.

    The weights are log-transformed, normalised linearly by
    ``normalise_array_linear`` and finally adjusted by ``weight_scaling``.

    Parameters
    ----------
    graph : Graph | DiGraph | TokenGraph
        graph whose edge weights shall be rescaled, it is not modified
    weight_property : str, optional
        name of the edge property which holds the weights and receives the
        rescaled values, by default 'weight'

    Returns
    -------
    Graph | DiGraph | TokenGraph
        copy of the graph with rescaled edge weights

    Raises
    ------
    EmptyGraphError, EmptyEdgesError
        raised by ``verify_non_empty_graph`` for graphs without nodes or edges
    EdgePropertyNotContainedError
        raised by ``verify_property`` if an edge lacks ``weight_property``
    """
    graph = graph.copy()
    # check non-emptiness
    verify_non_empty_graph(graph, including_edges=True)
    # check if all edges contain weight property
    verify_property(graph, property=weight_property)

    # read the verified property instead of a hard-coded 'weight' key so that
    # a custom ``weight_property`` is honoured
    weights = cast(list[int], [data[weight_property] for data in graph.edges.values()])
    w_log = cast(npt.NDArray[np.float32], np.log(weights, dtype=np.float32))
    weights_norm = normalise_array_linear(w_log)
    weights_adjusted = weight_scaling(weights_norm)
    # assign new weight values, edges are iterated in the same order as above
    for (node_1, node_2), new_weight in zip(graph.edges, weights_adjusted):
        graph[node_1][node_2][weight_property] = new_weight

    return graph

def save_to_GraphML(graph: DiGraph | Graph, saving_path: Path, filename: str | None = None) ‑> None

Expand source code

def save_to_GraphML(
    graph: DiGraph | Graph,
    saving_path: Path,
    filename: str | None = None,
) -> None:
    """Write the graph to disk in GraphML format.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph to save
    saving_path : Path
        target path; if ``filename`` is given it is appended to this path
    filename : str | None, optional
        filename to append to ``saving_path``, by default None

    Notes
    -----
    The suffix of the resulting path is always replaced by '.graphml'.
    """
    target = saving_path if filename is None else saving_path.joinpath(filename)
    target = target.with_suffix('.graphml')
    nx.write_graphml(G=graph, path=target)
    logger.info('Successfully saved graph as GraphML file under %s.', target)

def static_graph_analysis(graph: TokenGraph) ‑> tuple[TokenGraph]

Expand source code

def static_graph_analysis(
    graph: TokenGraph,
) -> tuple[TokenGraph]:
    """Pipeline wrapper around ``TokenGraph.perform_static_analysis``.

    The analysis is performed on a copy, the graph passed in is left
    untouched.

    Parameters
    ----------
    graph : TokenGraph
        token graph (directed)

    Returns
    -------
    tuple[TokenGraph]
        one-element tuple with the analysed copy of the token graph
    """
    analysed = graph.copy()
    analysed.perform_static_analysis()

    return (analysed,)

helper function to allow the calculation of static metrics in pipelines

Parameters

graph : TokenGraph: token graph (directed)

Returns

tuple[TokenGraph]: token graph (directed) with included undirected version and calculated KPIs

Expand source code

def update_graph(
    graph: Graph | DiGraph,
    *,
    batch: Iterable[tuple[Hashable, Hashable]] | None = None,
    parent: Hashable | None = None,
    child: Hashable | None = None,
    weight_connection: int | None = None,
) -> None:
    """Add edges to the graph or increase the weight of an existing edge.

    The graph is modified in place.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph to update
    batch : Iterable[tuple[Hashable, Hashable]] | None, optional
        edges to add in one go; if given, ``parent`` and ``child`` are ignored
        and all edges are passed to ``add_edges_from`` with the given weight
        (no increment is performed on this path), by default None
    parent : Hashable | None, optional
        source node of a single edge, by default None
    child : Hashable | None, optional
        target node of a single edge, by default None
    weight_connection : int | None, optional
        weight of a new edge or increment for an existing edge,
        None is treated as 1, by default None
    """
    increment = 1 if weight_connection is None else weight_connection

    if batch is not None:
        graph.add_edges_from(batch, weight=increment)
        return

    if graph.has_edge(parent, child):
        # known edge: accumulate the weight
        graph[parent][child]['weight'] += increment
    else:
        # unknown edge: create it, nodes will be created if not already present
        graph.add_edge(parent, child, weight=increment)

def verify_non_empty_graph(graph: DiGraph | Graph, including_edges: bool = True) ‑> None

Expand source code

def verify_non_empty_graph(
    graph: DiGraph | Graph,
    including_edges: bool = True,
) -> None:
    """Ensure that the given graph is not empty.

    The presence of nodes is checked first, then the presence of edges.

    Parameters
    ----------
    graph : DiGraph | Graph
        graph to check for emptiness
    including_edges : bool, optional
        whether a graph without edges is also treated as empty, by default True

    Raises
    ------
    EmptyGraphError
        if graph does not contain any nodes and therefore edges
    EmptyEdgesError
        if graph does not contain any edges and ``including_edges`` is set
    """
    if len(graph.nodes) == 0:
        raise EmptyGraphError(f'Graph object >>{graph}<< does not contain any nodes.')

    if including_edges and len(graph.edges) == 0:
        raise EmptyEdgesError(f'Graph object >>{graph}<< does not contain any edges.')

check if the given graph is empty, presence of nodes is checked first, then of edges

Parameters

graph : DiGraph | Graph: graph to check for emptiness
including_edges : bool, optional: whether to check for non-existence of edges, by default True

Raises

EmptyGraphError: if graph does not contain any nodes and therefore edges
EmptyEdgesError: if graph does not contain any edges

def verify_property(graph: Graph | DiGraph, property: str) ‑> None

Expand source code

def verify_property(
    graph: Graph | DiGraph,
    property: str,
) -> None:
    """Ensure that every edge of the graph carries the given property.

    Parameters
    ----------
    graph : Graph | DiGraph
        graph whose edges are checked
    property : str
        name of the edge property which has to be present

    Raises
    ------
    EdgePropertyNotContainedError
        for the first edge found which does not contain the property
    """
    for node_1, node_2 in graph.edges:
        edge_data = graph[node_1][node_2]
        if property in edge_data:
            continue
        raise EdgePropertyNotContainedError(
            f'Edge property >>{property}<< not '
            f'available for edge >>({node_1}, {node_2})<<'
        )

def weight_scaling(weights: npt.NDArray[np.float32], a: float = 1.1, b: float = 0.05) ‑> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]]

Expand source code

def weight_scaling(
    weights: npt.NDArray[np.float32],
    a: float = 1.1,
    b: float = 0.05,
) -> npt.NDArray[np.float32]:
    """Scale already normalised edge weights [0;1] in a non-linear way.

    Bigger weights have a smaller weight delta than smaller weights. Bigger
    values for parameter `b` reinforce this effect. The applied function is
    ``(b**weights - a) / (b - a)``, rounded to EDGE_WEIGHT_DECIMALS decimals.
    Based on:
    https://math.stackexchange.com/questions/4297805/exponential-function-that-passes-through-0-0-and-1-1-with-variable-slope

    With default values the range of edge weights lies approximately between [0.1; 1]

    Parameters
    ----------
    weights : npt.NDArray[np.float32]
        pre-normalised edge weights as 1D array
    a : float, optional
        factor to determine the value for edge weights with value 0
        with default approx. 0.1, by default 1.1
    b : float, optional
        adjust the curvature, smaller values increase it, by default 0.05

    Returns
    -------
    npt.NDArray[np.float32]
        non-linear adjusted edge weights as 1D array
    """
    numerator = b**weights - a
    denominator = b - a
    scaled = numerator / denominator

    return np.round(scaled, decimals=EDGE_WEIGHT_DECIMALS)

Non-linear scaling of already normalised edge weights [0;1]: bigger weights have a smaller weight delta than smaller weights. Bigger values for parameter b reinforce this effect. Based on: https://math.stackexchange.com/questions/4297805/exponential-function-that-passes-through-0-0-and-1-1-with-variable-slope

With default values the range of edge weights lies approximately between [0.1; 1]

Parameters

weights : npt.NDArray[np.float32]: pre-normalised edge weights as 1D array
a : float, optional: factor to determine the value for edge weights with value 0 with default approx. 0.1, by default 1.1
b : float, optional: adjust the curvature, smaller values increase it, by default 0.05

Returns

npt.NDArray[np.float32]: non-linear adjusted edge weights as 1D array

Classes

class TokenGraph (name: str = 'TokenGraph', enable_logging: bool = True, incoming_graph_data: Any | None = None, **attr)

Expand source code

class TokenGraph(DiGraph):
    """Directed graph which additionally manages an undirected version of
    itself and metadata for both versions.
    """

    def __init__(
        self,
        name: str = 'TokenGraph',
        enable_logging: bool = True,
        incoming_graph_data: Any | None = None,
        **attr,
    ) -> None:
        """set up the directed graph and its bookkeeping attributes

        Parameters
        ----------
        name : str, optional
            name of the graph, by default 'TokenGraph'
        enable_logging : bool, optional
            default logging behaviour of the methods, by default True
        incoming_graph_data : Any | None, optional
            data passed on to the ``DiGraph`` constructor, by default None
        **attr
            keyword arguments passed on to the ``DiGraph`` constructor
        """
        super().__init__(incoming_graph_data, **attr)
        # properties
        self._name = name
        # logging of different actions
        self.logging = enable_logging
        # indicate rescaled weights
        self.rescaled_weights: bool = False
        # directed graph data: the instance itself is the directed graph
        self._directed = self
        self._metadata_directed: dict[str, float] = {}
        # undirected graph data: built on demand
        self._undirected: Graph | None = None
        self._metadata_undirected: dict[str, float] = {}

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        node_count = len(self.nodes)
        edge_count = len(self.edges)
        return (
            f'TokenGraph(name: {self.name}, number of nodes: {node_count}, '
            f'number of edges: {edge_count})'
        )

    def disable_logging(self) -> None:
        """switch off the default logging of this instance"""
        self.logging = False

    # !! only used to verify that saving was done correctly
    """
    def __key(self) -> tuple[Hashable, ...]:
        return (self.name, tuple(self.nodes), tuple(self.edges))
    
    def __hash__(self) -> int:
        return hash(self.__key())
    """

    def copy(self) -> Self:
        """create a deep copy of the graph

        Returns
        -------
        Self
            deep copy of the graph
        """
        return copy.deepcopy(self)

    @property
    def name(self) -> str:
        """name of the graph"""
        return self._name

    @property
    def directed(self) -> Self:
        """directed graph, which is the instance itself"""
        return self._directed

    @property
    def undirected(self) -> Graph:
        """undirected version of the graph, built on first access"""
        cached = self._undirected
        if cached is None:
            cached = self.to_undirected(inplace=False, logging=False)
            self._undirected = cached

        return cached

    @property
    def metadata_directed(self) -> dict[str, float]:
        """metadata of the directed graph as stored by ``update_metadata``"""
        return self._metadata_directed

    @property
    def metadata_undirected(self) -> dict[str, float]:
        """metadata of the undirected graph as stored by the last conversion
        or metadata update"""
        return self._metadata_undirected

    @overload
    def to_undirected(
        self,
        inplace: Literal[True] = ...,
        logging: bool | None = ...,
    ) -> None: ...

    @overload
    def to_undirected(
        self,
        inplace: Literal[False],
        logging: bool | None = ...,
    ) -> Graph: ...

    def to_undirected(
        self,
        inplace: bool = True,
        logging: bool | None = None,
    ) -> Graph | None:
        """build the undirected version of the graph and store it on the
        instance together with its metadata

        Parameters
        ----------
        inplace : bool, optional
            if True nothing is returned, if False the undirected graph is
            returned in addition to being stored, by default True
        logging : bool | None, optional
            logging behaviour of the conversion, None falls back to the
            setting of the instance, by default None

        Returns
        -------
        Graph | None
            the undirected graph if ``inplace`` is False, otherwise None
        """
        use_logging = self.logging if logging is None else logging

        undirected_graph = convert_graph_to_undirected(
            graph=self,
            logging=use_logging,
            # cast to integer edge weights only if edges were not rescaled previously
            cast_int=not self.rescaled_weights,
        )
        self._undirected = undirected_graph
        self._metadata_undirected = get_graph_metadata(graph=undirected_graph, logging=False)

        if inplace:
            return None
        return undirected_graph

    def update_metadata(
        self,
        logging: bool | None = None,
    ) -> None:
        """recompute the metadata of the directed graph and, if it was
        already built, of the undirected graph

        Parameters
        ----------
        logging : bool | None, optional
            whether the metadata is logged, None falls back to the setting
            of the instance, by default None
        """
        use_logging = self.logging if logging is None else logging

        self._metadata_directed = get_graph_metadata(graph=self, logging=use_logging)
        if self._undirected is None:
            return
        self._metadata_undirected = get_graph_metadata(
            graph=self._undirected, logging=use_logging
        )

    def rescale_edge_weights(
        self,
    ) -> tuple[TokenGraph, Graph]:
        """create new directed and undirected graph instances with rescaled
        edge weights

        The undirected graph of this instance is rebuilt first, afterwards the
        directed and the undirected graph are rescaled independently of each
        other by the module-level function ``rescale_edge_weights``.
        Only this method ensures that undirected graphs are scaled properly:
        if ``to_undirected`` is called on the rescaled directed TokenGraph the
        summed weights are not rescaled again and the maximum edge weight can
        exceed the theoretical maximum value of 1. Therefore new instances are
        returned, especially for the undirected graph.
        In contrast, the undirected version stored inside the returned
        directed TokenGraph is not rescaled and differs from the undirected
        graph returned by this method.

        Returns
        -------
        tuple[TokenGraph, Graph]
            directed and undirected instances
        """
        # statement order matters: the undirected graph has to be rebuilt
        # from the current weights before anything is rescaled
        self.to_undirected(inplace=True, logging=False)

        rescaled_directed = rescale_edge_weights(self.directed)
        rescaled_directed.rescaled_weights = True
        rescaled_directed.update_metadata(logging=False)

        rescaled_undirected = rescale_edge_weights(self.undirected)

        return rescaled_directed, rescaled_undirected

    def perform_static_analysis(self) -> None:
        """calculate different metrics directly on the data of the underlying
        graphs (directed and undirected)

        Current operations:
            - adding weighted degree
        """
        add_weighted_degree(self)
        undirected_graph = self.undirected
        add_weighted_degree(undirected_graph)

    def _save_prepare(
        self,
        path: Path,
        filename: str | None = None,
    ) -> Path:
        """join the target directory with the filename, the name of the graph
        is used if no filename is given"""
        stem = self.name if filename is None else filename

        return path.joinpath(f'{stem}')

    def to_GraphML(
        self,
        path: Path,
        filename: str | None = None,
        directed: bool = False,
    ) -> None:
        """save one of the stored graphs to GraphML format on disk

        Parameters
        ----------
        path : Path
            target path for saving the file
        filename : str | None, optional
            filename to be given, the name of the graph is used if None,
            by default None
        directed : bool, optional
            indicator whether directed or undirected graph
            should be exported, by default False (undirected)
        """
        saving_path = self._save_prepare(path=path, filename=filename)
        target_graph = self.directed if directed else self.undirected

        save_to_GraphML(graph=target_graph, saving_path=saving_path)

    def to_pickle(
        self,
        path: Path,
        filename: str | None = None,
    ) -> None:
        """save whole TokenGraph object as pickle file

        Parameters
        ----------
        path : Path
            target path for saving the file
        filename : str | None, optional
            filename to be given, the name of the graph is used if None,
            by default None
        """
        pickle_path = self._save_prepare(path=path, filename=filename).with_suffix('.pkl')
        save_pickle(obj=self, path=pickle_path)

    @classmethod
    def from_file(
        cls,
        path: Path,
        node_type_graphml: type = str,
    ) -> Self:
        """load a graph from a GraphML or pickle file

        Parameters
        ----------
        path : Path
            file to load, the format is chosen by the suffix
            ('.graphml', '.pkl' or '.pickle')
        node_type_graphml : type, optional
            node type passed on to ``nx.read_graphml``, by default str

        Returns
        -------
        Self
            loaded object; the result is only cast for the type checker and
            not converted or validated

        Raises
        ------
        ValueError
            if the suffix of the file is not supported
        """
        # !! no validity checks for pickle files
        # !! GraphML files not correct because not all properties
        # !! are parsed correctly
        # TODO REWORK
        suffix = path.suffix
        if suffix == '.graphml':
            graph = typing.cast(Self, nx.read_graphml(path, node_type=node_type_graphml))
            logger.info('Successfully loaded graph from GraphML file %s.', path)
        elif suffix in ('.pkl', '.pickle'):
            # NOTE(review): load_pickle presumably unpickles the file, which
            # is unsafe for untrusted input - confirm callers only pass
            # trusted files
            graph = typing.cast(Self, load_pickle(path))
            logger.info('Successfully loaded graph from pickle file %s.', path)
        else:
            raise ValueError('File format not supported.')

        return graph

Base class for directed graphs.

A DiGraph stores nodes and edges with optional data, or attributes.

DiGraphs hold directed edges. Self loops are allowed but multiple (parallel) edges are not.

Nodes can be arbitrary (hashable) Python objects with optional key/value attributes. By convention None is not used as a node.

Edges are represented as links between nodes with optional key/value attributes.

Parameters

incoming_graph_data : input graph (optional, default: None): Data to initialize graph. If None (default) an empty graph is created. The data can be any format that is supported by the to_networkx_graph() function, currently including edge list, dict of dicts, dict of lists, NetworkX graph, 2D NumPy array, SciPy sparse matrix, or PyGraphviz graph.
attr : keyword arguments, optional (default= no attributes): Attributes to add to graph as key=value pairs.

Examples

Create an empty graph structure (a "null graph") with no nodes and no edges.

>>> G = nx.DiGraph()

G can be grown in several ways.

Nodes:

Add one node at a time:

>>> G.add_node(1)

Add the nodes from any container (a list, dict, set or even the lines from a file or the nodes from another graph).

>>> G.add_nodes_from([2, 3])
>>> G.add_nodes_from(range(100, 110))
>>> H = nx.path_graph(10)
>>> G.add_nodes_from(H)

In addition to strings and integers any hashable Python object (except None) can represent a node, e.g. a customized node object, or even another Graph.

>>> G.add_node(H)

Edges:

G can also be grown by adding edges.

Add one edge,

>>> G.add_edge(1, 2)

a list of edges,

>>> G.add_edges_from([(1, 2), (1, 3)])

or a collection of edges,

>>> G.add_edges_from(H.edges)

If some edges connect nodes not yet in the graph, the nodes are added automatically. There are no errors when adding nodes or edges that already exist.

Attributes:

Each graph, node, and edge can hold key/value attribute pairs in an associated attribute dictionary (the keys must be hashable). By default these are empty, but can be added or changed using add_edge, add_node or direct manipulation of the attribute dictionaries named graph, node and edge respectively.

>>> G = nx.DiGraph(day="Friday")
>>> G.graph
{'day': 'Friday'}

Add node attributes using add_node(), add_nodes_from() or G.nodes

>>> G.add_node(1, time="5pm")
>>> G.add_nodes_from([3], time="2pm")
>>> G.nodes[1]
{'time': '5pm'}
>>> G.nodes[1]["room"] = 714
>>> del G.nodes[1]["room"]  # remove attribute
>>> list(G.nodes(data=True))
[(1, {'time': '5pm'}), (3, {'time': '2pm'})]

Add edge attributes using add_edge(), add_edges_from(), subscript notation, or G.edges.

>>> G.add_edge(1, 2, weight=4.7)
>>> G.add_edges_from([(3, 4), (4, 5)], color="red")
>>> G.add_edges_from([(1, 2, {"color": "blue"}), (2, 3, {"weight": 8})])
>>> G[1][2]["weight"] = 4.7
>>> G.edges[1, 2]["weight"] = 4

Warning: we protect the graph data structure by making G.edges[1, 2] a read-only dict-like structure. However, you can assign to attributes in e.g. G.edges[1, 2]. Thus, use 2 sets of brackets to add/change data attributes: G.edges[1, 2]['weight'] = 4 (For multigraphs: MG.edges[u, v, key][name] = value).

Shortcuts:

Many common graph features allow python syntax to speed reporting.

>>> 1 in G  # check if node in graph
True
>>> [n for n in G if n < 3]  # iterate through nodes
[1, 2]
>>> len(G)  # number of nodes in graph
5

Often the best way to traverse all edges of a graph is via the neighbors. The neighbors are reported as an adjacency-dict G.adj or G.adjacency()

>>> for n, nbrsdict in G.adjacency():
...     for nbr, eattr in nbrsdict.items():
...         if "weight" in eattr:
...             # Do something useful with the edges
...             pass

But the edges reporting object is often more convenient:

>>> for u, v, weight in G.edges(data="weight"):
...     if weight is not None:
...         # Do something useful with the edges
...         pass

Reporting:

Simple graph information is obtained using object-attributes and methods. Reporting usually provides views instead of containers to reduce memory usage. The views update as the graph is updated similarly to dict-views. The objects nodes, edges and adj provide access to data attributes via lookup (e.g. nodes[n], edges[u, v], adj[u][v]) and iteration (e.g. nodes.items(), nodes.data('color'), nodes.data('color', default='blue') and similarly for edges) Views exist for nodes, edges, neighbors()/adj and degree.

For details on these and other miscellaneous methods, see below.

Subclasses (Advanced):

The Graph class uses a dict-of-dict-of-dict data structure. The outer dict (node_dict) holds adjacency information keyed by node. The next dict (adjlist_dict) represents the adjacency information and holds edge data keyed by neighbor. The inner dict (edge_attr_dict) represents the edge data and holds edge attribute values keyed by attribute names.

Each of these three dicts can be replaced in a subclass by a user defined dict-like object. In general, the dict-like features should be maintained but extra features can be added. To replace one of the dicts create a new graph class by changing the class(!) variable holding the factory for that dict-like structure. The variable names are node_dict_factory, node_attr_dict_factory, adjlist_inner_dict_factory, adjlist_outer_dict_factory, edge_attr_dict_factory and graph_attr_dict_factory.

node_dict_factory : function, (default: dict) Factory function to be used to create the dict containing node attributes, keyed by node id. It should require no arguments and return a dict-like object

node_attr_dict_factory: function, (default: dict) Factory function to be used to create the node attribute dict which holds attribute values keyed by attribute name. It should require no arguments and return a dict-like object

adjlist_outer_dict_factory : function, (default: dict) Factory function to be used to create the outer-most dict in the data structure that holds adjacency info keyed by node. It should require no arguments and return a dict-like object.

adjlist_inner_dict_factory : function, optional (default: dict) Factory function to be used to create the adjacency list dict which holds edge data keyed by neighbor. It should require no arguments and return a dict-like object

edge_attr_dict_factory : function, optional (default: dict) Factory function to be used to create the edge attribute dict which holds attribute values keyed by attribute name. It should require no arguments and return a dict-like object.

graph_attr_dict_factory : function, (default: dict) Factory function to be used to create the graph attribute dict which holds attribute values keyed by attribute name. It should require no arguments and return a dict-like object.

Typically, if your extension doesn't impact the data structure, all methods will be inherited without issue except to_directed/to_undirected. By default these methods create a DiGraph/Graph class and you probably want them to create your extension of a DiGraph/Graph. To facilitate this we define two class variables that you can set in your subclass.

to_directed_class : callable, (default: DiGraph or MultiDiGraph) Class to create a new graph structure in the to_directed method. If None, a NetworkX class (DiGraph or MultiDiGraph) is used.

to_undirected_class : callable, (default: Graph or MultiGraph) Class to create a new graph structure in the to_undirected method. If None, a NetworkX class (Graph or MultiGraph) is used.

Subclassing Example

Create a low memory graph class that effectively disallows edge attributes by using a single attribute dict for all edges. This reduces the memory used, but you lose edge attributes.

>>> class ThinGraph(nx.Graph):
...     all_edge_dict = {"weight": 1}
...
...     def single_edge_dict(self):
...         return self.all_edge_dict
...
...     edge_attr_dict_factory = single_edge_dict
>>> G = ThinGraph()
>>> G.add_edge(2, 1)
>>> G[2][1]
{'weight': 1}
>>> G.add_edge(2, 2)
>>> G[2][1] is G[2][2]
True

Initialize a graph with edges, name, or graph attributes.

Parameters

incoming_graph_data : input graph (optional, default: None): Data to initialize graph. If None (default) an empty graph is created. The data can be an edge list, or any NetworkX graph object. If the corresponding optional Python packages are installed the data can also be a 2D NumPy array, a SciPy sparse array, or a PyGraphviz graph.
attr : keyword arguments, optional (default= no attributes): Attributes to add to graph as key=value pairs.

Examples

>>> G = nx.Graph()  # or DiGraph, MultiGraph, MultiDiGraph, etc
>>> G = nx.Graph(name="my graph")
>>> e = [(1, 2), (2, 3), (3, 4)]  # list of edges
>>> G = nx.Graph(e)

Arbitrary graph attribute pairs (key=value) may be assigned

>>> G = nx.Graph(e, day="Friday")
>>> G.graph
{'day': 'Friday'}

Ancestors

networkx.classes.digraph.DiGraph
networkx.classes.graph.Graph

Static methods

def from_file(path: Path, node_type_graphml: type = builtins.str) ‑> Self

Instance variables

prop directed : Self

Expand source code

@property
def directed(self) -> Self:
    """Return the object stored in ``self._directed``."""
    return self._directed

prop metadata_directed : dict[str, float]

Expand source code

@property
def metadata_directed(self) -> dict[str, float]:
    """Return the metadata mapping stored in ``self._metadata_directed``.

    The mapping is refreshed by ``update_metadata``.
    """
    return self._metadata_directed

prop metadata_undirected : dict[str, float]

Expand source code

@property
def metadata_undirected(self) -> dict[str, float]:
    """Return the metadata mapping stored in ``self._metadata_undirected``.

    The mapping is refreshed by ``to_undirected`` and, if an undirected graph
    is already stored, by ``update_metadata``.
    """
    return self._metadata_undirected

prop name : str

Expand source code

@property
def name(self) -> str:
    """Return the graph name stored in ``self._name``."""
    return self._name

String identifier of the graph.

This graph attribute appears in the attribute dict G.graph keyed by the string "name", as well as an attribute (technically a property) G.name. This is entirely user controlled.

prop undirected : Graph

Expand source code

@property
def undirected(self) -> Graph:
    """Return the undirected graph, creating it on first access.

    If no undirected graph is stored yet (``self._undirected`` is None), it is
    built via ``to_undirected`` with logging switched off and kept in
    ``self._undirected`` for later accesses.
    """
    if self._undirected is None:
        self._undirected = self.to_undirected(inplace=False, logging=False)

    return self._undirected

Methods

def copy(self) ‑> Self

Expand source code

def copy(self) -> Self:
    """Return a deep copy of the graph, created with ``copy.deepcopy``.

    Returns
    -------
    Self
        deep copy of the graph
    """
    return copy.deepcopy(self)

returns a (deep) copy of the graph

Returns

Self: deep copy of the graph

def disable_logging(self) ‑> None

Expand source code

def disable_logging(self) -> None:
    """Set ``self.logging`` to False.

    ``to_undirected`` and ``update_metadata`` fall back to this flag when
    their ``logging`` argument is None.
    """
    self.logging = False

def perform_static_analysis(self) ‑> None

Expand source code

def perform_static_analysis(self) -> None:
    """Calculate different metrics directly on the data of the underlying graphs
    (directed and undirected).

    Current operations:
        - adding weighted degree
    """
    add_weighted_degree(self)
    # the `undirected` property builds the undirected graph on first access
    # if none is stored yet
    add_weighted_degree(self.undirected)

calculate different metrics directly on the data of the underlying graphs (directed and undirected)

Current operations: - adding weighted degree

def rescale_edge_weights(self) ‑> tuple[TokenGraph, networkx.classes.graph.Graph]

Expand source code

def rescale_edge_weights(
    self,
) -> tuple[TokenGraph, Graph]:
    """Generate new instances of the directed and undirected TokenGraph with
    rescaled edge weights.

    Only this method ensures that undirected graphs are scaled properly. If
    the underlying `to_undirected` method of the directed and rescaled
    TokenGraph instance is called, the weights are not rescaled again. Thus,
    the maximum edge weight can exceed the theoretical maximum value of 1. To
    ensure consistent behaviour across different applications of the conversion to
    undirected graphs, new instances are returned, especially for the undirected
    graph.
    In contrast, the new directed TokenGraph contains an undirected version without
    rescaling of the weights. Therefore, this undirected version differs from the version
    returned by this method.

    Returns
    -------
    tuple[TokenGraph, Graph]
        directed and undirected instances
    """
    # rebuild the stored undirected graph from the current state of this graph
    self.to_undirected(inplace=True, logging=False)
    # NOTE(review): the bare name below presumably resolves to the module-level
    # function `rescale_edge_weights`, not to this method - confirm
    token_graph = rescale_edge_weights(self.directed)
    # flag read by `to_undirected` to skip the integer cast of edge weights
    token_graph.rescaled_weights = True
    token_graph.update_metadata(logging=False)
    undirected = rescale_edge_weights(self.undirected)

    return token_graph, undirected

Generate new instances of the directed and undirected TokenGraph with rescaled edge weights. Only this method ensures that undirected graphs are scaled properly. If the underlying to_undirected method of the directed and rescaled TokenGraph instance is called, the weights are not rescaled again. Thus, the maximum edge weight can exceed the theoretical maximum value of 1. To ensure consistent behaviour across different applications of the conversion to undirected graphs, new instances are returned, especially for the undirected graph. In contrast, the new directed TokenGraph contains an undirected version without rescaling of the weights. Therefore, this undirected version differs from the version returned by this method.

Returns

tuple[TokenGraph, Graph]: directed and undirected instances

def to_GraphML(self, path: Path, filename: str | None = None, directed: bool = False) ‑> None

Expand source code

def to_GraphML(
    self,
    path: Path,
    filename: str | None = None,
    directed: bool = False,
) -> None:
    """Write one of the stored graphs to disk in GraphML format.

    Parameters
    ----------
    path : Path
        target path for saving the file
    filename : str | None, optional
        filename to be given, by default None
    directed : bool, optional
        indicator whether directed or undirected graph
        should be exported, by default False (undirected)

    Notes
    -----
    The undirected graph is read from the ``undirected`` property, which
    builds it on first access if it is not stored yet.
    """
    saving_path = self._save_prepare(path=path, filename=filename)
    # choose the graph variant requested by the caller
    graph_to_export = self.directed if directed else self.undirected
    save_to_GraphML(graph=graph_to_export, saving_path=saving_path)

Save one of the stored graphs to GraphML format on disk.

Parameters

path : Path: target path for saving the file
filename : str | None, optional: filename to be given, by default None
directed : bool, optional: indicator whether directed or undirected graph should be exported, by default False (undirected)

Raises

ValueError: undirected graph should be exported but is not available

def to_pickle(self, path: Path, filename: str | None = None) ‑> None

Expand source code

def to_pickle(
    self,
    path: Path,
    filename: str | None = None,
) -> None:
    """Serialise the complete TokenGraph object into a pickle file.

    Parameters
    ----------
    path : Path
        target path for saving the file
    filename : str | None, optional
        filename to be given, by default None
    """
    # the prepared path is always written with the '.pkl' suffix
    pickle_path = self._save_prepare(path=path, filename=filename).with_suffix('.pkl')
    save_pickle(obj=self, path=pickle_path)

save whole TokenGraph object as pickle file

Parameters

path : Path: target path for saving the file
filename : str | None, optional: filename to be given, by default None

def to_undirected(self, inplace: bool = True, logging: bool | None = None) ‑> networkx.classes.graph.Graph | None

Expand source code

def to_undirected(
    self,
    inplace: bool = True,
    logging: bool | None = None,
) -> Graph | None:
    """Build the undirected counterpart of this graph and store it.

    The converted graph is kept in ``self._undirected`` and its metadata in
    ``self._metadata_undirected`` on every call, independent of ``inplace``.

    Parameters
    ----------
    inplace : bool, optional
        if True nothing is returned, if False the undirected graph
        is returned, by default True
    logging : bool | None, optional
        logging switch forwarded to the conversion, ``self.logging`` is
        used if None, by default None

    Returns
    -------
    Graph | None
        the undirected graph if ``inplace`` is False, otherwise None
    """
    use_logging = self.logging if logging is None else logging
    # cast to integer edge weights only if edges were not rescaled previously
    cast_int: bool = not self.rescaled_weights

    self._undirected = convert_graph_to_undirected(
        graph=self,
        logging=use_logging,
        cast_int=cast_int,
    )
    # metadata of the fresh undirected graph is always collected silently
    self._metadata_undirected = get_graph_metadata(graph=self._undirected, logging=False)

    return None if inplace else self._undirected

Returns an undirected representation of the digraph.

Parameters

reciprocal : bool (optional)
If True only keep edges that appear in both directions
in the original digraph.
as_view : bool (optional, default=False)

If True return an undirected view of the original directed graph.

Returns

G : Graph: An undirected graph with the same name and nodes and with edge (u, v, data) if either (u, v, data) or (v, u, data) is in the digraph. If both edges exist in digraph and their edge data is different, only one edge is created with an arbitrary choice of which edge data to use. You must check and correct for this manually if desired.

Notes

If edges in both directions (u, v) and (v, u) exist in the graph, attributes for the new undirected edge will be a combination of the attributes of the directed edges. The edge data is updated in the (arbitrary) order that the edges are encountered. For more customized control of the edge attributes use add_edge().

This returns a "deepcopy" of the edge, node, and graph attributes which attempts to completely copy all of the data and references.

This is in contrast to the similar G=DiGraph(D) which returns a shallow copy of the data.

See the Python copy module for more information on shallow and deep copies, https://docs.python.org/3/library/copy.html.

Warning: If you have subclassed DiGraph to use dict-like objects in the data structure, those changes do not transfer to the Graph created by this method.

Examples

>>> G = nx.path_graph(2)  # or MultiGraph, etc
>>> H = G.to_directed()
>>> list(H.edges)
[(0, 1), (1, 0)]
>>> G2 = H.to_undirected()
>>> list(G2.edges)
[(0, 1)]

def update_metadata(self, logging: bool | None = None) ‑> None

Expand source code

def update_metadata(
    self,
    logging: bool | None = None,
) -> None:
    """Recompute the stored metadata of the directed and, if present, the
    undirected graph.

    Parameters
    ----------
    logging : bool | None, optional
        logging switch forwarded to the metadata collection,
        ``self.logging`` is used if None, by default None
    """
    use_logging = self.logging if logging is None else logging

    self._metadata_directed = get_graph_metadata(graph=self, logging=use_logging)
    # nothing more to refresh while no undirected graph has been stored
    if self._undirected is None:
        return
    self._metadata_undirected = get_graph_metadata(
        graph=self._undirected, logging=use_logging
    )

Module `lang_main.analysis.graphs`

Functions

Parameters

Parameters

Parameters

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Returns

Parameters

Raises

Parameters

Returns

Classes

Parameters

See Also

Examples

Parameters

See Also

Examples

Ancestors

Static methods

Instance variables

Methods

Returns

Returns

Parameters

Raises

Parameters

Parameters

Returns

See Also

Notes

Examples