Source code for graphslim.coarsening.coarsening_base

import copy

import numpy as np
import scipy as sp
import torch
from pygsp import graphs
from torch_geometric.utils import to_dense_adj

from graphslim.coarsening.utils import contract_variation_edges, contract_variation_linear, get_proximity_measure, \
    matching_optimal, matching_greedy, get_coarsening_matrix, coarsen_matrix, coarsen_vector, zero_diag
from graphslim.dataset.convertor import pyg2gsp, csr2ei, ei2csr
from graphslim.dataset.utils import save_reduced
from graphslim.evaluation import *
from graphslim.utils import one_hot, to_tensor


[docs] class Coarsen: """ Graph Coarsening This class provides methods to coarsen a graph, which involves reducing the number of nodes while preserving certain properties of the original graph. Parameters ---------- setting : str The setting for coarsening. data : object The data to be coarsened. args : Namespace Arguments containing configurations and device information. **kwargs : dict, optional Additional keyword arguments. Methods ------- reduce(data, verbose=True, save=True) Reduces the data by coarsening the graph. coarsening(H) Performs the coarsening process on the graph H. process_coarsened(data, candidate, C_list, Gc_list) Processes the coarsened graphs and returns the features, labels, masks, and edges. """ def __init__(self, setting, data, args): """ Initializes the Coarsen object. Parameters ---------- setting : str The setting for coarsening. data : object The data to be coarsened. args : Namespace Arguments containing configurations and device information. **kwargs : dict Additional keyword arguments. """ self.setting = setting self.args = args self.device = args.device # pass data for initialization
[docs] @verbose_time_memory def reduce(self, data, verbose=True, save=True): """ Reduces the data by coarsening the graph. Parameters ---------- data : object The data to be reduced. verbose : bool, optional If True, prints verbose output. Defaults to True. save : bool, optional If True, saves the reduced data. Defaults to True. Returns ------- object The reduced data with updated attributes `adj_syn`, `feat_syn`, and `labels_syn`. """ args = self.args # setting = self.setting # device = self.device cpu_data = copy.deepcopy(data) if args.setting == 'trans': gps_graph = graphs.Graph(W=data.adj_full) candidate, C_list, Gc_list = self.coarsening(gps_graph) coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge = self.process_coarsened( cpu_data, candidate, C_list, Gc_list) train_idx = np.nonzero(coarsen_train_mask.numpy())[0] coarsen_features = coarsen_features[train_idx] coarsen_edge = ei2csr(coarsen_edge, coarsen_train_mask.shape[0])[np.ix_(train_idx, train_idx)] coarsen_train_labels = coarsen_train_labels[train_idx] else: gps_graph = graphs.Graph(W=data.adj_full) candidate, C_list, Gc_list = self.coarsening(gps_graph) coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge = self.process_coarsened( cpu_data, candidate, C_list, Gc_list) coarsen_edge = ei2csr(coarsen_edge, coarsen_train_mask.shape[0]) data.adj_syn, data.feat_syn, data.labels_syn = to_tensor(coarsen_edge), coarsen_features, coarsen_train_labels if save: save_reduced(data.adj_syn, data.feat_syn, data.labels_syn, args) return data
[docs] def coarsening(self, H): """ Performs the coarsening process on the given graph. Parameters ---------- H : object The graph to be coarsened. Returns ------- candidate : list List of subgraphs sorted by size. C_list : list List of coarsening matrices. Gc_list : list List of coarsened graphs. """ if H.A.shape[-1] != H.A.shape[1]: H.logger.error('Inconsistent shape to extract components. Square matrix required.') return None if H.is_directed(): raise NotImplementedError('Directed graphs not supported yet.') graphs = [] visited = np.zeros(H.A.shape[-1], dtype=bool) while not visited.all(): stack = set([np.nonzero(~visited)[0][0]]) comp = [] while len(stack): v = stack.pop() if not visited[v]: comp.append(v) visited[v] = True stack.update(set([idx for idx in H.A[v, :].nonzero()[1] if not visited[idx]])) comp = sorted(comp) G = H.subgraph(comp) G.info = {'orig_idx': comp} graphs.append(G) print('the number of subgraphs is', len(graphs)) candidate = sorted(graphs, key=lambda x: len(x.info['orig_idx']), reverse=True) number = 0 C_list = [] Gc_list = [] while number < len(candidate): H = candidate[number] if len(H.info['orig_idx']) > 10: C, Gc, Call, Gall = self.coarsen(H) C_list.append(C) Gc_list.append(Gc) number += 1 return candidate, C_list, Gc_list
[docs] def process_coarsened(self, data, candidate, C_list, Gc_list): """ Processes the coarsened graph to extract relevant features and labels. Parameters ---------- data : object The original data. candidate : list List of candidate subgraphs. C_list : list List of coarsening matrices. Gc_list : list List of coarsened graphs. Returns ------- coarsen_features : torch.Tensor The coarsened features. coarsen_train_labels : torch.Tensor The coarsened training labels. coarsen_train_mask : torch.Tensor The coarsened training mask. coarsen_edge : torch.Tensor The coarsened edges. """ train_mask = data.train_mask val_mask = data.val_mask n_classes = max(data.y) + 1 if self.args.setting == 'trans': features = data.x labels = data.y else: features = data.x[train_mask] labels = data.y[train_mask] coarsen_node = 0 number = 0 coarsen_row = None coarsen_col = None coarsen_features = torch.Tensor([]) coarsen_train_labels = torch.Tensor([]) coarsen_train_mask = torch.Tensor([]).bool() while number < len(candidate): H = candidate[number] keep = H.info['orig_idx'] H_features = features[keep] H_labels = labels[keep] if self.args.setting == "trans": H_train_mask = train_mask[keep] else: H_train_mask = torch.ones(size=(len(H_labels),)) if len(H.info['orig_idx']) > 10 and torch.sum(H_train_mask) > 0: train_labels = one_hot(H_labels, n_classes) # Shape: (H_labels.shape[0], n_classes) if self.args.setting == "trans": train_labels[~H_train_mask] = torch.Tensor([0 for _ in range(n_classes)]) C = C_list[number] Gc = Gc_list[number] new_train_mask = torch.BoolTensor(np.sum(C.dot(train_labels), axis=1)) mix_label = torch.FloatTensor(C.dot(train_labels)) mix_label[mix_label > 0] = 1 mix_mask = torch.sum(mix_label, dim=1) new_train_mask[mix_mask > 1] = False coarsen_features = torch.cat([coarsen_features, torch.FloatTensor(C.dot(H_features))], dim=0) coarsen_train_labels = torch.cat( [coarsen_train_labels, torch.argmax(torch.FloatTensor(C.dot(train_labels)), dim=1).float()], dim=0) coarsen_train_mask = torch.cat([coarsen_train_mask, new_train_mask], dim=0) if coarsen_row is None: coarsen_row = Gc.W.tocoo().row coarsen_col = Gc.W.tocoo().col else: current_row = Gc.W.tocoo().row + coarsen_node current_col = Gc.W.tocoo().col + coarsen_node coarsen_row = np.concatenate([coarsen_row, current_row], axis=0) coarsen_col = np.concatenate([coarsen_col, current_col], axis=0) coarsen_node += Gc.W.shape[0] print(coarsen_node) number += 1 coarsen_edge = torch.from_numpy(np.array([coarsen_row, coarsen_col])).long() coarsen_train_labels = coarsen_train_labels.long() return coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge