import copy
import numpy as np
import scipy as sp
import torch
from pygsp import graphs
from torch_geometric.utils import to_dense_adj
from graphslim.coarsening.utils import contract_variation_edges, contract_variation_linear, get_proximity_measure, \
matching_optimal, matching_greedy, get_coarsening_matrix, coarsen_matrix, coarsen_vector, zero_diag
from graphslim.dataset.convertor import pyg2gsp, csr2ei, ei2csr
from graphslim.dataset.utils import save_reduced
from graphslim.evaluation import *
from graphslim.utils import one_hot, to_tensor
[docs]
class Coarsen:
"""
Graph Coarsening
This class provides methods to coarsen a graph, which involves reducing the number of nodes while
preserving certain properties of the original graph.
Parameters
----------
setting : str
The setting for coarsening.
data : object
The data to be coarsened.
args : Namespace
Arguments containing configurations and device information.
**kwargs : dict, optional
Additional keyword arguments.
Methods
-------
reduce(data, verbose=True, save=True)
Reduces the data by coarsening the graph.
coarsening(H)
Performs the coarsening process on the graph H.
process_coarsened(data, candidate, C_list, Gc_list)
Processes the coarsened graphs and returns the features, labels, masks, and edges.
"""
def __init__(self, setting, data, args):
"""
Initializes the Coarsen object.
Parameters
----------
setting : str
The setting for coarsening.
data : object
The data to be coarsened.
args : Namespace
Arguments containing configurations and device information.
**kwargs : dict
Additional keyword arguments.
"""
self.setting = setting
self.args = args
self.device = args.device
# pass data for initialization
[docs]
@verbose_time_memory
def reduce(self, data, verbose=True, save=True):
"""
Reduces the data by coarsening the graph.
Parameters
----------
data : object
The data to be reduced.
verbose : bool, optional
If True, prints verbose output. Defaults to True.
save : bool, optional
If True, saves the reduced data. Defaults to True.
Returns
-------
object
The reduced data with updated attributes `adj_syn`, `feat_syn`, and `labels_syn`.
"""
args = self.args
# setting = self.setting
# device = self.device
cpu_data = copy.deepcopy(data)
if args.setting == 'trans':
gps_graph = graphs.Graph(W=data.adj_full)
candidate, C_list, Gc_list = self.coarsening(gps_graph)
coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge = self.process_coarsened(
cpu_data, candidate, C_list, Gc_list)
train_idx = np.nonzero(coarsen_train_mask.numpy())[0]
coarsen_features = coarsen_features[train_idx]
coarsen_edge = ei2csr(coarsen_edge, coarsen_train_mask.shape[0])[np.ix_(train_idx, train_idx)]
coarsen_train_labels = coarsen_train_labels[train_idx]
else:
gps_graph = graphs.Graph(W=data.adj_full)
candidate, C_list, Gc_list = self.coarsening(gps_graph)
coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge = self.process_coarsened(
cpu_data, candidate, C_list, Gc_list)
coarsen_edge = ei2csr(coarsen_edge, coarsen_train_mask.shape[0])
data.adj_syn, data.feat_syn, data.labels_syn = to_tensor(coarsen_edge), coarsen_features, coarsen_train_labels
if save:
save_reduced(data.adj_syn, data.feat_syn, data.labels_syn, args)
return data
[docs]
def coarsening(self, H):
"""
Performs the coarsening process on the given graph.
Parameters
----------
H : object
The graph to be coarsened.
Returns
-------
candidate : list
List of subgraphs sorted by size.
C_list : list
List of coarsening matrices.
Gc_list : list
List of coarsened graphs.
"""
if H.A.shape[-1] != H.A.shape[1]:
H.logger.error('Inconsistent shape to extract components. Square matrix required.')
return None
if H.is_directed():
raise NotImplementedError('Directed graphs not supported yet.')
graphs = []
visited = np.zeros(H.A.shape[-1], dtype=bool)
while not visited.all():
stack = set([np.nonzero(~visited)[0][0]])
comp = []
while len(stack):
v = stack.pop()
if not visited[v]:
comp.append(v)
visited[v] = True
stack.update(set([idx for idx in H.A[v, :].nonzero()[1] if not visited[idx]]))
comp = sorted(comp)
G = H.subgraph(comp)
G.info = {'orig_idx': comp}
graphs.append(G)
print('the number of subgraphs is', len(graphs))
candidate = sorted(graphs, key=lambda x: len(x.info['orig_idx']), reverse=True)
number = 0
C_list = []
Gc_list = []
while number < len(candidate):
H = candidate[number]
if len(H.info['orig_idx']) > 10:
C, Gc, Call, Gall = self.coarsen(H)
C_list.append(C)
Gc_list.append(Gc)
number += 1
return candidate, C_list, Gc_list
[docs]
def process_coarsened(self, data, candidate, C_list, Gc_list):
"""
Processes the coarsened graph to extract relevant features and labels.
Parameters
----------
data : object
The original data.
candidate : list
List of candidate subgraphs.
C_list : list
List of coarsening matrices.
Gc_list : list
List of coarsened graphs.
Returns
-------
coarsen_features : torch.Tensor
The coarsened features.
coarsen_train_labels : torch.Tensor
The coarsened training labels.
coarsen_train_mask : torch.Tensor
The coarsened training mask.
coarsen_edge : torch.Tensor
The coarsened edges.
"""
train_mask = data.train_mask
val_mask = data.val_mask
n_classes = max(data.y) + 1
if self.args.setting == 'trans':
features = data.x
labels = data.y
else:
features = data.x[train_mask]
labels = data.y[train_mask]
coarsen_node = 0
number = 0
coarsen_row = None
coarsen_col = None
coarsen_features = torch.Tensor([])
coarsen_train_labels = torch.Tensor([])
coarsen_train_mask = torch.Tensor([]).bool()
while number < len(candidate):
H = candidate[number]
keep = H.info['orig_idx']
H_features = features[keep]
H_labels = labels[keep]
if self.args.setting == "trans":
H_train_mask = train_mask[keep]
else:
H_train_mask = torch.ones(size=(len(H_labels),))
if len(H.info['orig_idx']) > 10 and torch.sum(H_train_mask) > 0:
train_labels = one_hot(H_labels, n_classes) # Shape: (H_labels.shape[0], n_classes)
if self.args.setting == "trans":
train_labels[~H_train_mask] = torch.Tensor([0 for _ in range(n_classes)])
C = C_list[number]
Gc = Gc_list[number]
new_train_mask = torch.BoolTensor(np.sum(C.dot(train_labels), axis=1))
mix_label = torch.FloatTensor(C.dot(train_labels))
mix_label[mix_label > 0] = 1
mix_mask = torch.sum(mix_label, dim=1)
new_train_mask[mix_mask > 1] = False
coarsen_features = torch.cat([coarsen_features, torch.FloatTensor(C.dot(H_features))], dim=0)
coarsen_train_labels = torch.cat(
[coarsen_train_labels, torch.argmax(torch.FloatTensor(C.dot(train_labels)), dim=1).float()], dim=0)
coarsen_train_mask = torch.cat([coarsen_train_mask, new_train_mask], dim=0)
if coarsen_row is None:
coarsen_row = Gc.W.tocoo().row
coarsen_col = Gc.W.tocoo().col
else:
current_row = Gc.W.tocoo().row + coarsen_node
current_col = Gc.W.tocoo().col + coarsen_node
coarsen_row = np.concatenate([coarsen_row, current_row], axis=0)
coarsen_col = np.concatenate([coarsen_col, current_col], axis=0)
coarsen_node += Gc.W.shape[0]
print(coarsen_node)
number += 1
coarsen_edge = torch.from_numpy(np.array([coarsen_row, coarsen_col])).long()
coarsen_train_labels = coarsen_train_labels.long()
return coarsen_features, coarsen_train_labels, coarsen_train_mask, coarsen_edge