Source code for graphslim.coarsening.vng

import copy

import numpy as np
import torch
from sklearn.cluster import KMeans

from graphslim.dataset.utils import save_reduced
from graphslim.evaluation.utils import verbose_time_memory
from graphslim.models import GCN
from graphslim.utils import one_hot


[docs] class VNG: """ A class that implements Virtual Node Graph (VNG) reduction for coarsening graphs. Refer to paper "Serving Graph Compression for Graph Neural Networks" https://openreview.net/forum?id=T-qVtA3pAxG. Parameters ---------- setting : str Configuration setting. data : object Data object containing the graph and feature information. args : object Arguments containing various settings for the coarsening process. **kwargs : dict, optional Additional keyword arguments. """ def __init__(self, setting, data, args, **kwargs): self.setting = setting self.args = args self.device = args.device # Pass data for initialization
[docs] @verbose_time_memory def reduce(self, data, verbose=False): """ Reduces the data by applying Virtual Node Graph (VNG) method. Parameters ---------- data : object The data to be reduced. verbose : bool, optional If True, prints verbose output. Defaults to False. Returns ------- object The reduced data with synthesized adjacency, features, and labels. """ args = self.args setting = self.setting cpu_data = copy.deepcopy(data) if args.setting == 'trans': model = eval(args.condense_model)(data.feat_full.shape[1], args.hidden, data.nclass, args).to(self.device) model.fit_with_val(data, train_iters=args.eval_epochs, normadj=True, verbose=verbose, setting=args.setting, reduced=False) embeds = model.predict(data.feat_full, data.adj_full, output_layer_features=True) embeds = [embed[data.train_mask] for embed in embeds] labels = one_hot(data.labels_train, data.nclass) coarsen_edge, coarsen_features, coarsen_labels = self.vng(data.feat_full[data.train_mask], embeds, data.adj_train, labels) else: model = eval(args.condense_model)(data.feat_full.shape[1], args.hidden, data.nclass, args).to(self.device) model.fit_with_val(data, train_iters=args.eval_epochs, normadj=True, verbose=verbose, setting=args.setting, reduced=False, reindex=True) model.eval() embeds = model.predict(data.feat_train, data.adj_train, output_layer_features=True) embeds = [embed for embed in embeds] labels = one_hot(data.labels_train, data.nclass) coarsen_edge, coarsen_features, coarsen_labels = self.vng(data.feat_full[data.train_mask], embeds, data.adj_train, labels) data.adj_syn, data.feat_syn, data.labels_syn = coarsen_edge, coarsen_features, coarsen_labels save_reduced(coarsen_edge, coarsen_features, coarsen_labels, args) return data
[docs] def vng(self, X_tr_0, embeds, adj, labels, verbose=False): """ Virtual Node Graph (VNG) method to coarsen the graph. Parameters ---------- embeds : list of tensors List of embeddings. adj : tensor Adjacency matrix. labels : tensor One-hot encoded labels. verbose : bool, optional If True, prints verbose output. Defaults to False. Returns ------- tuple A tuple containing: - coarsen_edge : tensor Coarsened adjacency matrix. - coarsen_features : tensor Coarsened features. - coarsen_labels : tensor Coarsened labels. """ X_tr_head = torch.concat(embeds, dim=1).cpu().detach().numpy() X_tr_0 = X_tr_0.cpu().detach().numpy() A_tr = adj column_sum = np.sum(A_tr, axis=0) column_sum = np.asarray(column_sum).reshape(-1) for i in range(X_tr_0.shape[0]): # Prevent column_sum from being zero if column_sum[i] == 0: column_sum[i] = 1 num_coarsen_node = int(self.args.reduction_rate * X_tr_0.shape[0]) # PROPAGATION FROM VIRTUAL REPRESENTATIVE NODES TO TESTING NODES kmeans = KMeans(n_clusters=num_coarsen_node, random_state=2024, n_init=1) kmeans.fit(X_tr_head, sample_weight=column_sum) E = np.zeros((num_coarsen_node, X_tr_0.shape[0])) M = np.zeros((num_coarsen_node, X_tr_0.shape[0])) for i in range(X_tr_0.shape[0]): cluster_label = kmeans.labels_[i] E[cluster_label, i] = column_sum[i] M[cluster_label, i] = 1 row_sums = E.sum(axis=1) E = E / row_sums[:, np.newaxis] X_vr_0 = E @ X_tr_0 if verbose: print("E: ", E.shape) print("X_vr_0: ", X_vr_0.shape) # PROPAGATION BETWEEN VIRTUAL REPRESENTATIVE NODES P = E @ X_tr_head Q = E @ A_tr @ X_tr_head Up, Sp, Vtp = np.linalg.svd(P, full_matrices=False) A_vr = Q @ Vtp.T @ np.diag(1 / Sp) @ Up.T if verbose: print("P: ", P.shape) print("Q: ", Q.shape) print("Up: ", Up.shape) print("Sp: ", Sp.shape) print("Vtp: ", Vtp.shape) print("Vp: ", Vtp.T.shape) print("A_vr Sparsity", len(np.nonzero(A_vr)[0]) / A_vr.shape[0] ** 2) print("A_vr: ", A_vr.shape) coarsen_features = torch.from_numpy(X_vr_0).float() coarsen_edge = torch.from_numpy(A_vr).float() coarsen_labels = torch.argmax(torch.FloatTensor(M.dot(labels)), dim=1).long() return coarsen_edge, coarsen_features, coarsen_labels