Source code for graphslim.sparsification.cent_pagerank

import numpy as np

from graphslim.sparsification.model_free_coreset_base import MFCoreSet
from scipy.sparse import csr_matrix, diags
from numpy.linalg import norm


class CentP(MFCoreSet):
    """Model-free coreset selector based on PageRank centrality.

    For every class, the training nodes with the highest PageRank score are
    kept; the per-class budget is read from ``self.num_class_dict``.
    """

    def select(self, embedds=None):
        """Return the indices of the top-PageRank training nodes of each class.

        Parameters
        ----------
        embedds : optional
            Unused here; kept so the signature stays compatible with callers
            that pass embeddings to every selector.

        Returns
        -------
        numpy.ndarray
            Concatenation of the selected node indices, class by class. Within
            a class the indices are ordered by ascending PageRank. An empty
            integer array is returned when nothing is selected.
        """
        pr = self.pagerank_algorithm()

        idx_selected = []
        for class_id, cnt in self.num_class_dict.items():
            # A budget of 0 must select nothing. Slicing with ``[-0:]`` would
            # return *every* node of the class instead, so skip explicitly.
            if cnt <= 0:
                continue

            # Training nodes that carry the current class label.
            idx = self.idx_train[self.labels_train == class_id]

            # argsort is ascending, so the ``cnt`` best nodes are at the tail.
            order = np.argsort(pr[idx])
            start = max(len(order) - cnt, 0)
            idx_selected.append(idx[order[start:]])

        if not idx_selected:
            # np.hstack raises on an empty list; report "no nodes" instead.
            return np.array([], dtype=np.int64)
        return np.hstack(idx_selected)

    def pagerank_algorithm(self, damping_factor=0.85, max_iterations=100, convergence_threshold=0.0001):
        """Compute PageRank scores for every node by power iteration.

        The adjacency is taken from ``self.data.adj_train`` when
        ``self.args.setting == 'ind'`` and from ``self.data.adj_full``
        otherwise.

        Parameters
        ----------
        damping_factor : float
            Probability of following an edge rather than teleporting.
        max_iterations : int
            Upper bound on the number of power-iteration steps.
        convergence_threshold : float
            Iteration stops once the L1 change between two successive score
            vectors falls below this value.

        Returns
        -------
        numpy.ndarray
            1-D array of length ``n`` with one score per node.
        """
        if self.args.setting == 'ind':
            adj = self.data.adj_train.astype(np.uint8)
        else:
            adj = self.data.adj_full.astype(np.uint8)
        # NOTE(review): the uint8 cast is kept from the original code; it
        # presumably assumes a 0/1 adjacency - confirm for weighted graphs.
        n = adj.shape[0]
        adj = csr_matrix(adj)

        # Out-degree of every node; nodes without edges get degree 1 purely to
        # avoid a division by zero (their row stays all-zero).
        out_degree = np.array(adj.sum(axis=1)).flatten()
        out_degree[out_degree == 0] = 1

        # Row-normalised matrix: entry (i, j) is the probability of moving
        # from node i to node j.
        transition_matrix = adj.multiply(1.0 / out_degree[:, None])

        # Rank flows along edges *into* a node, so the update must use the
        # transpose. Multiplying by the row-stochastic matrix itself makes the
        # uniform start vector a fixed point and yields identical scores for
        # all connected nodes.
        incoming = transition_matrix.T.tocsr()

        pagerank = np.ones((n, 1)) / n
        teleport = (1 - damping_factor) * np.ones((n, 1)) / n

        for _ in range(max_iterations):
            old_pagerank = pagerank
            pagerank = damping_factor * (incoming @ old_pagerank) + teleport
            if norm(pagerank - old_pagerank, ord=1) < convergence_threshold:
                break

        return pagerank.flatten()