# Source code for graphslim.dataset.convertor

# from deeprobust.graph.data import Dataset
from typing import Optional

import numpy as np
import torch
from pygsp import graphs
from scipy.sparse import coo_matrix
from torch_geometric.utils import to_undirected, to_dense_adj, remove_self_loops, add_self_loops
from torch_sparse import SparseTensor
import networkit as nk
from torch_geometric.data import Data, HeteroData
import dgl


def from_dgl(g, name, hetero=True):
    """Convert a DGL graph into a PyTorch Geometric data object.

    Args:
        g: DGL graph. A homogeneous graph is converted directly; for a
            heterogeneous graph the result depends on ``hetero``.
        name: Dataset name, stored as ``name`` on the intermediate
            ``HeteroData``. Not used for homogeneous graphs.
        hetero: Only consulted when ``g`` is heterogeneous. If True, the
            ``HeteroData`` is returned. If False, the graph is flattened
            into a single homogeneous ``Data``.

    Returns:
        ``Data`` for a homogeneous graph or when ``hetero`` is False,
        ``HeteroData`` otherwise.

        The flattened ``Data`` carries:
          * ``edge_index``: the edge indices of every canonical edge type
            concatenated as-is, plus self loops,
          * ``x`` / ``y``: the ``'feature'`` / ``'label'`` entries of the
            first node store,
          * ``num_nodes`` and ``num_classes`` derived from ``x`` and ``y``.
    """
    if g.is_homogeneous:
        data = Data()
        data.edge_index = torch.stack(g.edges(), dim=0)

        for attr, value in g.ndata.items():
            data[attr] = value
        for attr, value in g.edata.items():
            data[attr] = value

        return data

    data = HeteroData()
    data.name = name

    for node_type in g.ntypes:
        for attr, value in g.nodes[node_type].data.items():
            data[node_type][attr] = value

    for edge_type in g.canonical_etypes:
        row, col = g.edges(form="uv", etype=edge_type)
        data[edge_type].edge_index = torch.stack([row, col], dim=0)
        # NOTE(review): this stores the values returned by
        # edge_attr_schemes() under each attribute name; confirm whether the
        # edge feature tensors themselves were intended.
        for attr, value in g.edge_attr_schemes(edge_type).items():
            data[edge_type][attr] = value

    if hetero:
        # Hand back the heterogeneous object that was just assembled rather
        # than an empty placeholder.
        return data

    data_out = Data()
    edge_index_list = [data[edge_type].edge_index for edge_type in g.canonical_etypes]
    data_out.edge_index = add_self_loops(torch.cat(edge_index_list, dim=1))[0]

    first_store = data.node_stores[0]
    data_out.x = first_store['feature']  # Features for each node
    data_out.y = first_store['label']  # Labels for each node
    # The train/val/test masks of the first node store are intentionally not
    # copied over:
    # data_out.train_mask = data.node_stores[0]['train_mask']
    # data_out.val_mask = data.node_stores[0].get('val_mask', None)
    # data_out.test_mask = data.node_stores[0]['test_mask']
    data_out.num_nodes = data_out.x.size(0)
    # Tensor.max() reduces natively instead of iterating element by element
    # through Python's builtin max().
    data_out.num_classes = data_out.y.max().item() + 1
    return data_out
def pyg2gsp(edge_index):
    """Build a PyGSP graph from a PyG edge index.

    The edge index is first made undirected, then expanded into a dense
    adjacency matrix which is handed to PyGSP as the weight matrix ``W``.

    Args:
        edge_index: PyG edge index tensor.

    Returns:
        ``pygsp.graphs.Graph`` built from the dense adjacency matrix.
    """
    symmetric_edges = to_undirected(edge_index)
    # to_dense_adj returns a stacked result; only its first entry is used.
    dense_adjacency = to_dense_adj(symmetric_edges)[0]
    return graphs.Graph(W=dense_adjacency)
def csr2ei(adjacency_matrix_csr):
    """Convert a SciPy sparse adjacency matrix into an edge index tensor.

    Args:
        adjacency_matrix_csr: sparse matrix exposing ``tocoo()``.

    Returns:
        ``torch.long`` tensor with two rows: the row indices of the stored
        entries on top, the matching column indices below.
    """
    coo = adjacency_matrix_csr.tocoo()
    # Stack the coordinate arrays in NumPy so a single tensor is created.
    stacked_indices = np.stack((coo.row, coo.col), axis=0)
    return torch.tensor(stacked_indices, dtype=torch.long)
def ei2csr(edge_index, num_nodes):
    """Convert an edge index tensor into a SciPy CSR adjacency matrix.

    Args:
        edge_index: tensor whose first row holds source node ids and whose
            second row holds target node ids. It may live on any device.
        num_nodes: number of nodes; the result has shape
            ``(num_nodes, num_nodes)``.

    Returns:
        ``scipy.sparse`` CSR matrix with a 1 for every listed edge. Edges
        listed more than once are summed by the COO -> CSR conversion.
    """
    # Tensor.numpy() refuses CUDA tensors and tensors that track gradients,
    # so detach and move to host memory before converting.
    edge_array = edge_index.detach().cpu().numpy()
    sources, targets = edge_array[0], edge_array[1]
    unit_weights = np.ones_like(sources)
    adjacency_coo = coo_matrix((unit_weights, (sources, targets)), shape=(num_nodes, num_nodes))
    return adjacency_coo.tocsr()
def dense2sparsetensor(mat: torch.Tensor, has_value: bool = True):
    """Convert a dense tensor into a ``torch_sparse.SparseTensor``.

    Args:
        mat: dense tensor. The first two dimensions are the sparse
            dimensions; for tensors with more than two dimensions an entry
            is kept when the sum of absolute values over the remaining
            dimensions is non-zero.
        has_value: when True the kept entries of ``mat`` are stored as the
            values of the sparse tensor, otherwise no values are stored.

    Returns:
        ``SparseTensor`` with sparse sizes ``(mat.size(0), mat.size(1))``.
    """
    if mat.dim() > 2:
        trailing_dims = list(range(2, mat.dim()))
        nonzero_positions = mat.abs().sum(trailing_dims).nonzero()
    else:
        nonzero_positions = mat.nonzero()

    coords = nonzero_positions.t()
    row, col = coords[0], coords[1]
    value: Optional[torch.Tensor] = mat[row, col] if has_value else None

    return SparseTensor(
        row=row,
        rowptr=None,
        col=col,
        value=value,
        sparse_sizes=(mat.size(0), mat.size(1)),
        is_sorted=True,
        trust_data=True,
    )
def networkit_to_pyg(graph):
    """Convert a NetworKit graph into a PyTorch Geometric ``Data`` object.

    Args:
        graph: NetworKit graph.

    Returns:
        ``Data`` whose ``edge_index`` has one column per edge of ``graph``.
        ``edge_attr`` holds the edge weights as floats when
        ``graph.isWeighted()`` is true and is ``None`` otherwise.
    """
    # Keep using edges() where the graph provides it and fall back to
    # iterEdges() otherwise.
    # NOTE(review): newer NetworKit releases appear to expose only
    # iterEdges(); confirm against the pinned version.
    list_edges = getattr(graph, "edges", None)
    edges = list(list_edges() if callable(list_edges) else graph.iterEdges())

    # reshape(-1, 2) keeps the result at shape (2, 0) for an edge-less graph,
    # where the transpose alone would leave a 1-D tensor of shape (0,).
    edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()

    edge_attr = None
    if graph.isWeighted():
        edge_attr = torch.tensor([graph.weight(u, v) for u, v in edges], dtype=torch.float)

    return Data(edge_index=edge_index, edge_attr=edge_attr)
def pyg_to_networkit(pyg_graph):
    """Convert a PyTorch Geometric graph into an undirected NetworKit graph.

    Args:
        pyg_graph: object exposing ``edge_index``, ``num_nodes`` and
            optionally ``edge_attr``. Tensors may live on any device.

    Returns:
        NetworKit graph built with ``nk.GraphFromCoo`` and
        ``directed=False``, with edge ids indexed. It is weighted when
        ``pyg_graph.edge_attr`` is present and not ``None``.
    """
    # Tensor.numpy() refuses CUDA tensors and tensors that track gradients,
    # so detach and move to host memory before converting.
    edge_index = pyg_graph.edge_index.detach().cpu().numpy()
    coordinates = (edge_index[0], edge_index[1])

    edge_attr = getattr(pyg_graph, 'edge_attr', None)
    if edge_attr is not None:
        weights = edge_attr.detach().cpu().numpy()
        graph = nk.GraphFromCoo(inputData=(weights, coordinates), n=pyg_graph.num_nodes,
                                weighted=True, directed=False)
    else:
        graph = nk.GraphFromCoo(inputData=coordinates, n=pyg_graph.num_nodes,
                                weighted=False, directed=False)

    graph.indexEdges()
    return graph
def loadSparseGraph(dataset_name):
    """Load the original graph of a dataset from its raw edge-list file.

    The data layout follows CHEN Y, YE H, VEDULA S, et al. "Demystifying
    graph sparsification algorithms in graph properties preservation".
    GraphSlim only supports undirected graphs and does not distinguish
    weighted from unweighted ones, so the undirected, unweighted edge list
    is the one that is read.

    Intended pipeline:
    pyg -> networkit -> save sparsified networkit -> pyg -> evaluation

    Args:
        dataset_name (str): dataset name, used as the directory name under
            ``../data``.

    Returns:
        nk graph: original graph, with edge ids indexed.
    """
    # NOTE(review): path and reader format are taken from the undirected /
    # unweighted branch of the loader that used to be commented out here;
    # confirm they match the files on disk. The other variants of that loader
    # read raw/dw.wel (directed, weighted), raw/duw.el (directed, unweighted)
    # and raw/udw.wel (undirected, weighted) with the same format.
    originalGraph = nk.readGraph(f"../data/{dataset_name}/raw/uduw.el", nk.Format.EdgeListSpaceZero,
                                 directed=False)
    nk.overview(originalGraph)
    originalGraph.indexEdges()
    return originalGraph
# class Pyg2Dpr(Dataset): # def __init__(self, pyg_data, **kwargs): # try: # splits = pyg_data.get_idx_split() # except: # pass # # dataset_name = pyg_data.name # pyg_data = pyg_data[0] # n = pyg_data.num_nodes # # if dataset_name == 'ogbn-arxiv': # symmetrization # pyg_data.edge_index = to_undirected(pyg_data.edge_index, pyg_data.num_nodes) # # self.adj = sp.csr_matrix((np.ones(pyg_data.edge_index.shape[1]), # (pyg_data.edge_index[0], pyg_data.edge_index[1])), shape=(n, n)) # # self.features = pyg_data.x.numpy() # self.labels = pyg_data.y.numpy() # # if len(self.labels.shape) == 2 and self.labels.shape[1] == 1: # self.labels = self.labels.reshape(-1) # ogb-arxiv needs to reshape # # if hasattr(pyg_data, 'train_mask'): # # for fixed split # self.idx_train = mask_to_index(pyg_data.train_mask, n) # self.idx_val = mask_to_index(pyg_data.val_mask, n) # self.idx_test = mask_to_index(pyg_data.test_mask, n) # self.name = 'Pyg2Dpr' # else: # try: # # for ogb # self.idx_train = splits['train'] # self.idx_val = splits['valid'] # self.idx_test = splits['test'] # self.name = 'Pyg2Dpr' # except: # # for other datasets # self.idx_train, self.idx_val, self.idx_test = get_train_val_test( # nnodes=n, val_size=0.1, test_size=0.8, stratify=self.labels) # class Dpr2Pyg(InMemoryDataset): # # def __init__(self, dpr_data, transform=None, **kwargs): # root = 'data/' # dummy root; does not mean anything # self.dpr_data = dpr_data # super(Dpr2Pyg, self).__init__(root, transform) # pyg_data = self.process() # self.data, self.slices = self.collate([pyg_data]) # self.transform = transform # # def process(self): # dpr_data = self.dpr_data # # edge_index = torch.LongTensor(dpr_data.adj.nonzero()) # # if type(dpr_data.adj) == torch.Tensor: # # adj_selfloop = dpr_data.adj + torch.eye(dpr_data.adj.shape[0]).cuda() # # edge_index_selfloop = adj_selfloop.nonzero().T # # edge_index = edge_index_selfloop # # edge_weight = adj_selfloop[edge_index_selfloop[0], edge_index_selfloop[1]] # # else: # # 
# # adj_selfloop = dpr_data.adj + sp.eye(dpr_data.adj.shape[0]) # # edge_index = torch.LongTensor(adj_selfloop.nonzero()).cuda() # # edge_weight = torch.FloatTensor(adj_selfloop[adj_selfloop.nonzero()]).cuda() # # # by default, the features in pyg data is dense # if scipy.sparse.issparse(dpr_data.features): # x = torch.FloatTensor(dpr_data.features.todense()).float() # else: # x = torch.FloatTensor(dpr_data.features).float() # y = torch.LongTensor(dpr_data.labels) # # # try: # # x = torch.FloatTensor(dpr_data.features.cpu()).float().cuda() # # except: # # x = torch.FloatTensor(dpr_data.features).float().cuda() # # try: # # y = torch.LongTensor(dpr_data.labels.cpu()).cuda() # # except: # # y = dpr_data.labels # # data = Data(x=x, edge_index=edge_index, y=y) # data.train_mask = None # data.val_mask = None # data.test_mask = None # return data # # def get(self, idx): # data = self.data.__class__() # # if hasattr(self.data, '__num_nodes__'): # data.num_nodes = self.data.__num_nodes__[idx] # # for key in self.data.keys: # item, slices = self.data[key], self.slices[key] # s = list(repeat(slice(None), item.dim())) # s[self.data.__cat_dim__(key, item)] = slice(slices[idx], # slices[idx + 1]) # data[key] = item[s] # return data # # @property # def raw_file_names(self): # return ['some_file_1', 'some_file_2', ...] 
# # @property # def processed_file_names(self): # return ['data.pt'] # # def _download(self): # pass # class Data2Pyg: # # def __init__(self, data, device='cuda', transform=None, **kwargs): # self.data_train = Dpr2Pyg(data.data_train, transform=transform)[0].to(device) # self.data_val = Dpr2Pyg(data.data_val, transform=transform)[0].to(device) # self.data_test = Dpr2Pyg(data.data_test, transform=transform)[0].to(device) # self.nclass = data.nclass # self.nfeat = data.nfeat # self.class_dict = None # # def retrieve_class(self, c, num=256): # if self.class_dict is None: # self.class_dict = {} # for i in range(self.nclass): # self.class_dict['class_%s' % i] = (self.data_train.y == i).cpu().numpy() # idx = np.arange(len(self.data_train.y)) # idx = idx[self.class_dict['class_%s' % c]] # return np.random.permutation(idx)[:num]