-
Notifications
You must be signed in to change notification settings - Fork 64
/
rw.c
103 lines (82 loc) · 3.03 KB
/
rw.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*!
* \file
*
* \brief Various routines that perform random-walk based operations
on graphs stored as gk_csr_t matrices.
*
* \author George Karypis
* \version\verbatim $Id: rw.c 11078 2011-11-12 00:20:44Z karypis $ \endverbatim
*/
#include <GKlib.h>
/*************************************************************************/
/*! Computes the (personalized) page-rank of the vertices in a graph.
\param mat is the matrix storing the graph.
\param lamda is the restart probability.
\param eps is the error tolerance for convergance.
\param max_niter is the maximum number of allowed iterations.
\param pr on entry stores the restart distribution of the vertices.
This allows for the computation of personalized page-rank scores
by appropriately setting that parameter.
On return, pr stores the computed page ranks.
\returns the number of iterations that were performed.
*/
/**************************************************************************/
int gk_rw_PageRank(gk_csr_t *mat, float lamda, float eps, int max_niter, float *pr)
{
ssize_t i, j, k, iter, nrows;
double *rscale, *prold, *prnew, *prtmp;
double fromsinks, error;
ssize_t *rowptr;
int *rowind;
float *rowval;
nrows = mat->nrows;
rowptr = mat->rowptr;
rowind = mat->rowind;
rowval = mat->rowval;
prold = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prnew");
prnew = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: prold");
rscale = gk_dsmalloc(nrows, 0, "gk_rw_PageRank: rscale");
/* compute the scaling factors to get adjacency weights into transition
probabilities */
for (i=0; i<nrows; i++) {
for (j=rowptr[i]; j<rowptr[i+1]; j++)
rscale[i] += rowval[j];
if (rscale[i] > 0)
rscale[i] = 1.0/rscale[i];
}
/* the restart distribution is the initial pr scores */
for (i=0; i<nrows; i++)
prnew[i] = pr[i];
/* get into the PR iteration */
for (iter=0; iter<max_niter; iter++) {
gk_SWAP(prnew, prold, prtmp);
gk_dset(nrows, 0.0, prnew);
/* determine the total current PR score of the sinks so that you
can distribute them to all nodes according to the restart
distribution. */
for (fromsinks=0.0, i=0; i<nrows; i++) {
if (rscale[i] == 0)
fromsinks += prold[i];
}
/* push random-walk scores to the outlinks */
for (i=0; i<nrows; i++) {
for (j=rowptr[i]; j<rowptr[i+1]; j++)
prnew[rowind[j]] += prold[i]*rscale[i]*rowval[j];
}
/* apply the restart conditions */
for (i=0; i<nrows; i++) {
prnew[i] = lamda*(fromsinks*pr[i]+prnew[i]) + (1.0-lamda)*pr[i];
}
/* compute the error */
for (error=0.0, i=0; i<nrows; i++)
error = (fabs(prnew[i]-prold[i]) > error ? fabs(prnew[i]-prold[i]) : error);
//printf("nrm1: %le maxfabserr: %le\n", gk_dsum(nrows, prnew, 1), error);
if (error < eps)
break;
}
/* store the computed pr scores into pr for output */
for (i=0; i<nrows; i++)
pr[i] = prnew[i];
gk_free((void **)&prnew, &prold, &rscale, LTERM);
return (int)(iter+1);
}