diff --git a/include/boost/graph/personalized_page_rank.hpp b/include/boost/graph/personalized_page_rank.hpp new file mode 100644 index 000000000..dfc5fea0d --- /dev/null +++ b/include/boost/graph/personalized_page_rank.hpp @@ -0,0 +1,236 @@ +// Copyright 2026 Emmanouil Krasanakis + +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +// Authors: Emmanouil Krasanakis + +#ifndef BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP +#define BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost +{ +namespace graph +{ + struct rank_convergence + { + explicit rank_convergence(std::size_t iters, double tol=0) : iters(iters), tol(tol) {} // iters caps the number of convergence checks; tol == 0 disables early stopping. NOTE(review): iters == 0 would wrap around in --iters below if the member is unsigned like this parameter - confirm callers pass iters >= 1. + template < typename RankMap, typename RankMap2, typename Graph > + bool operator()(const RankMap& current, const RankMap2& previous, const Graph& g) + { + if (--iters == 0) + return true; + if (!tol) + return false; + using rank_type = typename property_traits< RankMap >::value_type; + rank_type sum_abs(0); + for (auto v : boost::make_iterator_range(vertices(g))) + sum_abs += std::abs(get(current, v) - get(previous, v)); + return sum_abs*num_vertices(g) + void personalized_page_rank_step( + const Graph& g, + WeightMap weight_map, + PersonalizationMap personalization_map, + RankMap from_rank, + RankMap2 to_rank, + typename property_traits< RankMap >::value_type damping, + incidence_graph_tag) + { + using rank_type = typename property_traits< RankMap >::value_type; + rank_type l1_norm(0); // Running sum of everything written to to_rank, used to normalize at the end; computing it here avoids an extra summing pass. + + // Seed to_rank with the constant term (1 - damping) * personalization, which does not depend on from_rank.
+ for (auto v : boost::make_iterator_range(vertices(g))) + { + auto v_constant = rank_type(1 - damping) * get(personalization_map, v); + put(to_rank, v, v_constant); + l1_norm += v_constant; + } + + // Push pass: every vertex u adds get(weight_map, e) * damping * from_rank[u] to the target of each of its out-edges e. + for (auto u : boost::make_iterator_range(vertices(g))) + { + rank_type u_rank_factor = damping * get(from_rank, u); + rank_type l1_accumulated_norm(0); // Per-vertex partial sum, folded into l1_norm after the edge loop. TBD: presumably meant to limit floating-point accumulation error; the original note suggested making l1_norm volatile, but compensated summation may be the better tool - confirm. + for (auto e : boost::make_iterator_range(out_edges(u, g))) + { + auto v = target(e, g); + rank_type u_rank_out = get(weight_map, e)*u_rank_factor; + put(to_rank, v, get(to_rank, v) + u_rank_out); + l1_accumulated_norm += u_rank_out; + } + l1_norm += l1_accumulated_norm; + } + // If there are negative edge weights, or if negative damping is used, l1_norm could be zero or near-zero. + // Division in those cases is conceptually correct for floating point weights, and actually expected behavior. + // That said, such edge cases cannot arise in typical uses of the algorithm. + for (auto v : boost::make_iterator_range(vertices(g))) + put(to_rank, v, get(to_rank, v)/l1_norm); + } + + template < + typename Graph, + typename WeightMap, + typename PersonalizationMap, + typename RankMap, + typename RankMap2 > + void personalized_page_rank_step( + const Graph& g, + WeightMap weight_map, + PersonalizationMap personalization_map, + RankMap from_rank, + RankMap2 to_rank, + typename property_traits< RankMap >::value_type damping, + bidirectional_graph_tag) + { + using damping_type = typename property_traits< RankMap >::value_type; + damping_type l1_norm(0); // Running sum of every score written to to_rank, used to normalize at the end; computing it here avoids an extra summing pass.
+ for (auto v : boost::make_iterator_range(vertices(g))) + { + damping_type rank(0); + for (auto e : boost::make_iterator_range(in_edges(v, g))) + rank += get(from_rank, source(e, g))*get(weight_map, e); + auto v_score = (damping_type(1) - damping) * get(personalization_map, v) + damping * rank; + put(to_rank, v, v_score); + l1_norm += v_score; + } + // l1_norm can be zero or near-zero with negative edge weights or negative damping; the division is left unguarded on purpose, as explained in the incidence_graph_tag overload. + for (auto v : boost::make_iterator_range(vertices(g))) + put(to_rank, v, get(to_rank, v)/l1_norm); + } + } // end namespace personalized_page_rank_detail + + template < + typename Graph, + typename WeightMap, + typename PersonalizationMap, + typename RankMap, + typename Done, + typename RankMap2 > + Done personalized_page_rank( + const Graph& g, + WeightMap weight_map, + PersonalizationMap personalization_map, + RankMap rank_map, + Done done, + typename property_traits< RankMap >::value_type damping, + RankMap2 rank_map2 + BOOST_GRAPH_ENABLE_IF_MODELS_PARM(Graph, vertex_list_graph_tag)) + { + using rank_type = typename property_traits< PersonalizationMap >::value_type; + rank_type personalization_norm(0); + for (auto v : boost::make_iterator_range(vertices(g))) + personalization_norm += get(personalization_map, v); + + // TBD: The loop below normalizes personalization_map and seeds rank_map in one fused pass over the vertices, on the assumption that this is more cache-friendly, + // but this is not necessarily the case because two maps are written on every step. + // Could investigate which pattern is faster in the future. NOTE(review): the division by personalization_norm is unguarded, so an all-zero personalization_map divides by zero - confirm callers rule that out.
+ for (auto v : boost::make_iterator_range(vertices(g))) + { + rank_type value = get(personalization_map, v)/personalization_norm; + put(personalization_map, v, value); + put(rank_map, v, value); + } + + bool to_map_2 = true; + do + { + typedef typename graph_traits< Graph >::traversal_category category; + if (to_map_2) + personalized_page_rank_detail::personalized_page_rank_step(g, weight_map, personalization_map, rank_map, rank_map2, damping, category()); + else + personalized_page_rank_detail::personalized_page_rank_step(g, weight_map, personalization_map, rank_map2, rank_map, damping, category()); + to_map_2 = !to_map_2; + } + while ((to_map_2 && !done(rank_map, rank_map2, g)) || (!to_map_2 && !done(rank_map2, rank_map, g))); // done is called as done(current, previous, g): the map written by the step that just ran goes first, because Done may not be symmetric. + + // Now multiply the result by personalization_norm to restore the order of magnitude and store it in rank_map. + // Also restore the original personalization_map's magnitude for reuse (this is lossy up to numerical tolerance + // but leaner than making a copy). Here to_map_2 == false means the final step wrote rank_map2, so the result is taken from there.
+ if (!to_map_2) + { + for (auto v : boost::make_iterator_range(vertices(g))) + { + put(rank_map, v, get(rank_map2, v)*personalization_norm); + put(personalization_map, v, get(personalization_map, v)*personalization_norm); + } + } + else + { + for (auto v : boost::make_iterator_range(vertices(g))) + { + put(rank_map, v, get(rank_map, v)*personalization_norm); + put(personalization_map, v, get(personalization_map, v)*personalization_norm); + } + } + return done; + } + + template < + typename Graph, + typename WeightMap, + typename PersonalizationMap, + typename RankMap, + typename Done > + Done personalized_page_rank( + const Graph& g, + WeightMap weight_map, + PersonalizationMap personalization_map, + RankMap rank_map, + Done done, + typename property_traits< RankMap >::value_type damping) + { + using rank_type = typename property_traits< RankMap >::value_type; + std::vector< rank_type > ranks2(num_vertices(g)); + return personalized_page_rank(g, weight_map, personalization_map, rank_map, done, damping, + make_iterator_property_map(ranks2.begin(), get(vertex_index, g))); + } + + template < typename Graph, typename PersonalizationMap, typename RankMap > + rank_convergence personalized_page_rank( + const Graph& g, + PersonalizationMap personalization_map, + RankMap rank_map, + typename property_traits< RankMap >::value_type damping=0.85) + { + // Minimal-signature overload: each edge weighs 1 / out_degree(source), the scratch rank map is allocated here, and iteration stops via rank_convergence(100, 1.E-9).
+ using Edge = graph_traits::edge_descriptor; + using rank_type = typename property_traits< RankMap >::value_type; + std::vector< rank_type > ranks2(num_vertices(g)); + auto markovian_weights = make_function_property_map([&g](Edge e){ return 1.0 / out_degree(source(e, g), g); }); + return personalized_page_rank(g, + markovian_weights, + personalization_map, + rank_map, + rank_convergence(100, 1.E-9), + damping, + make_iterator_property_map(ranks2.begin(), get(vertex_index, g))); + } + +} +} // end namespace boost::graph + +#endif // BOOST_GRAPH_PERSONALIZED_PAGE_RANK_HPP diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 11cfbdaae..61f87d0e8 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -167,6 +167,7 @@ alias graph_test_regular : [ run delete_edge.cpp ] [ run johnson-test.cpp ] [ run lvalue_pmap.cpp ] + [ run personalized_pagerank_test.cpp ] ; alias graph_test_with_filesystem : : diff --git a/test/personalized_pagerank_test.cpp b/test/personalized_pagerank_test.cpp new file mode 100644 index 000000000..697aa8a5d --- /dev/null +++ b/test/personalized_pagerank_test.cpp @@ -0,0 +1,71 @@ + +#include +#include +#include +#include +#include +#include + +int main(int, char*[]) { + using namespace boost; + // deliberately hard (slow-converging) graph + using Graph = adjacency_list; + std::vector> edges = { + {0,1},{1,0},{1,2},{2,1},{2,3},{3,2}, + {4,5},{5,4},{5,6},{6,5},{6,7},{7,6},{7,8},{8,7},{8,9},{9,8},{9,10},{10,9}, + {0,3},{3,0},{1,3},{3,1},{1,4},{4,1}, + {4,6},{6,4},{6,9},{9,6},{6,8},{8,6},{7,9},{9,7},{8,10},{10,8}, + {11,10},{10,11},{10,12},{12,10} + }; + Graph g(edges.begin(), edges.end(), 13); + + std::vector ranks(num_vertices(g)); + auto rank_map = make_iterator_property_map(ranks.begin(), get(vertex_index, g)); + std::vector personalization(num_vertices(g)); + auto personalization_map = make_iterator_property_map(personalization.begin(), get(vertex_index, g)); + personalization[0] = 1; + personalization[1] = 1; + personalization[2] = 1; + 
personalization[3] = 1; + + std::size_t max_iters(100); // Convergence is so bad in this graph that it needs such a high cap. + using Edge = graph_traits::edge_descriptor; + auto weight = make_function_property_map( + [&g](Edge e){ return 1.0 / std::sqrt(double(out_degree(source(e, g), g) * out_degree(target(e, g), g))); }); + auto convergence1 = graph::rank_convergence(max_iters, 1.E-9); + convergence1 = graph::personalized_page_rank(g, weight, personalization_map, rank_map, convergence1, 0.9); + + std::cout << "ended after "<0); + BOOST_ASSERT(convergence1.iters( + [&g](Edge e){ return 1.0 / std::sqrt(double((1.0+out_degree(source(e, g), g)) * (1.0+out_degree(target(e, g), g)))); }); + auto convergence2 = graph::rank_convergence(max_iters, 1.E-9); + convergence2 = graph::personalized_page_rank(g, renorm_weight, personalization_map, rank_map, convergence2, -0.8); + std::cout << "ended after "<ranks[1]+0.1); + BOOST_ASSERT(ranks[8]==ranks[9]); + BOOST_ASSERT(ranks[11]==ranks[12]); + BOOST_ASSERT(ranks[11]<0); + BOOST_ASSERT(convergence2.iters