Pages

Search This Blog

Monday 17 November 2014

Implementation Of Page Rank in PHP

<?php
/**
 * Implementation of PageRank in PHP.
 *
 * @author Zuhair Mirza
 * @date   18-November-2014
 *
 * PageRank is an algorithm used by Google Search to rank websites in their
 * search engine results. PageRank was named after Larry Page, one of the
 * founders of Google. PageRank is a way of measuring the importance of
 * website pages. According to Google: PageRank works by counting the number
 * and quality of links to a page to determine a rough estimate of how
 * important the website is. The underlying assumption is that more important
 * websites are likely to receive more links from other websites.
 */

// Sample link graph: each key is a page id, each value lists the ids of the
// pages that page links to.
$links = array(
    1 => array(5),
    2 => array(4, 7, 8, 9),
    3 => array(1, 3, 7),
    4 => array(1, 2, 4, 8),
    5 => array(1, 6, 7, 9),
    6 => array(1, 5),
    // Page 7 is linked to by pages 2, 3 and 5 but had no entry of its own.
    // Declaring it with no outbound links keeps every link target a known
    // page, so the rank calculation never reads an undefined array key.
    7 => array(),
    8 => array(3, 4, 6, 8),
    9 => array(1, 8)
);

// calculatePageRank() is declared further down in this file.
$pageRank = calculatePageRank($links);

var_dump($pageRank);
?>

<!-- If we look at this link graph, a few things stand out. Page 1 is linked to by most of the other pages, so it is presumably very important. Page 4 is also popular, so it should be expected to rank well. We can then calculate the PageRank by running our iterative process.

We've got a hard limit of 100 iterations, but we also keep track of the amount of change between one set of PageRanks and the next.

If that change drops below a set level (here 0.00005) the process decides it's stable enough and stops.-->

<?php

/**
 * Compute PageRank scores for a link graph by repeated rank redistribution.
 *
 * Every page starts with an equal share of rank (1/n). On each iteration a
 * page splits its current rank evenly between the pages it links to; the
 * received rank is then blended with a uniform share using $dampingFactor,
 * and the scores are rescaled so that they sum to 1. Iteration stops once
 * the summed absolute change of an iteration is at most $tolerance, or after
 * $maxIterations iterations.
 *
 * Pages that appear only as link targets (they are not keys of $linkGraph)
 * are treated as pages without outbound links and are included in the
 * result, appended after the pages that are keys.
 *
 * @param array $linkGraph     Map of page id => array of page ids it links to.
 * @param float $dampingFactor Weight of the uniform share: the new score is
 *                             $dampingFactor / n + (1 - $dampingFactor) * received.
 *                             Note that with this formula the default 0.15
 *                             plays the role of "1 - d" in descriptions of
 *                             PageRank that quote a damping factor d = 0.85.
 * @param float $tolerance     Total per-iteration change at or below which
 *                             the ranks are considered stable.
 * @param int   $maxIterations Hard upper limit on the number of iterations.
 *
 * @return array Map of page id => score; scores sum to 1 (empty for an empty graph).
 */
function calculatePageRank($linkGraph, $dampingFactor = 0.15, $tolerance = 0.00005, $maxIterations = 100) {

    // Work on a copy that also knows about pages which are only ever linked
    // to. Without this, accumulating rank for such a page would touch an
    // undefined array key and the rank sent to it would silently vanish.
    $graph = $linkGraph;
    foreach ($linkGraph as $outbound) {
        foreach ($outbound as $link) {
            if (!isset($graph[$link])) {
                $graph[$link] = array();
            }
        }
    }

    $nodeCount = count($graph);
    if ($nodeCount === 0) {
        return array();
    }

    // initialise the PR as 1/n, with an empty accumulator per page
    $nodes = array_keys($graph);
    $pageRank = array_fill_keys($nodes, 1 / $nodeCount);
    $tempRank = array_fill_keys($nodes, 0);

    for ($iteration = 0; $iteration < $maxIterations; $iteration++) {

        // distribute the PR of each page evenly over its outbound links
        foreach ($graph as $node => $outbound) {
            $outboundCount = count($outbound);
            foreach ($outbound as $link) {
                $tempRank[$link] += $pageRank[$node] / $outboundCount;
            }
        }

        // calculate the new PR using the damping factor
        $change = 0;
        $total = 0;
        foreach ($nodes as $node) {
            $newRank = ($dampingFactor / $nodeCount) + (1 - $dampingFactor) * $tempRank[$node];
            $change += abs($pageRank[$node] - $newRank);
            $pageRank[$node] = $newRank;
            $tempRank[$node] = 0; // reset the accumulator for the next iteration
            $total += $newRank;
        }

        // No rank left at all (e.g. zero damping factor and no links):
        // nothing to normalise and nothing more can change.
        if (0.0 === (float) $total) {
            break;
        }

        // Normalise the page ranks so it's all a proportion 0-1
        foreach ($nodes as $node) {
            $pageRank[$node] /= $total;
        }

        if ($change <= $tolerance) {
            break;
        }
    }

    return $pageRank;
}
?>

Sunday 16 November 2014

Find Google Page Rank in PHP

<?php

/*
 *
 * @Author: Zuhair Mirza
 * @Implementation Of Google Page Rank in PHP
 * @Date : 17-November-2014
 *
 * Google PageRank (Google PR) is one of the methods Google uses to determine a page's relevance
 * or importance. Important pages receive a higher PageRank and are more likely to appear at the top
 * of the search results. Google PageRank (PR) is a measure from 0 - 10. Google Pagerank is based
 * on backlinks.
 *
 */


$url = 'http://en.wikipedia.org/';

$pr = new GooglePageRank();
$rank = $pr->get_google_pagerank($url);

// get_google_pagerank() yields null when no rank could be extracted from the
// response; say so explicitly instead of printing a truncated sentence.
echo "$url has Google PageRank: " . ($rank === null ? 'unavailable' : $rank);



/**
 * Looks up the Google Toolbar PageRank of a URL.
 *
 * The lookup sends a request to toolbarqueries.google.com carrying a
 * checksum ("ch" parameter) derived from the URL by HashURL() and
 * CheckHash().
 *
 * NOTE(review): Google is reported to have retired the public toolbar
 * PageRank endpoint; confirm it still answers before relying on this class.
 */
class GooglePageRank {

    /**
     * Query the toolbar endpoint for the PageRank of $url.
     *
     * @param string $url URL to look up.
     *
     * @return string|null The text following the "Rank_" marker in the
     *                     response (surrounding whitespace removed), or null
     *                     when the request fails or no marker is present.
     */
    public function get_google_pagerank($url) {

        // The URL is embedded in a query string, so it must be encoded;
        // otherwise characters such as "&" or "#" in $url corrupt the request.
        $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch="
            . $this->CheckHash($this->HashURL($url))
            . "&features=Rank&q=info:" . urlencode($url) . "&num=100&filter=0";

        $data = file_get_contents($query);
        if ($data === false) {
            // Request failed (PHP has already raised a warning with details).
            return null;
        }

        $pos = strpos($data, "Rank_");
        if ($pos === false) {
            return null;
        }

        // Skip 9 characters from the start of the marker; presumably the
        // response looks like "Rank_1:1:<rank>" - TODO confirm the format.
        return trim(substr($data, $pos + 9));
    }

    /**
     * Fold a string into a number: for each byte, multiply the running value
     * by $Magic, reduce it modulo 2^32 when it reaches 2^32, then add the
     * byte's ordinal value.
     *
     * @param string    $Str   Input string (processed byte by byte).
     * @param int|float $Check Initial value of the accumulator.
     * @param int       $Magic Multiplier applied before each byte is added.
     *
     * @return int|float The final accumulator value.
     */
    public function StrToNum($Str, $Check, $Magic) {
        $Int32Unit = 4294967296; // 2^32
        $length = strlen($Str);
        for ($i = 0; $i < $length; $i++) {
            $Check *= $Magic;
            if ($Check >= $Int32Unit) {
                // Drop everything above the low 32 bits.
                $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
                $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
            }
            // Square-bracket offset: the curly-brace form is a compile
            // error on PHP 8.
            $Check += ord($Str[$i]);
        }
        return $Check;
    }

    /**
     * Derive the numeric hash of a URL by mixing two StrToNum() results
     * (seed 0x1505 / multiplier 0x21 and seed 0 / multiplier 0x1003F)
     * through a fixed sequence of shifts and masks.
     *
     * @param string $String URL to hash.
     *
     * @return int The combined hash value.
     */
    public function HashURL($String) {
        $Check1 = $this->StrToNum($String, 0x1505, 0x21);
        $Check2 = $this->StrToNum($String, 0, 0x1003F);
        $Check1 >>= 2;
        $Check1 = (($Check1 >> 4) & 0x3FFFFC0 ) | ($Check1 & 0x3F);
        $Check1 = (($Check1 >> 4) & 0x3FFC00 ) | ($Check1 & 0x3FF);
        $Check1 = (($Check1 >> 4) & 0x3C000 ) | ($Check1 & 0x3FFF);
        $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) << 2 ) | ($Check2 & 0xF0F );
        $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000 );
        return ($T1 | $T2);
    }

    /**
     * Build the checksum string for a hash value: the literal "7", one check
     * digit, then the hash rendered as an unsigned decimal number.
     *
     * The check digit is computed over the decimal digits from right to
     * left, doubling every second digit and summing the digits of the
     * doubled value.
     *
     * @param int $Hashnum Hash value as returned by HashURL().
     *
     * @return string The checksum string.
     */
    public function CheckHash($Hashnum) {
        $CheckByte = 0;
        $Flag = 0;
        $HashStr = sprintf('%u', $Hashnum);
        $length = strlen($HashStr);
        for ($i = $length - 1; $i >= 0; $i --) {
            // Square-bracket offset (curly braces no longer compile on
            // PHP 8); cast so the arithmetic below works on an int.
            $Re = (int) $HashStr[$i];
            if (1 === ($Flag % 2)) {
                // Every second digit: double it, then add its two digits.
                $Re += $Re;
                $Re = (int) ($Re / 10) + ($Re % 10);
            }
            $CheckByte += $Re;
            $Flag ++;
        }
        $CheckByte %= 10;
        if (0 !== $CheckByte) {
            $CheckByte = 10 - $CheckByte;
            // $Flag now equals the number of digits; odd-length numbers get
            // the check digit adjusted and halved.
            if (1 === ($Flag % 2)) {
                if (1 === ($CheckByte % 2)) {
                    $CheckByte += 9;
                }
                $CheckByte >>= 1;
            }
        }
        return '7' . $CheckByte . $HashStr;
    }

}

?>