PHP Classes

File: crawler.php

Recommend this page to a friend!
  Classes of Juraj Puchký   PHP Google Crawler   crawler.php   Download  
File: crawler.php
Role: Example script
Content type: text/plain
Description: Example script
Class: PHP Google Crawler
Perform Google searches and get the result URLs
Author: By Juraj Puchký
Last change:
Date: 5 years ago
Size: 1,529 bytes
 

Contents

Class file image Download
<?php

require_once 'implementation/Google.php';

use
App\ContactCrawler\Google;

/**
 * Collect the e-mail addresses referenced by "mailto:" links on a page.
 *
 * The page body is obtained through \App\ContactCrawler\Search::getData()
 * and every <a> element is inspected. Only hrefs whose scheme is "mailto"
 * (compared case-insensitively) are considered. For each of them the
 * "?subject=..." style query part is dropped, comma-separated recipient
 * lists are split, and percent-encoding is decoded.
 *
 * @param string $url Address of the page to download and scan.
 *
 * @return string[] Addresses in document order; duplicates are kept.
 */
function collectEmail($url) {
    $s = new \App\ContactCrawler\Search();
    $data = $s->getData($url);

    // Nothing usable was downloaded. DOMDocument::loadHTML() rejects an empty
    // source (a warning on PHP 7, a ValueError on PHP 8 that "@" cannot
    // silence), so bail out before parsing.
    if (!is_string($data) || trim($data) === '') {
        return [];
    }

    // Real-world markup is rarely valid: collect libxml errors internally
    // instead of emitting warnings, then put the global setting back.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $emails = [];
    foreach ($dom->getElementsByTagName('a') as $anchor) {
        $href = trim($anchor->getAttribute('href'));

        // The scheme has to be the prefix of the href; a "mailto:" buried in
        // another URL's path or query string is not a mail link.
        if (strncasecmp($href, 'mailto:', 7) !== 0) {
            continue;
        }

        // Keep only the recipient part, i.e. everything before "?headers".
        $recipients = explode('?', (string) substr($href, 7), 2)[0];

        foreach (explode(',', $recipients) as $recipient) {
            $recipient = trim(rawurldecode($recipient));
            if ($recipient !== '') {
                $emails[] = $recipient;
            }
        }
    }

    return $emails;
}

/**
 * Collect the link targets found on a page.
 *
 * The page body is obtained through \App\ContactCrawler\Search::getData()
 * and the href of every <a> element is gathered. Hrefs are returned as
 * written in the document (only surrounding whitespace is removed), so
 * relative links are NOT resolved against $url. Anchors without an href are
 * skipped because they do not point anywhere.
 *
 * @param string $url Address of the page to download and scan.
 *
 * @return array Map of href => href; keying by the href removes duplicates.
 */
function collectUrls($url) {
    $s = new \App\ContactCrawler\Search();
    $data = $s->getData($url);

    // Nothing usable was downloaded. DOMDocument::loadHTML() rejects an empty
    // source (a warning on PHP 7, a ValueError on PHP 8 that "@" cannot
    // silence), so bail out before parsing.
    if (!is_string($data) || trim($data) === '') {
        return [];
    }

    // Real-world markup is rarely valid: collect libxml errors internally
    // instead of emitting warnings, then put the global setting back.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $urls = [];
    foreach ($dom->getElementsByTagName('a') as $anchor) {
        $href = trim($anchor->getAttribute('href'));

        // getAttribute() yields '' for a missing href (e.g. <a name="top">);
        // an empty string is not a page the caller could fetch.
        if ($href === '') {
            continue;
        }

        $urls[$href] = $href;
    }

    return $urls;
}

// Command-line entry point: the first argument is the search keyword.
if (!isset($argv[1])) {
    // No keyword given: print the usage hint and do nothing else.
    echo "Help: <keyword>\n";
} else {
    $google = new Google();

    // Unique addresses seen so far, keyed by the address itself.
    $emails = [];

    // NOTE(review): "cs" and 10000 are presumably the language code and the
    // result limit -- confirm against Google::search().
    foreach ($google->search($argv[1], "cs", 10000) as $url) {
        // Only the pages linked from each search result are scanned.
        foreach (collectUrls($url) as $page) {
            foreach (collectEmail($page) as $address) {
                // Report every hit immediately, repeats included.
                echo $address, "\n";
                $emails[$address] = $address;
            }
        }
    }

    // Finish with the de-duplicated list.
    foreach ($emails as $address) {
        echo $address . "\n";
    }
}