Web Crawler dengan PHP Simple HTML DOM Parser

URL :

URL yang dimasukkan akan tersimpan
Lihat URL disini. 0;$abs=preg_replace($re,'/', $abs,-1,$n)){} $abs=str_replace("../","",$abs); return $scheme.'://'.$abs; }

/**
 * Turn a link taken from a page into an absolute URL.
 *
 * Unless the base URL's path is exactly "/", the base is first reduced to
 * "scheme://host/" (scheme defaults to "http"). Links starting with "//" get
 * "http:" prepended, and any link that does not start with "http" is resolved
 * against the base through rel2abs().
 *
 * @param string $u Link as found in the page (crawl_site() passes an anchor's href).
 * @param string $b URL of the page the link was found on.
 * @return string The resulting URL.
 */
function perfect_url($u,$b){
    $bp = parse_url($b);
    if (!is_array($bp)) {
        // parse_url() returns false for seriously malformed URLs.
        $bp = array();
    }

    // parse_url() leaves out components that are absent, so never index blindly.
    $path = isset($bp['path']) ? $bp['path'] : '';
    if ($path !== "/") {
        $scheme = (isset($bp['scheme']) && $bp['scheme'] != "") ? $bp['scheme'] : "http";
        $host = isset($bp['host']) ? $bp['host'] : '';
        $b = $scheme."://".$host."/";
    }

    if (substr($u,0,2)=="//") {
        $u = "http:".$u;
    }
    if (substr($u,0,4)!="http") {
        $u = rel2abs($u,$b);
    }
    return $u;
}

/**
 * Fetch one page and print every link on it that has not been printed yet.
 *
 * A page is skipped when it was already fetched within the last 25 seconds
 * (timestamps are kept in the global $crawled_urls, keyed by the url-encoded
 * address). Links whose first four characters are "mail" or "java" and empty
 * links are ignored.
 *
 * @param string $u URL of the page to crawl.
 * @return void
 */
function crawl_site($u){
    // $found_urls was previously read without ever being defined in this scope,
    // which makes array_key_exists() throw a TypeError on PHP 8.
    // NOTE(review): it is shared here the same way as $crawled_urls; confirm the
    // file-level script is meant to keep one set of printed links per request.
    global $crawled_urls, $found_urls;

    $uen = urlencode($u);
    $cutoff = date("YmdHis",strtotime('-25 seconds', time()));

    // isset() is safe even when the global has not been initialised yet.
    if (isset($crawled_urls[$uen]) && $crawled_urls[$uen] >= $cutoff) {
        return;
    }

    $html = file_get_html($u);
    $crawled_urls[$uen] = date("YmdHis");

    // file_get_html() is provided by the Simple HTML DOM library (not visible
    // here); never call ->find() on a failed fetch.
    if (!is_object($html)) {
        return;
    }

    foreach ($html->find("a") as $li) {
        $url = perfect_url($li->href,$u);
        $enurl = urlencode($url);
        $prefix = substr($url,0,4);

        if ($url=='' || $prefix=="mail" || $prefix=="java" || isset($found_urls[$enurl])) {
            continue;
        }

        $found_urls[$enurl] = 1;
        // The address comes from a remote page: escape it before printing it
        // into this page's HTML.
        echo "
  • ".htmlspecialchars($url, ENT_QUOTES, 'UTF-8')."
  • ";
    }
}

// Form handler: runs when the URL form has been submitted.
if (isset($_POST['submit'])) {
    // A missing or non-string "url" field is treated like an empty one.
    $url = (isset($_POST['url']) && is_string($_POST['url'])) ? $_POST['url'] : '';

    if ($url === '') {
        echo "

    Silahkan masukan URL

    ";
    } else {
        // dataku.html is an HTML file, so the user-supplied URL is escaped
        // before being appended to it together with the submission time.
        $entry = "
    ".htmlspecialchars($url, ENT_QUOTES, 'UTF-8')." - ".date("Y-m-d H:i:s")."
    ";
        if (file_put_contents("dataku.html", $entry, FILE_APPEND) === false) {
            // Logging the submission is best effort; report and keep going.
            error_log("crawler: could not append to dataku.html");
        }
        echo "

    Hasil URL yang ditemukan

    ";
    }
}
?>