仙人掌治痔疮:PHP写的记录各大蜘蛛活动信息的脚本

来源:百度文库 编辑:中财网 时间:2024/05/08 13:14:16

PHP写的记录各大蜘蛛活动信息的脚本

[ 2006/10/29 00:22 | by sangern ] 每次下载页面时,根据 $useragent 判断客户端的类型,以此确定用户类型;如果是对应的蜘蛛,则把蜘蛛的活动时间以及蜘蛛信息记录在文件中。

  1. <?php
  2.  
  /**
   * Write a string to a file, replacing any previous contents.
   *
   * The file is opened without truncation and is only emptied after an
   * exclusive lock has been requested, so the old contents are not wiped
   * while another cooperating process still holds the lock.
   *
   * @param string $cachename Path (directory and file name) of the file to write.
   * @param string $sData     Data to store in the file.
   * @return string|false "ok" when all data was written, false when the file
   *                      could not be opened, emptied or completely written.
   */
  function writefiles($cachename, $sData)
  {
      $sData = (string) $sData;

      // "c" creates the file when missing but, unlike "w", does not truncate on open.
      $fp2 = @fopen($cachename, "c");
      if ($fp2 === false) {
          return false;
      }

      // Locking stays best-effort: a failed lock does not abort the write.
      $locked = @flock($fp2, LOCK_EX);

      $written = false;
      if (@ftruncate($fp2, 0)) {
          $written = @fwrite($fp2, $sData);
      }

      // Push buffered data to the file before the lock is released.
      @fflush($fp2);
      if ($locked) {
          @flock($fp2, LOCK_UN);
      }
      @fclose($fp2);

      if ($written === false || $written !== strlen($sData)) {
          return false;
      }

      return "ok";
  }
  21.  
  22.  
  /**
   * Identify which known search-engine crawler, if any, sent the current request.
   *
   * The User-Agent header is lowercased once and then searched for each
   * crawler signature, so every signature in the table must be lowercase.
   *
   * @return string|false Display name of the first matching crawler, or false
   *                      when the header is absent or matches no signature.
   */
  function get_naps_bot()
  {
      // Some clients send no User-Agent header at all.
      if (!isset($_SERVER['HTTP_USER_AGENT'])) {
          return false;
      }

      $useragent = strtolower($_SERVER['HTTP_USER_AGENT']);

      // Lowercase signature => name reported to the caller; checked in this order.
      $signatures = array(
          'googlebot'   => 'Googlebot',
          'msnbot'      => 'MSNbot',
          'slurp'       => 'Yahoobot',
          'baiduspider' => 'Baiduspider',
          'sohu-search' => 'Sohubot',
          'lycos'       => 'Lycos',
          'robozilla'   => 'Robozilla',
          'spiderman'   => 'SpiderMan',
      );

      foreach ($signatures as $signature => $botName) {
          if (strpos($useragent, $signature) !== false) {
              return $botName;
          }
      }

      return false;
  }
  95.  
  96.  
  // Escaped copy of the raw User-Agent header; an absent header is treated as an empty string.
  $tlc_thispage = isset($_SERVER['HTTP_USER_AGENT'])
      ? addslashes($_SERVER['HTTP_USER_AGENT'])
      : '';

  // Record the crawler's visit.
  $searchbot = get_naps_bot();

  if ($searchbot) {
      // Take the time once so the file name and the record cannot disagree
      // when the request straddles a second boundary.
      $now = time();

      // Record format: bot name, escaped User-Agent, visit time.
      $BotData = $searchbot . "," . $tlc_thispage . "," . date("Y-m-d H:i:s", $now);

      // The file name is built from the bot name and the second of the visit.
      writefiles("cache/" . $searchbot . date("YmdHis", $now) . ".txt", $BotData);
  }
  118.  
  119.  
  120. ?>