-
+ 36F6FBA56BFEE3416CFB78E50F60D228FCDC2FFDC078EA08BC1553501427E2A227889A82406A65499CE0AD2D6EF1EE4E2BB12CD447CF43E656D3D06C961477E6
mp-wp/wp-includes/class-snoopy.php
(0 . 0)(1 . 1250)
80278 <?php
80279 if ( !in_array('Snoopy', get_declared_classes() ) ) :
80280 /*************************************************
80281
80282 Snoopy - the PHP net client
80283 Author: Monte Ohrt <monte@ispi.net>
80284 Copyright (c): 1999-2008 New Digital Group, all rights reserved
80285 Version: 1.2.4
80286
80287 * This library is free software; you can redistribute it and/or
80288 * modify it under the terms of the GNU Lesser General Public
80289 * License as published by the Free Software Foundation; either
80290 * version 2.1 of the License, or (at your option) any later version.
80291 *
80292 * This library is distributed in the hope that it will be useful,
80293 * but WITHOUT ANY WARRANTY; without even the implied warranty of
80294 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
80295 * Lesser General Public License for more details.
80296 *
80297 * You should have received a copy of the GNU Lesser General Public
80298 * License along with this library; if not, write to the Free Software
80299 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
80300
80301 You may contact the author of Snoopy by e-mail at:
80302 monte@ohrt.com
80303
80304 The latest version of Snoopy can be obtained from:
80305 http://snoopy.sourceforge.net/
80306
80307 *************************************************/
80308
80309 class Snoopy
80310 {
80311 /**** Public variables ****/
80312
80313 /* user definable vars */
80314
80315 var $host = "www.php.net"; // host name we are connecting to
80316 var $port = 80; // port we are connecting to
80317 var $proxy_host = ""; // proxy host to use
80318 var $proxy_port = ""; // proxy port to use
80319 var $proxy_user = ""; // proxy user to use
80320 var $proxy_pass = ""; // proxy password to use
80321
80322 var $agent = "Snoopy v1.2.4"; // agent we masquerade as
80323 var $referer = ""; // referer info to pass
80324 var $cookies = array(); // array of cookies to pass
80325 // $cookies["username"]="joe";
80326 var $rawheaders = array(); // array of raw headers to send
80327 // $rawheaders["Content-type"]="text/html";
80328
80329 var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
80330 var $lastredirectaddr = ""; // contains address of last redirected address
80331 var $offsiteok = true; // allows redirection off-site
80332 var $maxframes = 0; // frame content depth maximum. 0 = disallow
80333 var $expandlinks = true; // expand links to fully qualified URLs.
80334 // this only applies to fetchlinks()
80335 // submitlinks(), and submittext()
80336 var $passcookies = true; // pass set cookies back through redirects
80337 // NOTE: this currently does not respect
80338 // dates, domains or paths.
80339
80340 var $user = ""; // user for http authentication
80341 var $pass = ""; // password for http authentication
80342
80343 // http accept types
80344 var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
80345
80346 var $results = ""; // where the content is put
80347
80348 var $error = ""; // error messages sent here
80349 var $response_code = ""; // response code returned from server
80350 var $headers = array(); // headers returned from server sent here
80351 var $maxlength = 500000; // max return data length (body)
80352 var $read_timeout = 0; // timeout on read operations, in seconds
80353 // supported only since PHP 4 Beta 4
80354 // set to 0 to disallow timeouts
80355 var $timed_out = false; // if a read operation timed out
80356 var $status = 0; // http request status
80357
80358 var $temp_dir = "/tmp"; // temporary directory that the webserver
80359 // has permission to write to.
80360 // under Windows, this should be C:\temp
80361
80362 var $curl_path = "/usr/local/bin/curl";
80363 // Snoopy will use cURL for fetching
80364 // SSL content if a full system path to
80365 // the cURL binary is supplied here.
80366 // set to false if you do not have
80367 // cURL installed. See http://curl.haxx.se
80368 // for details on installing cURL.
80369 // Snoopy does *not* use the cURL
80370 // library functions built into php,
80371 // as these functions are not stable
80372 // as of this Snoopy release.
80373
80374 /**** Private variables ****/
80375
80376 var $_maxlinelen = 4096; // max line length (headers)
80377
80378 var $_httpmethod = "GET"; // default http request method
80379 var $_httpversion = "HTTP/1.0"; // default http request version
80380 var $_submit_method = "POST"; // default submit method
80381 var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
80382 var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
80383 var $_redirectaddr = false; // will be set if page fetched is a redirect
80384 var $_redirectdepth = 0; // increments on an http redirect
80385 var $_frameurls = array(); // frame src urls
80386 var $_framedepth = 0; // increments on frame depth
80387
80388 var $_isproxy = false; // set if using a proxy server
80389 var $_fp_timeout = 30; // timeout for socket connection
80390
80391 /*======================================================================*\
80392 Function: fetch
80393 Purpose: fetch the contents of a web page
80394 (and possibly other protocols in the
80395 future like ftp, nntp, gopher, etc.)
80396 Input: $URI the location of the page to fetch
80397 Output: $this->results the output text from the fetch
80398 \*======================================================================*/
80399
function fetch($URI)
{
	/*
	 * Fetches $URI over http (socket) or https (external cURL binary),
	 * then follows at most $this->maxredirs redirects and, when
	 * $this->maxframes > 0, fetches any frame sources that were found.
	 *
	 * Returns true when the request was issued, false when the
	 * connection failed, cURL is unavailable for https, or the scheme
	 * is not http/https (in which case $this->error is set).
	 */

	//preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	if (empty($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (empty($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';

	switch(strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if(!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if(!$this->_connect($fp))
				return false;

			if($this->_isproxy)
			{
				// using proxy, send entire URI
				$this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
			}
			else
			{
				$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
				// no proxy, send only the path
				$this->_httprequest($path, $fp, $URI, $this->_httpmethod);
			}

			$this->_disconnect($fp);
			break;

		case "https":
			// https is delegated to the external cURL binary
			if(!$this->curl_path)
				return false;
			if(function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if(!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if($this->_isproxy)
			{
				// using proxy, send entire URI
				$this->_httpsrequest($URI,$URI,$this->_httpmethod);
			}
			else
			{
				$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
				// no proxy, send only the path
				$this->_httpsrequest($path, $URI, $this->_httpmethod);
			}
			break;

		default:
			// not a valid protocol
			// (the single-quoted '\n' is a literal backslash-n; kept as-is
			// so the message text callers may already see does not change)
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
			return false;
	}

	if($this->_redirectaddr)
	{
		/* url was redirected, check if we've hit the max depth */
		if($this->maxredirs > $this->_redirectdepth)
		{
			// "On this site" means http(s)://<host> followed by the end of
			// the authority part, so that "example.com.evil.org" is not
			// mistaken for "example.com".
			$same_site = "~^https?://".preg_quote($this->host, "~").'(?:[/:?#]|$)~i';

			// only follow redirect if it's on this site, or offsiteok is true
			if(preg_match($same_site,$this->_redirectaddr) || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr=$this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}
	}

	if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// take a private copy: the nested fetch() calls refill _frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		// foreach instead of each(), which no longer exists in PHP 8
		foreach($frameurls as $frameurl)
		{
			if($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
80538
80539 /*======================================================================*\
80540 Function: submit
80541 Purpose: submit an http form
80542 Input: $URI the location to post the data
80543 $formvars the formvars to use.
80544 format: $formvars["var"] = "val";
80545 $formfiles an array of files to submit
80546 format: $formfiles["var"] = "/dir/filename.ext";
80547 Output: $this->results the text output from the post
80548 \*======================================================================*/
80549
function submit($URI, $formvars="", $formfiles="")
{
	/*
	 * Posts $formvars / $formfiles to $URI over http (socket) or https
	 * (external cURL binary), then follows redirects and fetches frame
	 * sources the same way fetch() does.
	 *
	 * A redirect target that contains a query string is followed with a
	 * plain fetch(); any other target is re-submitted with the same form
	 * data.
	 *
	 * Returns true when the request was issued, false when the
	 * connection failed, cURL is unavailable for https, or the scheme
	 * is not http/https (in which case $this->error is set).
	 */
	$postdata = $this->_prepare_post_body($formvars, $formfiles);

	$URI_PARTS = parse_url($URI);
	// parse_url() returns false on seriously malformed URLs
	if (!is_array($URI_PARTS))
		$URI_PARTS = array();
	if (!empty($URI_PARTS["user"]))
		$this->user = $URI_PARTS["user"];
	if (!empty($URI_PARTS["pass"]))
		$this->pass = $URI_PARTS["pass"];
	if (empty($URI_PARTS["query"]))
		$URI_PARTS["query"] = '';
	if (empty($URI_PARTS["path"]))
		$URI_PARTS["path"] = '';
	if (empty($URI_PARTS["scheme"]))
		$URI_PARTS["scheme"] = '';
	if (empty($URI_PARTS["host"]))
		$URI_PARTS["host"] = '';

	switch(strtolower($URI_PARTS["scheme"]))
	{
		case "http":
			$this->host = $URI_PARTS["host"];
			if(!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if(!$this->_connect($fp))
				return false;

			if($this->_isproxy)
			{
				// using proxy, send entire URI
				$this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
			}
			else
			{
				$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
				// no proxy, send only the path
				$this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			}

			$this->_disconnect($fp);
			break;

		case "https":
			// https is delegated to the external cURL binary
			if(!$this->curl_path)
				return false;
			if(function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
			$this->host = $URI_PARTS["host"];
			if(!empty($URI_PARTS["port"]))
				$this->port = $URI_PARTS["port"];
			if($this->_isproxy)
			{
				// using proxy, send entire URI
				$this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			}
			else
			{
				$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
				// no proxy, send only the path
				$this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			}
			break;

		default:
			// not a valid protocol
			// (the single-quoted '\n' is a literal backslash-n; kept as-is
			// so the message text callers may already see does not change)
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
			return false;
	}

	if($this->_redirectaddr)
	{
		/* url was redirected, check if we've hit the max depth */
		if($this->maxredirs > $this->_redirectdepth)
		{
			// qualify a redirect target that does not carry our scheme
			// (URI schemes are case-insensitive, hence the "i" flag)
			if(!preg_match("|^".preg_quote($URI_PARTS["scheme"], "|")."://|i", $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

			// "On this site" means http(s)://<host> followed by the end of
			// the authority part, so that "example.com.evil.org" is not
			// mistaken for "example.com".
			$same_site = "~^https?://".preg_quote($this->host, "~").'(?:[/:?#]|$)~i';

			// only follow redirect if it's on this site, or offsiteok is true
			if(preg_match($same_site,$this->_redirectaddr) || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr=$this->_redirectaddr;
				if( strpos( $this->_redirectaddr, "?" ) > 0 )
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr,$formvars, $formfiles);
			}
		}
	}

	if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
	{
		// take a private copy: the nested fetch() calls refill _frameurls
		$frameurls = $this->_frameurls;
		$this->_frameurls = array();

		// foreach instead of each(), which no longer exists in PHP 8
		foreach($frameurls as $frameurl)
		{
			if($this->_framedepth >= $this->maxframes)
				break;
			$this->fetch($frameurl);
			$this->_framedepth++;
		}
	}

	return true;
}
80704
80705 /*======================================================================*\
80706 Function: fetchlinks
80707 Purpose: fetch the links from a web page
80708 Input: $URI where you are fetching from
80709 Output: $this->results an array of the URLs
80710 \*======================================================================*/
80711
function fetchlinks($URI)
{
	/*
	 * Fetches $URI and replaces $this->results with the links found in
	 * the page (one list per page when frames produced several pages).
	 * Links are expanded against the last redirect target, if any,
	 * when $this->expandlinks is set. Returns fetch()'s success flag.
	 */
	if (!$this->fetch($URI))
		return false;

	if($this->lastredirectaddr)
		$URI = $this->lastredirectaddr;

	if(is_array($this->results))
	{
		for($x=0;$x<count($this->results);$x++)
		{
			$this->results[$x] = $this->_striplinks($this->results[$x]);
			// expand page by page (as submitlinks() does): handing the
			// whole array of link lists to _expandlinks() at once would
			// make preg_replace() stringify each inner list to "Array"
			if($this->expandlinks)
				$this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
		}
	}
	else
	{
		$this->results = $this->_striplinks($this->results);
		if($this->expandlinks)
			$this->results = $this->_expandlinks($this->results, $URI);
	}

	return true;
}
80733
80734 /*======================================================================*\
80735 Function: fetchform
80736 Purpose: fetch the form elements from a web page
80737 Input: $URI where you are fetching from
80738 Output: $this->results the resulting html form
80739 \*======================================================================*/
80740
function fetchform($URI)
{
	// nothing to post-process when the fetch itself failed
	if (!$this->fetch($URI))
		return false;

	// single page: reduce it to its form markup and finish
	if (!is_array($this->results))
	{
		$this->results = $this->_stripform($this->results);
		return true;
	}

	// several pages (framed content): reduce each one in place
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_stripform($this->results[$page]);
		$page++;
	}

	return true;
}
80760
80761
80762 /*======================================================================*\
80763 Function: fetchtext
80764 Purpose: fetch the text from a web page, stripping the links
80765 Input: $URI where you are fetching from
80766 Output: $this->results the text from the web page
80767 \*======================================================================*/
80768
function fetchtext($URI)
{
	// nothing to post-process when the fetch itself failed
	if (!$this->fetch($URI))
		return false;

	// single page: strip the markup and finish
	if (!is_array($this->results))
	{
		$this->results = $this->_striptext($this->results);
		return true;
	}

	// several pages (framed content): strip each one in place
	$page = 0;
	while ($page < count($this->results))
	{
		$this->results[$page] = $this->_striptext($this->results[$page]);
		$page++;
	}

	return true;
}
80785
80786 /*======================================================================*\
80787 Function: submitlinks
80788 Purpose: grab links from a form submission
80789 Input: $URI where you are submitting from
80790 Output: $this->results an array of the links from the post
80791 \*======================================================================*/
80792
function submitlinks($URI, $formvars="", $formfiles="")
{
	// nothing to post-process when the submission itself failed
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	// links are resolved against the last redirect target when there is one
	$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

	if (!is_array($this->results))
	{
		// single page
		$found = $this->_striplinks($this->results);
		if ($this->expandlinks)
			$found = $this->_expandlinks($found, $base);
		$this->results = $found;
		return true;
	}

	// several pages (framed content): handle each one in place
	$page = 0;
	while ($page < count($this->results))
	{
		$found = $this->_striplinks($this->results[$page]);
		if ($this->expandlinks)
			$found = $this->_expandlinks($found, $base);
		$this->results[$page] = $found;
		$page++;
	}

	return true;
}
80819
80820 /*======================================================================*\
80821 Function: submittext
80822 Purpose: grab text from a form submission
80823 Input: $URI where you are submitting from
80824 Output: $this->results the text from the web page
80825 \*======================================================================*/
80826
function submittext($URI, $formvars = "", $formfiles = "")
{
	// nothing to post-process when the submission itself failed
	if (!$this->submit($URI, $formvars, $formfiles))
		return false;

	// expansion uses the last redirect target as base when there is one
	$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

	if (!is_array($this->results))
	{
		// single page
		$plain = $this->_striptext($this->results);
		if ($this->expandlinks)
			$plain = $this->_expandlinks($plain, $base);
		$this->results = $plain;
		return true;
	}

	// several pages (framed content): handle each one in place
	$page = 0;
	while ($page < count($this->results))
	{
		$plain = $this->_striptext($this->results[$page]);
		if ($this->expandlinks)
			$plain = $this->_expandlinks($plain, $base);
		$this->results[$page] = $plain;
		$page++;
	}

	return true;
}
80853
80854
80855
80856 /*======================================================================*\
80857 Function: set_submit_multipart
80858 Purpose: Set the form submission content type to
80859 multipart/form-data
80860 \*======================================================================*/
function set_submit_multipart()
{
	// content type used by later submit() calls
	$multipart_type = "multipart/form-data";
	$this->_submit_type = $multipart_type;
}
80865
80866
80867 /*======================================================================*\
80868 Function: set_submit_normal
80869 Purpose: Set the form submission content type to
80870 application/x-www-form-urlencoded
80871 \*======================================================================*/
function set_submit_normal()
{
	// content type used by later submit() calls
	$urlencoded_type = "application/x-www-form-urlencoded";
	$this->_submit_type = $urlencoded_type;
}
80876
80877
80878
80879
80880 /*======================================================================*\
80881 Private functions
80882 \*======================================================================*/
80883
80884
80885 /*======================================================================*\
80886 Function: _striplinks
80887 Purpose: strip the hyperlinks from an html document
80888 Input: $document document to strip.
80889 Output: $match an array of the links
80890 \*======================================================================*/
80891
function _striplinks($document)
{
	/*
	 * Collects the href values of all <a> tags in $document.
	 * Returns a list of strings: quoted href values first (in document
	 * order), then unquoted ones. The list is empty when no link is found.
	 */
	$links = array();
	preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
					([\"\'])?					# find single or double quote
					(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
												# quote, otherwise match up to next space
					'isx",$document,$links);

	// always hand back an array, even when nothing matched
	$match = array();

	// group 2 holds quoted values, group 3 unquoted ones; for every match
	// exactly one of the two is filled and the other is an empty string
	foreach(array(2, 3) as $group)
	{
		if(empty($links[$group]))
			continue;

		// foreach instead of each(), which no longer exists in PHP 8
		foreach($links[$group] as $val)
		{
			// compare against '' so that a literal "0" href is kept
			if($val !== '')
				$match[] = $val;
		}
	}

	// return the links
	return $match;
}
80918
80919 /*======================================================================*\
80920 Function: _stripform
80921 Purpose: strip the form elements from an html document
80922 Input: $document document to strip.
80923 Output: $match the matched form elements, joined by CRLF into one string
80924 \*======================================================================*/
80925
function _stripform($document)
{
	// opening/closing FORM, INPUT, SELECT, TEXTAREA and OPTION tags;
	// for OPTION the text up to the next option/select tag is included
	$form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

	preg_match_all($form_pattern, $document, $elements);

	// one matched element per line, CRLF separated
	return implode("\r\n", $elements[0]);
}
80936
80937
80938
80939 /*======================================================================*\
80940 Function: _striptext
80941 Purpose: strip the text from an html document
80942 Input: $document document to strip.
80943 Output: $text the resulting text
80944 \*======================================================================*/
80945
function _striptext($document)
{
	// Ordered pattern => replacement table. The rules are applied one
	// after another, top to bottom, each on the output of the previous
	// one; entities are listed individually.
	// NOTE(review): the six umlaut entities and the sharp-s rule map to
	// empty strings in this copy - presumably the replacement characters
	// were lost at some point; confirm before relying on them.
	$rules = array(
		"'<script[^>]*?>.*?</script>'si"	=> "",		// strip out javascript
		"'<[\/\!]*?[^<>]*?>'si"			=> "",		// strip out html tags
		"'([\r\n])[\s]+'"				=> "\\1",	// strip out white space
		"'&(quot|#34|#034|#x22);'i"		=> "\"",	// replace html entities
		"'&(amp|#38|#038|#x26);'i"		=> "&",		// (decimal and hexadecimal forms)
		"'&(lt|#60|#060|#x3c);'i"		=> "<",
		"'&(gt|#62|#062|#x3e);'i"		=> ">",
		"'&(nbsp|#160|#xa0);'i"			=> " ",
		"'&(iexcl|#161);'i"				=> chr(161),
		"'&(cent|#162);'i"				=> chr(162),
		"'&(pound|#163);'i"				=> chr(163),
		"'&(copy|#169);'i"				=> chr(169),
		"'&(reg|#174);'i"				=> chr(174),
		"'&(deg|#176);'i"				=> chr(176),
		"'&(#39|#039|#x27);'"			=> chr(39),
		"'&(euro|#8364);'i"				=> chr(128),	// europe
		"'&a(uml|UML);'"				=> "",		// german
		"'&o(uml|UML);'"				=> "",
		"'&u(uml|UML);'"				=> "",
		"'&A(uml|UML);'"				=> "",
		"'&O(uml|UML);'"				=> "",
		"'&U(uml|UML);'"				=> "",
		"'ß'i"							=> "",
	);

	return preg_replace(array_keys($rules), array_values($rules), $document);
}
81006
81007 /*======================================================================*\
81008 Function: _expandlinks
81009 Purpose: expand each link into a fully qualified URL
81010 Input: $links the links to qualify
81011 $URI the full URI to get the base from
81012 Output: $expandedLinks the expanded links
81013 \*======================================================================*/
81014
function _expandlinks($links,$URI)
{
	/*
	 * Turns relative links into fully qualified URLs.
	 *
	 * $links may be one string or an array of strings (preg_replace()
	 * handles both); the same shape is returned. $URI is the address the
	 * links were found at. Links starting with "/" are resolved against
	 * scheme://host of $URI, other non-absolute links against $URI
	 * without its query string and trailing "file.ext" segment.
	 * http://, https:// and mailto: links are not prefixed.
	 */

	// drop the query string
	$base = "";
	if(preg_match("/^[^\?]+/",$URI,$match))
		$base = $match[0];

	// drop a trailing "/file.ext" path segment; the lookbehind keeps the
	// "//host.tld" part of a path-less URI such as "http://example.com"
	// from being taken for a file name
	$base = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$base);
	$base = preg_replace("|/$|","",$base);

	$base_part = parse_url($base);
	if(!is_array($base_part))
		$base_part = array();
	$base_root =
		(isset($base_part["scheme"]) ? $base_part["scheme"] : "")
		."://".
		(isset($base_part["host"]) ? $base_part["host"] : "");

	$search = array(	"|^http://".preg_quote($this->host, "|")."|i",	// strip our own host prefix
						"|^(\/)|i",									// root-relative link
						"|^(?!https?://)(?!mailto:)|i",				// any other non-absolute link
						"|/\./|",									// collapse "/./"
						"|/[^\/]+/\.\./|"							// collapse "/dir/../"
					);

	$replace = array(	"",
						$base_root."/",
						$base."/",
						"/",
						"/"
					);

	$expandedLinks = preg_replace($search,$replace,$links);

	return $expandedLinks;
}
81044
81045 /*======================================================================*\
81046 Function: _httprequest
81047 Purpose: go get the http data from the server
81048 Input: $url the url to fetch
81049 $fp the current open file pointer
81050 $URI the full URI
81051 $body body contents to send if any (POST)
81052 Output:
81053 \*======================================================================*/
81054
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
	/*
	 * Sends one HTTP request over the already opened socket $fp and
	 * reads the response.
	 *
	 *   $url           request target for the request line ("/" if empty)
	 *   $fp            open socket
	 *   $URI           full URI; used to qualify redirects and frame sources
	 *   $http_method   request method for the request line
	 *   $content_type  Content-type header value, if any
	 *   $body          request body, if any
	 *
	 * Fills $this->headers, $this->status, $this->response_code,
	 * $this->results, $this->_redirectaddr and $this->_frameurls.
	 * Returns true, or false (with $this->status = -100) on a read timeout.
	 */
	$cookie_headers = '';
	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$URI_PARTS = parse_url($URI);
	// parse_url() may return false or leave the scheme out
	$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";
	if(empty($url))
		$url = "/";

	/* build the request line and headers */
	$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
	if(!empty($this->agent))
		$headers .= "User-Agent: ".$this->agent."\r\n";
	if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
		$headers .= "Host: ".$this->host;
		if(!empty($this->port) && $this->port != 80)
			$headers .= ":".$this->port;
		$headers .= "\r\n";
	}
	if(!empty($this->accept))
		$headers .= "Accept: ".$this->accept."\r\n";
	if(!empty($this->referer))
		$headers .= "Referer: ".$this->referer."\r\n";
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		reset($this->cookies);
		if ( count($this->cookies) > 0 ) {
			$cookie_headers .= 'Cookie: ';
			foreach ( $this->cookies as $cookieKey => $cookieVal ) {
				$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
			}
			// cut the trailing "; "
			$headers .= substr($cookie_headers,0,-2) . "\r\n";
		}
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach instead of each(): each() is gone in PHP 8 and, lacking
		// a reset(), it sent no raw headers on a second request
		foreach($this->rawheaders as $headerKey => $headerVal)
			$headers .= $headerKey.": ".$headerVal."\r\n";
	}
	if(!empty($content_type)) {
		$headers .= "Content-type: $content_type";
		if ($content_type == "multipart/form-data")
			$headers .= "; boundary=".$this->_mime_boundary;
		$headers .= "\r\n";
	}
	if(!empty($body))
		$headers .= "Content-length: ".strlen($body)."\r\n";
	if(!empty($this->user) || !empty($this->pass))
		$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

	//add proxy auth headers
	if(!empty($this->proxy_user))
		$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

	// blank line ends the header section
	$headers .= "\r\n";

	// set the read timeout if needed
	if ($this->read_timeout > 0)
		socket_set_timeout($fp, $this->read_timeout);
	$this->timed_out = false;

	fwrite($fp,$headers.$body,strlen($headers.$body));

	/* read the response headers */
	$this->_redirectaddr = false;
	// reset to an empty list rather than unset(), so the property stays
	// defined even when the server sends no header lines at all
	$this->headers = array();

	while($currentHeader = fgets($fp,$this->_maxlinelen))
	{
		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// an empty line separates headers from body
		if($currentHeader == "\r\n")
			break;

		// if a header begins with Location: or URI:, set the redirect
		// (whitespace after the colon is optional)
		if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches))
		{
			// look for :// in the Location header to see if hostname is included
			if(!preg_match("|\:\/\/|",$matches[2]))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$matches[2]))
					$this->_redirectaddr .= "/".$matches[2];
				else
					$this->_redirectaddr .= $matches[2];
			}
			else
				$this->_redirectaddr = $matches[2];
		}

		if(preg_match("|^HTTP/|",$currentHeader))
		{
			// status line: remember the numeric code and the raw line
			if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
			{
				$this->status= $status[1];
			}
			$this->response_code = $currentHeader;
		}

		$this->headers[] = $currentHeader;
	}

	/* read the body, in chunks of at most maxlength bytes, until no more data arrives */
	$results = '';
	while(true)
	{
		$_data = fread($fp, $this->maxlength);
		if ($_data === false || strlen($_data) == 0)
			break;
		$results .= $_data;
	}

	if ($this->read_timeout > 0 && $this->_check_timeout($fp))
	{
		$this->status=-100;
		return false;
	}

	// check if there is a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		// "[]" on a string is a fatal error since PHP 7.1, so switch the
		// (initially string) results over to a list of pages first
		if(!is_array($this->results))
			$this->results = array();
		$this->results[] = $results;
		for($x=0; $x<count($match[1]); $x++)
			$this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
81208
81209 /*======================================================================*\
81210 Function: _httpsrequest
81211 Purpose: go get the https data from the server using curl
81212 Input: $url the url to fetch
81213 $URI the full URI
81214 $body body contents to send if any (POST)
81215 Output:
81216 \*======================================================================*/
81217
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
	// Fetches $URI by shelling out to the curl binary at $this->curl_path.
	// Response headers are read back from a temporary file written by
	// curl's -D option; the response body is curl's standard output.
	//
	// $url and $http_method are kept for signature compatibility but are
	// not used: curl derives the request line from $URI, and passing a
	// body with -d is what makes it issue a POST.
	//
	// Returns true on success. Returns false and sets $this->error when
	// the temporary header file cannot be created or curl exits non-zero.

	if($this->passcookies && $this->_redirectaddr)
		$this->setcookies();

	$headers = array();

	$URI_PARTS = parse_url($URI);

	if(!empty($this->agent))
		$headers[] = "User-Agent: ".$this->agent;
	if(!empty($this->host))
	{
		if(!empty($this->port))
			$headers[] = "Host: ".$this->host.":".$this->port;
		else
			$headers[] = "Host: ".$this->host;
	}
	if(!empty($this->accept))
		$headers[] = "Accept: ".$this->accept;
	if(!empty($this->referer))
		$headers[] = "Referer: ".$this->referer;
	if(!empty($this->cookies))
	{
		if(!is_array($this->cookies))
			$this->cookies = (array)$this->cookies;

		$cookie_pairs = array();
		foreach($this->cookies as $cookieKey => $cookieVal)
			$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
		$headers[] = "Cookie: ".implode("; ",$cookie_pairs);
	}
	if(!empty($this->rawheaders))
	{
		if(!is_array($this->rawheaders))
			$this->rawheaders = (array)$this->rawheaders;
		// foreach instead of each(): each() no longer exists in PHP 8
		foreach($this->rawheaders as $headerKey => $headerVal)
			$headers[] = $headerKey.": ".$headerVal;
	}
	if(!empty($content_type)) {
		if ($content_type == "multipart/form-data")
			$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
		else
			$headers[] = "Content-type: $content_type";
	}
	if(!empty($body))
		$headers[] = "Content-length: ".strlen($body);
	if(!empty($this->user) || !empty($this->pass))
		$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

	// Every value placed on the command line goes through escapeshellarg()
	// so that header, body and URL contents cannot break out of their
	// argument or trigger shell expansion.
	$cmdline_params = "";
	foreach($headers as $header)
		$cmdline_params .= " -H ".escapeshellarg($header);

	if(!empty($body))
		$cmdline_params .= " -d ".escapeshellarg($body);

	if($this->read_timeout > 0)
		$cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

	// Use the object's temp_dir setting when one is present, otherwise
	// fall back to the system temporary directory.
	$temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
	$headerfile = tempnam($temp_dir, "sno");
	if($headerfile === false)
	{
		$this->error = "Error: could not create a temporary file for the response headers.";
		return false;
	}

	// NOTE(review): -k makes curl skip TLS certificate verification. It is
	// kept because existing callers may rely on it, but it should be revisited.
	$results = array();
	$return = 0;
	exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

	if($return)
	{
		// do not leave the header dump behind on failure
		if(file_exists($headerfile))
			unlink($headerfile);
		$this->error = "Error: cURL could not retrieve the document, error $return.";
		return false;
	}

	// exec() returns the output split into lines; glue them back together
	$results = implode("\r\n",$results);

	$result_headers = file($headerfile);
	if(file_exists($headerfile))
		unlink($headerfile);
	if(!is_array($result_headers))
		$result_headers = array();

	$this->_redirectaddr = false;
	// reset to an empty array (rather than unsetting) so the property is
	// always countable, even when no header lines were written
	$this->headers = array();

	foreach($result_headers as $header_line)
	{
		// A Location: or URI: header sets the redirect target. One
		// case-insensitive pattern both detects the header and extracts
		// its value, so "Location: /x" and "location: /x" are handled alike.
		if(preg_match("/^(Location|URI):[ \t]*(.*)/i",chop($header_line),$matches) && $matches[2] !== "")
		{
			// look for :// in the value to see if a hostname is included
			if(!preg_match("|\:\/\/|",$matches[2]))
			{
				// no host in the path, so prepend
				$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
				// eliminate double slash
				if(!preg_match("|^/|",$matches[2]))
					$this->_redirectaddr .= "/".$matches[2];
				else
					$this->_redirectaddr .= $matches[2];
			}
			else
				$this->_redirectaddr = $matches[2];
		}

		if(preg_match("|^HTTP/|",$header_line))
			$this->response_code = $header_line;

		$this->headers[] = $header_line;
	}

	// check if there is a redirect meta tag
	if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
	{
		$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
	}

	// have we hit our frame depth and is there frame src to fetch?
	if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
	{
		$this->results[] = $results;
		foreach($match[1] as $frame_src)
			$this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
	}
	// have we already fetched framed content?
	elseif(is_array($this->results))
		$this->results[] = $results;
	// no framed content
	else
		$this->results = $results;

	return true;
}
81356
81357 /*======================================================================*\
81358 Function: setcookies()
81359 Purpose: set cookies for a redirection
81360 \*======================================================================*/
81361
function setcookies()
{
	// Copies every "Set-Cookie: name=value" pair found in the stored
	// response headers into $this->cookies (values are URL-decoded).

	// nothing to do when no response headers have been collected
	if(empty($this->headers) || !is_array($this->headers))
		return;

	foreach($this->headers as $header_line)
	{
		// The value stops at ';' or at the end of the line. CR and LF are
		// excluded so a cookie that is last on its header line does not
		// carry the line terminator into its value.
		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header_line,$match))
			$this->cookies[$match[1]] = urldecode($match[2]);
	}
}
81370
81371
81372 /*======================================================================*\
81373 Function: _check_timeout
81374 Purpose: checks whether timeout has occurred
81375 Input: $fp file pointer
81376 \*======================================================================*/
81377
function _check_timeout($fp)
{
	// Reports whether the stream $fp has timed out. The check is only
	// made when a positive read timeout is configured; on a timeout
	// $this->timed_out is set as well.

	if (!($this->read_timeout > 0))
		return false;

	$stream_state = socket_get_status($fp);
	if (!$stream_state["timed_out"])
		return false;

	$this->timed_out = true;
	return true;
}
81389
81390 /*======================================================================*\
81391 Function: _connect
81392 Purpose: make a socket connection
81393 Input: $fp file pointer
81394 \*======================================================================*/
81395
function _connect(&$fp)
{
	// Opens a socket to the proxy (when both proxy_host and proxy_port
	// are set) or otherwise to $this->host:$this->port, and stores the
	// handle in $fp, which is passed by reference.
	//
	// Returns true on success. On failure returns false, stores the
	// fsockopen() error number in $this->status and a message in
	// $this->error.

	if(!empty($this->proxy_host) && !empty($this->proxy_port))
	{
		$this->_isproxy = true;

		$host = $this->proxy_host;
		$port = $this->proxy_port;
	}
	else
	{
		$host = $this->host;
		$port = $this->port;
	}

	$this->status = 0;

	$fp = fsockopen(
				$host,
				$port,
				$errno,
				$errstr,
				$this->_fp_timeout
				);

	if($fp)
	{
		// socket connection succeeded
		return true;
	}

	// socket connection failed
	$this->status = $errno;
	switch($errno)
	{
		// each case ends in break so the specific message is kept and not
		// overwritten by the cases that follow it
		case -3:
			$this->error="socket creation failed (-3)";
			break;
		case -4:
			$this->error="dns lookup failure (-4)";
			break;
		case -5:
			$this->error="connection refused or timed out (-5)";
			break;
		default:
			$this->error="connection failed (".$errno.")";
			break;
	}
	return false;
}
81443 /*======================================================================*\
81444 Function: _disconnect
81445 Purpose: disconnect a socket connection
81446 Input: $fp file pointer
81447 \*======================================================================*/
81448
function _disconnect($fp)
{
	// Closes the given file pointer and passes back fclose()'s result.
	$closed = fclose($fp);
	return $closed;
}
81453
81454
81455 /*======================================================================*\
81456 Function: _prepare_post_body
81457 Purpose: Prepare post body according to encoding type
81458 Input: $formvars - form variables
81459 $formfiles - form upload files
81460 Output: post body
81461 \*======================================================================*/
81462
function _prepare_post_body($formvars, $formfiles)
{
	// Builds the request body for a form submission according to
	// $this->_submit_type.
	//
	// $formvars   field name => value; a value may be an array/object,
	//             in which case each element is sent as "name[]"
	// $formfiles  field name => file path or array of file paths
	//             (used for multipart/form-data only)
	//
	// Returns the encoded body as a string, an empty string for any other
	// submit type, or nothing (null) when there are no variables and no
	// files at all.

	settype($formvars, "array");
	settype($formfiles, "array");
	$postdata = '';

	if (count($formvars) == 0 && count($formfiles) == 0)
		return;

	// The loops below use foreach instead of each(), which no longer
	// exists in PHP 8.
	switch ($this->_submit_type) {
		case "application/x-www-form-urlencoded":
			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
					}
				} else
					$postdata .= urlencode($key)."=".urlencode($val)."&";
			}
			break;

		case "multipart/form-data":
			$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

			foreach ($formvars as $key => $val) {
				if (is_array($val) || is_object($val)) {
					foreach ($val as $cur_val) {
						$postdata .= "--".$this->_mime_boundary."\r\n";
						// concatenate the "[]" suffix: writing it as \[\] inside
						// a double-quoted string would emit the backslashes too
						$postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
						$postdata .= "$cur_val\r\n";
					}
				} else {
					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
					$postdata .= "$val\r\n";
				}
			}

			foreach ($formfiles as $field_name => $file_names) {
				settype($file_names, "array");
				foreach ($file_names as $file_name) {
					if (!is_readable($file_name)) continue;

					// file_get_contents() is binary-safe and copes with
					// zero-length files, unlike fread() with a length of 0
					$file_content = file_get_contents($file_name);
					if ($file_content === false) continue;
					$base_name = basename($file_name);

					$postdata .= "--".$this->_mime_boundary."\r\n";
					$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
					$postdata .= "$file_content\r\n";
				}
			}
			$postdata .= "--".$this->_mime_boundary."--\r\n";
			break;
	}

	return $postdata;
}
81525 }
81526 endif;
81527 ?>