Learn Web.Crawling of Perl

#####
#Overview of web-crawling related modules.
#Note: the code below is for overview purposes only and is not meant to be executed as-is.
#####

#!/usr/bin/perl

#####
#HTTP::Thin
#####
use 5.12.1;
use HTTP::Request::Common;
use HTTP::Thin;

# Send a GET request through a fresh HTTP::Thin client and print the
# response rendered as a string.
my $client = HTTP::Thin->new;
my $reply  = $client->request( GET('http://example.com') );
say $reply->as_string;

#####
#HTTP::Tiny
#####
use HTTP::Tiny;

# Fetch the page; the result is used below as a plain hash reference
# (success, status, reason, headers, content).
my $response = HTTP::Tiny->new->get('http://example.com/');
$response->{success} or die "Failed!\n";

# Status line first.
print $response->{status} . ' ' . $response->{reason} . "\n";

# Then every header; a header value is either a single scalar or an
# array reference holding several values, so normalise to a list.
my $headers = $response->{headers};
for my $name ( keys %{$headers} ) {
    my $value  = $headers->{$name};
    my @values = ref $value eq 'ARRAY' ? @{$value} : ($value);
    print "$name: $_\n" for @values;
}

# Finally the body, but only when there is one.
my $body = $response->{content};
print $body if length $body;

#new
# Construct a client. The constructor takes a flat list of key/value pairs
# in parentheses (not braces); the accepted attribute names are listed below.
my $http = HTTP::Tiny->new( %attributes );

#valid attributes include:
#-agent
#-cookie_jar
#-default_headers
#-local_address
#-keep_alive
#-max_redirect
#-max_size
#-https_proxy
#-proxy
#-no_proxy
#-timeout
#-verify_SSL
#-SSL_options

#get | head | put | post | delete
# Shortcut methods named after the HTTP verb; only get and head are shown.
# The optional second argument is a reference to an options hash.
$response = $http->get($url);
$response = $http->get($url, \%options);
$response = $http->head($url);

#post_form
# Same calling shape as above, with the form data passed after the URL.
$response = $http->post_form($url, $form_data);
$response = $http->post_form($url, $form_data, \%options);

#request
# Generic form: the HTTP method is passed explicitly as the first argument.
$response = $http->request($method, $url);
$response = $http->request($method, $url, \%options);

# Credentials embedded in the URL before the '@' (user:password@host).
# NOTE(review): the space inside these URLs is either part of the example
# password or extraction damage — confirm against the original article.
$http->request('GET', 'http://user:pwd hk.mars@aol.com');
#or
# Here %40 is the percent-encoded form of '@' inside the user name.
$http->request('GET', 'http://mars%40:pwd hk.mars@aol.com');

#www_form_urlencode
# The encoded result is interpolated into the URL after the '?'.
$params = $http->www_form_urlencode( $data );
$response = $http->get("http://example.com/query?$params");

#SSL support
# SSL_options is one of the constructor attributes (see the attribute list
# above), so it must be passed to HTTP::Tiny->new; a bare key/value fragment
# is not a valid statement on its own. SSL_ca_file is the path of the CA
# file to use.
my $ssl_http = HTTP::Tiny->new(
    SSL_options => {
        SSL_ca_file => $file_path,
    },
);

#proxy support

#####
#WWW::Mechanize
#
#Stateful programmatic web browsing, used for automating interaction with websites.
#####

use WWW::Mechanize;

# Create the browser object used by every call below.
my $mech = WWW::Mechanize->new();

# Load the starting page.
$mech->get( $url );

# Three ways to choose which link to follow: by index (n), by a regex
# matched against the link text (text_regex), or by URL (url).
$mech->follow_link( n => 3 );
$mech->follow_link( text_regex => qr/download this/i );
$mech->follow_link( url => 'http://host.com/index.html' );

# Fill in and submit a form selected by its position on the page.
# The field key must be spelled exactly like the form input's name
# ('password', not 'passoword').
$mech->submit_form(
    form_number => 3,
    fields      => {
        username => 'banana',
        password => 'lost-and-alone',
    }
);

# Fill in and submit a form selected by name, pressing a specific button.
$mech->submit_form(
    form_name => 'search',
    fields    => { query => 'pot of gold', },
    button    => 'search now'
);

#testing web applications
use Test::More;

# Assert that the current page content matches the pattern in $expected.
like( $mech->content(), qr/$expected/, "Got expected content" );

#page traverse
# presumably returns to the previously visited page — confirm in the
# WWW::Mechanize docs
$mech->back();

#finer control over page
# Lower-level calls for locating a link, selecting a form (by number or by
# name), setting field values and clicking a button, as an alternative to
# the one-shot follow_link/submit_form calls.
$mech->find_link( n => $number );
$mech->form_number( $number );
$mech->form_name( $name );
$mech->field( $name, $value );
$mech->set_fields( $field_values );
$mech->set_visible( @criteria );
$mech->click( $button );

#subclass of LWP::UserAgent, eg:
# Adds a header name/value pair to the browser object.
$mech->add_header( $name =>$value );

#page-fetching methods

#status methods

#content-handling methods

#link methods

#image methods

#form methods

#field methods

#miscellaneous methods

#overridden LWP::UserAgent methods
#inherited unchanged LWP::UserAgent methods

#With these modules in hand, it is easy to implement a spider project for future integration.

Mars

波比源码 – 精品源码模版分享 | www.bobi11.com
1. 本站所有资源来源于用户上传和网络,如有侵权请邮件联系站长!
2. 分享目的仅供大家学习和交流,您必须在下载后24小时内删除!
3. 不得使用于非法商业用途,不得违反国家法律。否则后果自负!
4. 本站提供的源码、模板、插件等等其他资源,都不包含技术服务请大家谅解!
5. 如有链接无法下载、失效或广告,请联系管理员处理!
6. 本站资源售价只是赞助,收取费用仅维持本站的日常运营所需!
7. 如遇到加密压缩包,请使用WINRAR解压,如遇到无法解压的请联系管理员!

波比源码 » Learn Web.Crawling of Perl

发表评论

Hi, 如果你对这款模板有疑问,可以跟我联系哦!

联系站长
赞助VIP 享更多特权,建议使用 QQ 登录
喜欢我嘛?喜欢就按“ctrl+D”收藏我吧!♡