作者imce (蜥蜴)
看板Perl
標題Re: [問題] 如何減少WWW::Mechanize記憶體的用量
時間Mon Jul 6 16:45:32 2009
use strict;
use GmailV2;
my $gmail = GmailV2->new(username => "id"
, password => "pw",enableprint=>1);
system("pause");#6440K
$gmail->login();
system("pause");#12108K
my %PAGEs = $gmail->get_page();
system("pause");#12176K
$gmail->send_message(subject=>"ccc",body=>"ddd",file=>"d:\\a.jpg");
system("pause");#18224K(a.jpg有590K)
%PAGEs = $gmail->get_page();
system("pause");#17060K
#我希望第二次get_page()後,記憶體用量只有12176K
-------------------------GmailV2.pm-----------------------------
package GmailV2;
use lib qw(lib);
use strict;
use WWW::Mechanize;
our $VERSION = "0.0.7";
sub new {
my $class = shift;
my %args = @_;
my %pages;
my $mech = WWW::Mechanize->new();
$mech->stack_depth( 0 );
$args{username} .= '@gmail.com' if ($args{username} !~ /\@gmail\.com/);
my $self = bless {
_username => $args{username} || die( 'No username defined' ),
_password => $args{password} || die( 'No password defined' ),
_mech => $mech,
_lastpage => 0,
_nextpage => 0,
_pagedata => 0,
_print => $args{enableprint},
}, $class;
return $self;
}
sub login {
my ( $self ) = @_;
my $mech = $self->{_mech};
my $usr = $self->{_username};
my $pw = $self->{_password};
print "login $usr\n" if ($self->{_print});
$mech->get("http://mail.google.com/mail/?logout");
$mech->submit_form(
form_number => 1,
fields => {
"Email" => $usr,
"Passwd" => $pw,
}
);
$mech->get("http://mail.google.com/mail/?ui=html&zy=a");
$mech->content =~ / url=(.*?)>/;
return 0 if (!$1);
my $URL = $1;
$URL =~ s/&/&/ig;$URL =~ s/\"//g;$URL =~ s/'//g;
$mech->get($URL);
$self->{_pagedata} = $mech->content();
}
sub get_page{
my ( $self ) = @_;
my $mech = $self->{_mech};
my ($LastPage,$FirstPage,$NextPage) = ($self->{_lastpage},0,$self->{_nextpage});
my @DATA;
$mech->get("?st=0");
($LastPage,$FirstPage,$NextPage) = $mech->content() =~ /<b>([\d]*)<\/b>/ig;
($LastPage,$FirstPage,$NextPage) = ($NextPage,$LastPage,$FirstPage) if ($LastPage < $NextPage);
my $Pages = (!$LastPage || !$NextPage)?1:$LastPage/$NextPage;
for(my $i=0;$i<$Pages;$i++)
{
print "Reading Page $i\n" if ($self->{_print});
$mech->get("?st=".$i*$NextPage) if ($i);
my $Content = join("", (split(/\n/,$mech->content())) );
push(@DATA,$Content =~ /<a href=(.*?)<\/a>/ig);
}
@DATA = grep(/&th=/,@DATA);
$self->{_nextpage} = $NextPage;
$self->{_lastpage} = $LastPage;
$self->{_pagedata} = join("\n",@DATA);
return AnalizePageData($self->{_pagedata});
}
sub AnalizePageData{
my %Pages;
my $i=0;
my @DATA = split(/\n/,$_[0]);
foreach (@DATA)
{
s/<(?:[^>'"]*|(['"]).*?\1)*>//gs;
s/".*?th=(.*?)">/$1 /;
s/[ ]+/\t/;
($Pages{"MID$i"},my @title) = split;
$Pages{"TITLE$i"} = join(" ",@title);
$i++;
}
return %Pages;
}
sub send_message{
my ( $self ) = shift;
my ( %args ) = (
to => '' || $_{to},
cc => '' || $_{cc},
bcc => '' || $_{bcc},
subject => '' || $_{subject},
body => '' || $_{body},
file => '' || $_{file},
@_,
);
$args{to} = $self->{_username} if (!$args{to});
my $mech = $self->{_mech};
print "SendMail\n" if ($self->{_print});
$mech->get("?v=b&pv=tl&cs=b");
return 0 if (!$mech->form_name( "f" ));
print "Submit Mail\n" if ($self->{_print});
$mech->submit_form(
form_name => "f",
fields => {
"to" => $args{to},
"cc" => $args{cc},
"bcc" => $args{bcc},
"file0" => $args{file},
"subject" => $args{subject},
"body" => $args{body},
},
button => 'nvp_bu_send',
);
print "Submit Done\n" if ($self->{_print});
return 1;
}
1;
--
※ 發信站: 批踢踢實業坊(ptt.cc)
◆ From: 60.250.75.176
※ 編輯: imce 來自: 60.250.75.176 (07/06 16:46)
→ imce:記憶體增加的狀況,隨著上傳的檔案越大增加的越多 07/06 16:52
推 LiloHuang:看起來比較沒뼠Perl 記憶體管理機制比較像 memory pool 07/06 20:58
推 LiloHuang:好不容易跟OS要到記憶體 GC機制應該不會那麼快就還回去 07/06 20:59
推 LiloHuang:如果你 #18224K(a.jpg有590K) 這個做兩次應該不會變兩倍 07/06 20:59
推 LiloHuang:可以試試看 要不然就看有沒有辦法改到 reference count 07/06 21:00
→ imce:做兩次跟做一次記憶體是一樣多的 07/07 08:28