- 连接到随机页面 Special:Random,读取所得条目的名称,输出到“fname.txt”文件中。
- 读取该条目的源代码(编辑框中的维基文本),输出到“fout.txt”文件中。
使用前请先修改脚本开头的三个变量:
- my $username="username";(username 是已经申请的用户名,只能是英文,例如 ilikewikipedia)
- my $password="****";(**** 填入对应的密码)
- my $WIKI_PATH="zh.wikipedia.org";(缺省是中文维基,可以修改成其他语言,比如 en.wikipedia.org)
#!/usr/bin/perl
# rfget.pl - Get content from a random wikipedia article
# By WikiPedia:User:下一次登录
# Portions largely taken or based on upload.pl by WikiPedia:User:Eloquence
# and mwpush.pl by WikiPedia:User:KeithTyler
#
# What the script does, in order:
#   1. Logs in to the wiki with the credentials configured below.
#   2. Requests Special:Random and extracts the article title from the
#      "permanent link" entry of the returned page.
#   3. Requests the edit form of that article and extracts the text found
#      inside the wpTextbox1 <textarea>.
#   4. Writes the extracted text to fout.txt, the title to fname.txt and the
#      last HTTP response to rfget.log.

use strict;
use warnings;

# call requirements
use Getopt::Std;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;

### Configuration
my $username  = "username";
my $password  = "****";
my $WIKI_PATH = "zh.wikipedia.org";

# Set to 1 to carry on even when the login request is not answered with a
# redirect (the login failure message below refers to this switch).
my $ignore_login_error = 0;

### Helpers

# Return the part of $text that lies between $start_marker and the next
# $end_marker.  When $skip_to is given, the result starts after the first
# occurrence of $skip_to that follows $start_marker (used to skip the rest of
# an opening tag).  Returns undef (empty list in list context) when any of the
# markers cannot be found.
sub _extract_between {
    my ($text, $start_marker, $end_marker, $skip_to) = @_;

    my $start = index($text, $start_marker);
    return if $start < 0;
    $start += length $start_marker;

    if (defined $skip_to) {
        my $skip_pos = index($text, $skip_to, $start);
        return if $skip_pos < 0;
        $start = $skip_pos + length $skip_to;
    }

    # Search from $start so an earlier occurrence of the end marker cannot
    # yield a negative length.
    my $end = index($text, $end_marker, $start);
    return if $end < 0;

    return substr($text, $start, $end - $start);
}

# Write $data to the file $path, replacing any previous content.
# Dies when the file cannot be opened, written or closed.
sub _write_file {
    my ($path, $data) = @_;

    open(my $fh, '>', $path) or die "Could not write $path: $!\n";
    print {$fh} $data        or die "Could not write $path: $!\n";
    close($fh)               or die "Could not write $path: $!\n";
    return;
}

### Login to wiki

# Set up connection data
my $browser    = LWP::UserAgent->new();
my @ns_headers = (
    'User-Agent' => 'MediaWiki Pusher 0.02 by Gfdrtf', #Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7) Gecko/20041107 Firefox/1.0',
    'Accept' => 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Language' => 'en-US',
);

# Hold cookies
$browser->cookie_jar( {} );

# Make login request
# NOTE(review): the credentials are sent over plain http:// -- consider https
# if the required LWP protocol module is available.
my $response = $browser->post(
    "http://" . $WIKI_PATH . "/w/index.php?title=Special:Userlogin&action=submitlogin",
    @ns_headers,
    Content => [
        wpName         => $username,
        wpPassword     => $password,
        wpRemember     => "1",
        wpLoginAttempt => "Log in",
    ],
);

# After logging in, we should be redirected to another page.
# If we aren't, something is wrong.
if ($response->code != 302 && !$ignore_login_error) {
    print <<"END_MESSAGE";
We weren't able to login. This could have the following causes:

* The username ($username) or password may be incorrect.
  Solution: Re-run script with correct credentials.
* The MediaWiki software on the target host has been upgraded.
  Solution: Make some corresponding changes of the codes.
* You are trying to hack this script for other wikis. The wiki you
  are uploading to has cookie check disabled.
  Solution: Try setting \$ignore_login_error to 1.

Regardless, we will now try to write the output from the server to
rfget.debug.out....

END_MESSAGE
    _write_file("rfget.debug.out", $response->as_string);
    print "This seems to have worked. \n"
        . "Take a look at the file for further information or send it to "
        . "moeller AT scireview DOT de if you need help debugging the script.\n";
    exit 1;
}

### Pick a random article

my $URL = "http://" . $WIKI_PATH . "/wiki/Special:Random";
$response = $browser->get($URL, @ns_headers);
$response->is_success
    or die "Could not fetch $URL: " . $response->status_line . "\n";

# extract the filename (the article title as it appears in the permalink URL)
my $filename = _extract_between(
    $response->as_string,
    '<li id="t-permalink"><a href="/w/index.php?title=',
    '&oldid=',
);
defined $filename && length $filename
    or die "Could not find the article title in the page returned by $URL.\n";

### Fetch the edit form of that article and pull out the textarea body

$URL = "http://" . $WIKI_PATH . "/w/index.php?title=" . $filename . "&action=edit";
$response = $browser->get($URL, @ns_headers);
$response->is_success
    or die "Could not fetch $URL: " . $response->status_line . "\n";

# The start marker is only the beginning of the opening tag; '>' skips the
# remaining attributes, whatever they are.
# NOTE(review): the text is written exactly as it appears in the HTML;
# presumably it still contains HTML entities -- confirm whether it should be
# decoded.
my $content = _extract_between(
    $response->as_string,
    q{<textarea tabindex='1' accesskey="," name="wpTextbox1" id="wpTextbox1" rows='25'},
    '</textarea>',
    '>',
);
defined $content
    or die "Could not find the edit box in the page returned by $URL.\n";

### Write results

_write_file("fout.txt",  $content);
_write_file("fname.txt", $filename);

print "Everything seems to be OK. Log will be written to rfget.log.\n";
_write_file("rfget.log", $response->as_string);