perl 爬取上市公司业绩预告
2016-04-01 22:28
316 查看
<pre name="code" class="python">
# Scrape the earnings-forecast ("yjyg") listing from data.10jqka.com.cn.
#
# For every report date in @array the script requests page 1 of the listing,
# reads the highest page number advertised by the pager links, then walks
# every page and appends each table row to "<date>-<page>.txt" as eight
# '|'-separated fields.  Each row is also echoed to STDOUT.
#
# Pages are parsed in memory; no intermediate data.html / ths.html files are
# written.  A page whose request fails is reported on STDERR and skipped
# instead of re-parsing whatever the previous request left behind.
use strict;
use warnings;
use LWP::UserAgent;
use utf8;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

# Number of leading cells taken from every table row.
my $COLUMNS = 8;

# The parsed text is decoded characters, so STDOUT needs an encoding layer.
binmode STDOUT, ':encoding(UTF-8)';

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# Report dates to download.
# NOTE(review): 2014-06-30 is absent from this list -- confirm whether that
# is intentional.
my @array = (
    '2016-12-31', '2016-03-31', '2015-12-31',
    '2015-09-30', '2015-06-30', '2015-03-31',
    '2014-12-31', '2014-09-30', '2014-03-31',
);

foreach my $date (@array) {
    print "\$_ is $date\n";
    my $url = page_url($date, 1);
    print "\$url is $url\n";

    # Page 1 serves two purposes: it carries the pager links that tell us
    # how many pages exist, and it is itself the first page of data.
    my $tree = fetch_tree($ua, $url);
    next unless $tree;

    my $title = $date;
    print "\$title is $title\n";

    my @pages = $tree->find_by_tag_name('a');
    print "\@pages is @pages\n";

    my $max = max_page(@pages);
    print "\$max is $max\n";

    append_rows($tree, "$title-1.txt");
    $tree->delete;

    # Pause once per date before issuing further requests.
    sleep(5);

    for my $m (2 .. $max) {
        my $page_tree = fetch_tree($ua, page_url($date, $m));
        next unless $page_tree;
        append_rows($page_tree, "$title-$m.txt");
        $page_tree->delete;
    }
}

# Build the listing URL for one report date and page number.
sub page_url {
    my ($date, $page) = @_;
    return "http://data.10jqka.com.cn/financial/yjyg/date/$date/board/ALL/field/enddate/order/desc/page/$page/ajax/1/";
}

# Download $url and parse it into an HTML::TreeBuilder::XPath tree.
# Returns the tree, or nothing (after a warning) when the request fails or
# the body cannot be decoded.  The caller is responsible for ->delete.
sub fetch_tree {
    my ($agent, $url) = @_;

    my $response = $agent->get($url);
    unless ($response->is_success) {
        warn "GET $url failed: " . $response->status_line . "\n";
        return;
    }

    my $html = $response->decoded_content;
    unless (defined $html) {
        warn "GET $url: response body could not be decoded\n";
        return;
    }

    # The body is wrapped in <html> tags before parsing, exactly as the
    # on-disk copy used to be (presumably the ajax endpoint returns a
    # fragment -- verify against a live response).
    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse_content("<html>\n" . $html . "</html>\n");
    return $tree;
}

# Return the largest numeric "page" attribute found on the given <a>
# elements, or 1 when none of them carries one.
sub max_page {
    my @links = @_;

    my $max = 1;
    foreach my $link (@links) {
        my $page = $link->attr('page');
        next unless defined $page && $page =~ /\A[0-9]+\z/;
        $max = $page if $page > $max;
    }
    return $max;
}

# Append every <tr> of $tree to $file, one line per row, with the first
# $COLUMNS cells joined by '|'.  Each row is also printed to STDOUT with the
# cells concatenated.  Rows with fewer cells are padded with empty fields;
# rows without any element children are skipped.  The file is not created
# when the page has no rows.  Dies if the file cannot be opened or closed.
sub append_rows {
    my ($tree, $file) = @_;

    my @lines;
    foreach my $row ($tree->find_by_tag_name('tr')) {
        # content_list may contain plain text nodes; keep elements only.
        my @cells = grep { ref } $row->content_list;
        next unless @cells;

        my @text = map { defined $cells[$_] ? $cells[$_]->as_text : '' }
                   0 .. $COLUMNS - 1;

        print join('', @text), "\n";
        push @lines, join('|', @text) . "\n";
    }
    return unless @lines;

    open(my $out, '>>:encoding(UTF-8)', $file)
        or die "open $file failed: $!";
    print {$out} @lines;
    close($out) or die "close $file failed: $!";
    return;
}
相关文章推荐
- 读《九败一胜》有感
- [Java学习] 移动互联网时代为什么必学java
- 互联网人必须看看这些书
- 用诚信铸就发展平台的“宝”理念
- 浅析互联网系统和传统企业IT系统的异同
- 望京进化史:下一个中国硅谷 互联网巨头们携手而来
- 想用互联网营销赚到100万,请先学会这些(疯转)
- 公职老师像网红一样赚“外快”合法吗?
- 你能排第几?2016互联网行业薪酬数据分析
- 对互联网中常见地图的坐标系探讨
- 【互联网】剖析车联网的完美形态,机器人化还是智能交通?
- 互联网协议
- A股涉及无人驾驶概念的上市公司
- 资料大分享!!国家电网!!!互联网开发技术!!!
- 多点易到异业跨界,物价真能回到10年前?
- java成为移动互联网时代必学语言的六大理由
- 数据分析在互联网金融的应用
- Open-Falcon第三步安装Agent (小米开源互联网企业级监控系统)
- 传统企业互联网转型,引爆跨界IT管理的工具与经验
- 【互联网】开发APP最重要的8个细节