您的位置:首页 > 编程语言 > PHP开发

PHP采集程序中常用的函数

2016-04-29 22:16 621 查看
函数描述及例子 PHP采集程序中常用的函数 查询关键字 PHP采集程序中常用的函数

view
source

print?

001.
//获得当前的脚本网址

002.
/**
 * Return the URL (path plus query string) of the current request.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise the value is
 * rebuilt from $_SERVER["PHP_SELF"] and, when non-empty,
 * $_SERVER["QUERY_STRING"].
 *
 * @return string
 */
function get_php_url(){
    // REQUEST_URI is returned unchanged whenever it is available.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback path: script name, plus "?query" only when a query exists.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName . "?" . $_SERVER["QUERY_STRING"];
}

013.
//把全角数字转为半角数字

014.
/**
 * Convert full-width digits to half-width digits and reduce the value to a
 * plain number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, as are zeros at the very start of the string.
 *
 * @param  string $fnum Text that may contain full-width digits.
 * @return string|int   The cleaned digit string, or integer 0 when nothing
 *                      is left.
 */
function GetAlabNum($fnum){
    // The search table must hold the FULL-WIDTH forms; with half-width digits
    // on both sides the conversion does nothing.
    // NOTE(review): literals are UTF-8; input is assumed to use the same
    // encoding as this file - confirm.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // preg_replace replaces ereg_replace, which no longer exists in PHP 7+.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === '' || $fnum === null) {
        $fnum = 0;
    }
    return $fnum;
}

022.
//去除HTML标记

023.
/**
 * Convert plain text into HTML-safe text.
 *
 * Angle brackets are escaped as entities and every run of line breaks is
 * turned into a single "<br/>" followed by CRLF.
 *
 * @param  string $txt Plain text.
 * @return string      Text suitable for embedding in HTML.
 */
function Text2Html($txt){
    // NOTE(review): the search string in the listing is whitespace-only and was
    // mangled by extraction; it is assumed here to be two ASCII spaces mapped to
    // one ideographic space (U+3000) so runs of spaces stay visible - confirm.
    $txt = str_replace("  ", "　", $txt);

    // Escape the angle brackets; replacing "<" with "<" would change nothing.
    $txt = str_replace("<", "&lt;", $txt);
    $txt = str_replace(">", "&gt;", $txt);

    // Greedy match so that "\r\n" (or several blank lines) yields one break.
    // With the ungreedy U flag every single CR and LF produced its own break.
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);
    return $txt;
}

031.
//清除HTML标记

032.
/**
 * Neutralise HTML markup by escaping angle brackets.
 *
 * Only "<" and ">" are rewritten (to "&lt;" and "&gt;"); ampersands and
 * quotes are left untouched.
 *
 * @param  string $str Text that may contain HTML tags.
 * @return string      The text with its angle brackets escaped.
 */
function ClearHtml($str){
    // The replacement must be the entity; mapping "<" to "<" is a no-op.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}

037.
//相对路径转化成绝对路径

038.
/**
 * Rewrite root-relative href/src attributes so they point at the feed's host.
 *
 * Only double-quoted attributes whose value starts with a single "/" are
 * rewritten. The content is returned unchanged when no host can be derived
 * from $feed_url or when its scheme is not http, https or ftp.
 *
 * @param  string $content  HTML fragment to rewrite.
 * @param  string $feed_url URL the fragment was fetched from.
 * @return string           The fragment with root-relative links made absolute.
 */
function relative_to_absolute($content, $feed_url)
{
    // Scheme prefix such as "http://"; $protocol stays empty for other schemes.
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host: drop the scheme prefix, then everything from the first "/".
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    // Escape "\" and "$" so the host can never be read as a backreference
    // inside the replacement string.
    $base = addcslashes($protocol[0] . $server_url, '\\$');

    // (?!\/) skips protocol-relative URLs ("//cdn.example.com/x"), which
    // would otherwise be turned into "http://host//cdn.example.com/x".
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="' . $base . '/', $content);
}

053.
//取得所有链接

054.
/**
 * Collect every link found in an HTML fragment.
 *
 * Matches anchors whose first attribute is href (quoted or unquoted) and
 * whose link text contains no nested tag.
 *
 * @param  string $code HTML to scan.
 * @return array        array('name' => link texts, 'url' => href values);
 *                      both lists are empty when nothing matches.
 */
function get_all_url($code){
    // The quote class is ["'] - a "|" inside a character class is a literal
    // pipe, not alternation. \s keeps whitespace out of unquoted URLs.
    preg_match_all(
        '~<a\s+href=["\']?([^>"\'\s]+)["\']?\s*[^>]*>([^>]+)</a>~i',
        $code,
        $arr
    );
    return array('name' => $arr[2], 'url' => $arr[1]);
}

058.
//获取指定标记中的内容

059.
/**
 * Return the text found between two marker strings.
 *
 * The result is the part of $str after the first occurrence of $start, cut
 * at the next occurrence of $start or $end, whichever comes first.
 *
 * @param  string $str   Text to search.
 * @param  string $start Opening marker.
 * @param  string $end   Closing marker.
 * @return string|null   The enclosed text; '' when $start does not occur;
 *                       null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }

    $pieces = explode($start, $str);
    // Without this guard a missing $start read an undefined index.
    if (!isset($pieces[1])) {
        return '';
    }

    $inner = explode($end, $pieces[1]);
    return $inner[0];
}

067.
//HTML表格的每行转为CSV格式数组

068.
/**
 * Convert each row of an HTML table into one CSV-style string.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas,
 * e.g. '"a","b"'. Quotes inside cell text are not escaped.
 *
 * @param  string $table HTML containing <tr>/<td> markup.
 * @return array         One string per table row.
 */
function get_tr_array($table)
{
    // <td ...> opens a quoted field, </td> closes it and adds the separator,
    // </tr> becomes a row marker. Empty search strings here would leave the
    // markup untouched and the result would always be empty.
    $table = preg_replace('/<td[^>]*?>/si', '"', $table);
    $table = str_replace("</td>", '",', $table);
    $table = str_replace("</tr>", "{tr}", $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Strip line breaks together with the whitespace that follows them.
    $table = preg_replace('/([\r\n])[\s]+/', "", $table);
    // NOTE(review): the listing shows two whitespace-only search strings here;
    // they are read as CR and LF - confirm against the original article.
    $table = str_replace(array("\r", "\n"), "", $table);

    // Each row ends in ",{tr}"; the piece after the last marker is leftover.
    $table = explode(",{tr}", $table);
    array_pop($table);
    return $table;
}

082.
//将HTML表格的每行每列转为数组,采集表格数据

083.
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * @param  string $table HTML containing <table>/<tr>/<td> markup.
 * @return array         One array per row, each holding that row's cell
 *                       texts; an empty array when no row is found.
 */
function get_td_array($table)
{
    // Opening tags carry no information and are dropped outright.
    $table = preg_replace('/<table[^>]*?>/si', "", $table);
    $table = preg_replace('/<tr[^>]*?>/si', "", $table);
    $table = preg_replace('/<td[^>]*?>/si', "", $table);

    // Closing tags become row/cell markers. Empty search strings here would
    // leave the markup untouched and no row would ever be found.
    $table = str_replace("</tr>", "{tr}", $table);
    $table = str_replace("</td>", "{td}", $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Strip line breaks together with the whitespace that follows them.
    $table = preg_replace('/([\r\n])[\s]+/', "", $table);
    // NOTE(review): the listing shows two whitespace-only search strings here;
    // they are read as CR and LF - confirm against the original article.
    $table = str_replace(array("\r", "\n"), "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows);   // text after the last row marker is leftover

    // Start from an empty array so a table without rows yields array()
    // instead of an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        array_pop($td); // text after the last cell marker is leftover
        $td_array[] = $td;
    }
    return $td_array;
}

105.
//返回字符串中的所有英文单词;$distinct=true 时去除重复

106.
/**
 * Return every run of ASCII letters found in a string, sorted.
 *
 * @param  string $str      Text to scan.
 * @param  bool   $distinct When true (default) duplicate words are removed.
 * @return array            Sorted, consecutively indexed list of words.
 */
function split_en_str($str, $distinct=true)
{
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}

114.

115.
函数描述及例子

116.

117.
PHP采集程序中常用的函数

118.

119.
查询关键字

120.

121.
PHP采集程序中常用的函数

122.
<?php

123.
//获得当前的脚本网址

124.
/**
 * Return the URL (path plus query string) of the current request.
 *
 * Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise the value is
 * rebuilt from $_SERVER["PHP_SELF"] and, when non-empty,
 * $_SERVER["QUERY_STRING"].
 *
 * @return string
 */
function get_php_url(){
    // REQUEST_URI is returned unchanged whenever it is available.
    if (!empty($_SERVER["REQUEST_URI"])) {
        return $_SERVER["REQUEST_URI"];
    }

    // Fallback path: script name, plus "?query" only when a query exists.
    $scriptName = isset($_SERVER["PHP_SELF"]) ? $_SERVER["PHP_SELF"] : "";
    if (empty($_SERVER["QUERY_STRING"])) {
        return $scriptName;
    }
    return $scriptName . "?" . $_SERVER["QUERY_STRING"];
}

135.
//把全角数字转为半角数字

136.
/**
 * Convert full-width digits to half-width digits and reduce the value to a
 * plain number string.
 *
 * After the digit conversion every character other than 0-9 and "." is
 * removed, as are zeros at the very start of the string.
 *
 * @param  string $fnum Text that may contain full-width digits.
 * @return string|int   The cleaned digit string, or integer 0 when nothing
 *                      is left.
 */
function GetAlabNum($fnum){
    // The search table must hold the FULL-WIDTH forms; with half-width digits
    // on both sides the conversion does nothing.
    // NOTE(review): literals are UTF-8; input is assumed to use the same
    // encoding as this file - confirm.
    $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
    $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
    $fnum = str_replace($fullWidth, $halfWidth, $fnum);

    // preg_replace replaces ereg_replace, which no longer exists in PHP 7+.
    $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

    if ($fnum === '' || $fnum === null) {
        $fnum = 0;
    }
    return $fnum;
}

144.
//去除HTML标记

145.
/**
 * Convert plain text into HTML-safe text.
 *
 * Angle brackets are escaped as entities and every run of line breaks is
 * turned into a single "<br/>" followed by CRLF.
 *
 * @param  string $txt Plain text.
 * @return string      Text suitable for embedding in HTML.
 */
function Text2Html($txt){
    // NOTE(review): the search string in the listing is whitespace-only and was
    // mangled by extraction; it is assumed here to be two ASCII spaces mapped to
    // one ideographic space (U+3000) so runs of spaces stay visible - confirm.
    $txt = str_replace("  ", "　", $txt);

    // Escape the angle brackets; replacing "<" with "<" would change nothing.
    $txt = str_replace("<", "&lt;", $txt);
    $txt = str_replace(">", "&gt;", $txt);

    // Greedy match so that "\r\n" (or several blank lines) yields one break.
    // With the ungreedy U flag every single CR and LF produced its own break.
    $txt = preg_replace('/[\r\n]+/', "<br/>\r\n", $txt);
    return $txt;
}

153.
//清除HTML标记

154.
/**
 * Neutralise HTML markup by escaping angle brackets.
 *
 * Only "<" and ">" are rewritten (to "&lt;" and "&gt;"); ampersands and
 * quotes are left untouched.
 *
 * @param  string $str Text that may contain HTML tags.
 * @return string      The text with its angle brackets escaped.
 */
function ClearHtml($str){
    // The replacement must be the entity; mapping "<" to "<" is a no-op.
    return str_replace(array('<', '>'), array('&lt;', '&gt;'), $str);
}

159.
//相对路径转化成绝对路径

160.
/**
 * Rewrite root-relative href/src attributes so they point at the feed's host.
 *
 * Only double-quoted attributes whose value starts with a single "/" are
 * rewritten. The content is returned unchanged when no host can be derived
 * from $feed_url or when its scheme is not http, https or ftp.
 *
 * @param  string $content  HTML fragment to rewrite.
 * @param  string $feed_url URL the fragment was fetched from.
 * @return string           The fragment with root-relative links made absolute.
 */
function relative_to_absolute($content, $feed_url)
{
    // Scheme prefix such as "http://"; $protocol stays empty for other schemes.
    preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol);

    // Host: drop the scheme prefix, then everything from the first "/".
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '' || !isset($protocol[0])) {
        return $content;
    }

    // Escape "\" and "$" so the host can never be read as a backreference
    // inside the replacement string.
    $base = addcslashes($protocol[0] . $server_url, '\\$');

    // (?!\/) skips protocol-relative URLs ("//cdn.example.com/x"), which
    // would otherwise be turned into "http://host//cdn.example.com/x".
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="' . $base . '/', $content);
}

175.
//取得所有链接

176.
/**
 * Collect every link found in an HTML fragment.
 *
 * Matches anchors whose first attribute is href (quoted or unquoted) and
 * whose link text contains no nested tag.
 *
 * @param  string $code HTML to scan.
 * @return array        array('name' => link texts, 'url' => href values);
 *                      both lists are empty when nothing matches.
 */
function get_all_url($code){
    // The quote class is ["'] - a "|" inside a character class is a literal
    // pipe, not alternation. \s keeps whitespace out of unquoted URLs.
    preg_match_all(
        '~<a\s+href=["\']?([^>"\'\s]+)["\']?\s*[^>]*>([^>]+)</a>~i',
        $code,
        $arr
    );
    return array('name' => $arr[2], 'url' => $arr[1]);
}

180.
//获取指定标记中的内容

181.
/**
 * Return the text found between two marker strings.
 *
 * The result is the part of $str after the first occurrence of $start, cut
 * at the next occurrence of $start or $end, whichever comes first.
 *
 * @param  string $str   Text to search.
 * @param  string $start Opening marker.
 * @param  string $end   Closing marker.
 * @return string|null   The enclosed text; '' when $start does not occur;
 *                       null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return;
    }

    $pieces = explode($start, $str);
    // Without this guard a missing $start read an undefined index.
    if (!isset($pieces[1])) {
        return '';
    }

    $inner = explode($end, $pieces[1]);
    return $inner[0];
}

189.
//HTML表格的每行转为CSV格式数组

190.
/**
 * Convert each row of an HTML table into one CSV-style string.
 *
 * Every cell is wrapped in double quotes and cells are joined with commas,
 * e.g. '"a","b"'. Quotes inside cell text are not escaped.
 *
 * @param  string $table HTML containing <tr>/<td> markup.
 * @return array         One string per table row.
 */
function get_tr_array($table)
{
    // <td ...> opens a quoted field, </td> closes it and adds the separator,
    // </tr> becomes a row marker. Empty search strings here would leave the
    // markup untouched and the result would always be empty.
    $table = preg_replace('/<td[^>]*?>/si', '"', $table);
    $table = str_replace("</td>", '",', $table);
    $table = str_replace("</tr>", "{tr}", $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Strip line breaks together with the whitespace that follows them.
    $table = preg_replace('/([\r\n])[\s]+/', "", $table);
    // NOTE(review): the listing shows two whitespace-only search strings here;
    // they are read as CR and LF - confirm against the original article.
    $table = str_replace(array("\r", "\n"), "", $table);

    // Each row ends in ",{tr}"; the piece after the last marker is leftover.
    $table = explode(",{tr}", $table);
    array_pop($table);
    return $table;
}

204.
//将HTML表格的每行每列转为数组,采集表格数据

205.
/**
 * Convert an HTML table into a two-dimensional array of cell texts.
 *
 * @param  string $table HTML containing <table>/<tr>/<td> markup.
 * @return array         One array per row, each holding that row's cell
 *                       texts; an empty array when no row is found.
 */
function get_td_array($table)
{
    // Opening tags carry no information and are dropped outright.
    $table = preg_replace('/<table[^>]*?>/si', "", $table);
    $table = preg_replace('/<tr[^>]*?>/si', "", $table);
    $table = preg_replace('/<td[^>]*?>/si', "", $table);

    // Closing tags become row/cell markers. Empty search strings here would
    // leave the markup untouched and no row would ever be found.
    $table = str_replace("</tr>", "{tr}", $table);
    $table = str_replace("</td>", "{td}", $table);

    // Strip every remaining HTML tag.
    $table = preg_replace('/<[\/!]*?[^<>]*?>/si', "", $table);

    // Strip line breaks together with the whitespace that follows them.
    $table = preg_replace('/([\r\n])[\s]+/', "", $table);
    // NOTE(review): the listing shows two whitespace-only search strings here;
    // they are read as CR and LF - confirm against the original article.
    $table = str_replace(array("\r", "\n"), "", $table);

    $rows = explode('{tr}', $table);
    array_pop($rows);   // text after the last row marker is leftover

    // Start from an empty array so a table without rows yields array()
    // instead of an undefined variable.
    $td_array = array();
    foreach ($rows as $tr) {
        $td = explode('{td}', $tr);
        array_pop($td); // text after the last cell marker is leftover
        $td_array[] = $td;
    }
    return $td_array;
}

227.
//返回字符串中的所有英文单词;$distinct=true 时去除重复

228.
/**
 * Return every run of ASCII letters found in a string, sorted.
 *
 * @param  string $str      Text to scan.
 * @param  bool   $distinct When true (default) duplicate words are removed.
 * @return array            Sorted, consecutively indexed list of words.
 */
function split_en_str($str, $distinct=true)
{
    preg_match_all('/([a-zA-Z]+)/', $str, $match);
    $words = $match[1];

    if ($distinct == true) {
        $words = array_unique($words);
    }

    // sort() also re-indexes, closing the gaps array_unique() leaves behind.
    sort($words);
    return $words;
}

236.

237.
</td[^></tr[^></table[^></td[^></a\s+href=[
"|\']?([^></td[^></tr[^></table[^></td[^></a\s+href=["
|\']?([^>




除非特别声明,PHP100新闻均为原创或投稿报道,转载请注明作者及原文链接

原文地址: http://www.php100.com/html/php/hanshu/2013/0903/1039.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: