win nt下调用nutch的脚本,可实现自动运行咯。
2006-05-01 23:25
429 查看
现在正对搜索引擎有兴趣,
我的搜索基于nutch,并结合了ICTCLAS,分词和速度都不错。
这样做可以不用cygwin来模拟linux
下面是在win nt下调用nutch的脚本,
你可以自己改一下,这样就可以很方便的实现自动运行了。
有兴趣的朋友可以用一下,大大方便了操作。
nutch.bat
@rem Wrapper: start nutch1.bat from this folder in a child cmd with delayed
@rem expansion switched on. The script path is quoted so that an install
@rem folder containing spaces still works; the outer pair of quotes is the
@rem one that cmd /c strips from the command string.
@cmd /V:on /c ""%~dp0nutch1.bat" %*"
nutch1.bat
@echo on
rem *********************************************************************
rem * A script to launch nutch on Windows 2000/XP System.
rem *
rem * Written by babatu
rem * babatu@gmail.com blog: blog.babatu.com
rem *
rem * Usage: nutch COMMAND [arguments...]
rem * COMMAND is one of the names listed under :msg, or a Java class name.
rem *
rem * Environment:
rem *   JAVA_HOME  - required; JDK folder used for bin\java and tools.jar.
rem *   NUTCH_HOME - optional; defaults to the parent of this script's folder.
rem *   CLASSPATH  - optional; existing entries are kept at the front.
rem *
rem * Delayed expansion is needed by the FOR loops below. It is switched on
rem * by setlocal, and the script still works when started with cmd /V:on.
rem *********************************************************************
if "%OS%"=="Windows_NT" @setlocal EnableDelayedExpansion
if "%OS%"=="WINNT" @setlocal EnableDelayedExpansion
if "%~1"=="" goto :msg
goto :begin
:msg
echo "Usage: nutch COMMAND"
echo "where COMMAND is one of:"
echo " crawl one-step crawler for intranets"
echo " readdb read / dump crawl db"
echo " readlinkdb read / dump link db"
echo " inject inject new urls into the database"
echo " generate generate new segments to fetch"
echo " fetch fetch a segment's pages"
echo " parse parse a segment's pages"
echo " segread read / dump segment data"
echo " updatedb update crawl db from segments after fetching"
echo " invertlinks create a linkdb from parsed segments"
echo " index run the indexer on parsed segments and linkdb"
echo " merge merge several segment indexes"
echo " dedup remove duplicates from a set of segment indexes"
echo " plugin load a plugin and run one of its classes main()"
echo " server run a search server"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
pause
goto :end
:begin
rem Fail early with a clear message; the java path and tools.jar entry below
rem are both built from JAVA_HOME.
if not defined JAVA_HOME (
  echo Error: JAVA_HOME is not set.
  goto :end
)
rem Default NUTCH_HOME to the parent of the folder that holds this script
rem (the dp0 modifier on argument 0 yields the script's drive and path).
if not defined NUTCH_HOME set "NUTCH_HOME=%~dp0.."
echo %NUTCH_HOME%
rem Paths are quoted and use backslashes: cmd may parse an unquoted forward
rem slash as a switch prefix, and unquoted paths break on spaces.
rem Start from any caller-supplied CLASSPATH, then add tools.jar and conf.
if defined CLASSPATH (
  set "CLASSPATH=%CLASSPATH%;%JAVA_HOME%\lib\tools.jar"
) else (
  set "CLASSPATH=%JAVA_HOME%\lib\tools.jar"
)
set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\conf;"
echo %CLASSPATH%
echo before other
rem for developers, add plugins, job and test code to CLASSPATH
rem (each FOR scan is skipped when its root folder does not exist)
if exist "%NUTCH_HOME%\build\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build"
if exist "%NUTCH_HOME%\build" for /R "%NUTCH_HOME%\build" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
if exist "%NUTCH_HOME%\build\test\classes" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build\test\classes"
rem for releases, add Nutch job to CLASSPATH
for /R "%NUTCH_HOME%" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
rem add plugins to classpath
if exist "%NUTCH_HOME%\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%"
rem add libs to CLASSPATH
if exist "%NUTCH_HOME%\lib" for /R "%NUTCH_HOME%\lib" %%f in (*.jar) do set "CLASSPATH=!CLASSPATH!;%%f"
echo %CLASSPATH%
rem translate command
rem Clear CLASS first so a value inherited from the caller's environment
rem cannot defeat the CLASSNAME fallback at the end of this list.
set "CLASS="
if "%~1"=="crawl" set CLASS=org.apache.nutch.crawl.Crawl
if "%~1"=="inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1"=="generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.parse.ParseSegment
if "%~1"=="readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
if "%~1"=="readlinkdb" set CLASS=org.apache.nutch.crawl.LinkDbReader
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1"=="invertlinks" set CLASS=org.apache.nutch.crawl.LinkDb
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="plugin" set CLASS=org.apache.nutch.plugin.PluginRepository
rem No quoting around the nested class name: the dollar sign is an ordinary
rem character in cmd, and single quotes would be passed to java literally.
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
if not defined CLASS set CLASS=%~1
rem Forward only what follows COMMAND. SHIFT does not alter the all-arguments
rem variable, so the leading COMMAND token is removed by substring
rem replacement; otherwise java would receive COMMAND as its first argument.
set ARGS=%*
set "ARGS=!ARGS:*%1=!"
"%JAVA_HOME%\bin\java" -cp "%CLASSPATH%" %CLASS% %ARGS%
if "%OS%"=="Windows_NT" @endlocal
if "%OS%"=="WINNT" @endlocal
:end
我的搜索基于nutch,并结合了ICTCLAS,分词和速度都不错。
这样做可以不用cygwin来模拟linux
下面是在win nt下调用nutch的脚本,
你可以自己改一下,这样就可以很方便的实现自动运行了。
有兴趣的朋友可以用一下,大大方便了操作。
nutch.bat
@rem Wrapper: start nutch1.bat from this folder in a child cmd with delayed
@rem expansion switched on. The script path is quoted so that an install
@rem folder containing spaces still works; the outer pair of quotes is the
@rem one that cmd /c strips from the command string.
@cmd /V:on /c ""%~dp0nutch1.bat" %*"
nutch1.bat
@echo on
rem *********************************************************************
rem * A script to launch nutch on Windows 2000/XP System.
rem *
rem * Written by babatu
rem * babatu@gmail.com blog: blog.babatu.com
rem *
rem * Usage: nutch COMMAND [arguments...]
rem * COMMAND is one of the names listed under :msg, or a Java class name.
rem *
rem * Environment:
rem *   JAVA_HOME  - required; JDK folder used for bin\java and tools.jar.
rem *   NUTCH_HOME - optional; defaults to the parent of this script's folder.
rem *   CLASSPATH  - optional; existing entries are kept at the front.
rem *
rem * Delayed expansion is needed by the FOR loops below. It is switched on
rem * by setlocal, and the script still works when started with cmd /V:on.
rem *********************************************************************
if "%OS%"=="Windows_NT" @setlocal EnableDelayedExpansion
if "%OS%"=="WINNT" @setlocal EnableDelayedExpansion
if "%~1"=="" goto :msg
goto :begin
:msg
echo "Usage: nutch COMMAND"
echo "where COMMAND is one of:"
echo " crawl one-step crawler for intranets"
echo " readdb read / dump crawl db"
echo " readlinkdb read / dump link db"
echo " inject inject new urls into the database"
echo " generate generate new segments to fetch"
echo " fetch fetch a segment's pages"
echo " parse parse a segment's pages"
echo " segread read / dump segment data"
echo " updatedb update crawl db from segments after fetching"
echo " invertlinks create a linkdb from parsed segments"
echo " index run the indexer on parsed segments and linkdb"
echo " merge merge several segment indexes"
echo " dedup remove duplicates from a set of segment indexes"
echo " plugin load a plugin and run one of its classes main()"
echo " server run a search server"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
pause
goto :end
:begin
rem Fail early with a clear message; the java path and tools.jar entry below
rem are both built from JAVA_HOME.
if not defined JAVA_HOME (
  echo Error: JAVA_HOME is not set.
  goto :end
)
rem Default NUTCH_HOME to the parent of the folder that holds this script
rem (the dp0 modifier on argument 0 yields the script's drive and path).
if not defined NUTCH_HOME set "NUTCH_HOME=%~dp0.."
echo %NUTCH_HOME%
rem Paths are quoted and use backslashes: cmd may parse an unquoted forward
rem slash as a switch prefix, and unquoted paths break on spaces.
rem Start from any caller-supplied CLASSPATH, then add tools.jar and conf.
if defined CLASSPATH (
  set "CLASSPATH=%CLASSPATH%;%JAVA_HOME%\lib\tools.jar"
) else (
  set "CLASSPATH=%JAVA_HOME%\lib\tools.jar"
)
set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\conf;"
echo %CLASSPATH%
echo before other
rem for developers, add plugins, job and test code to CLASSPATH
rem (each FOR scan is skipped when its root folder does not exist)
if exist "%NUTCH_HOME%\build\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build"
if exist "%NUTCH_HOME%\build" for /R "%NUTCH_HOME%\build" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
if exist "%NUTCH_HOME%\build\test\classes" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%\build\test\classes"
rem for releases, add Nutch job to CLASSPATH
for /R "%NUTCH_HOME%" %%i in (nutch*.job) do set "CLASSPATH=!CLASSPATH!;%%i"
rem add plugins to classpath
if exist "%NUTCH_HOME%\plugins" set "CLASSPATH=%CLASSPATH%;%NUTCH_HOME%"
rem add libs to CLASSPATH
if exist "%NUTCH_HOME%\lib" for /R "%NUTCH_HOME%\lib" %%f in (*.jar) do set "CLASSPATH=!CLASSPATH!;%%f"
echo %CLASSPATH%
rem translate command
rem Clear CLASS first so a value inherited from the caller's environment
rem cannot defeat the CLASSNAME fallback at the end of this list.
set "CLASS="
if "%~1"=="crawl" set CLASS=org.apache.nutch.crawl.Crawl
if "%~1"=="inject" set CLASS=org.apache.nutch.crawl.Injector
if "%~1"=="generate" set CLASS=org.apache.nutch.crawl.Generator
if "%~1"=="fetch" set CLASS=org.apache.nutch.fetcher.Fetcher
if "%~1"=="parse" set CLASS=org.apache.nutch.parse.ParseSegment
if "%~1"=="readdb" set CLASS=org.apache.nutch.crawl.CrawlDbReader
if "%~1"=="readlinkdb" set CLASS=org.apache.nutch.crawl.LinkDbReader
if "%~1"=="segread" set CLASS=org.apache.nutch.segment.SegmentReader
if "%~1"=="updatedb" set CLASS=org.apache.nutch.crawl.CrawlDb
if "%~1"=="invertlinks" set CLASS=org.apache.nutch.crawl.LinkDb
if "%~1"=="index" set CLASS=org.apache.nutch.indexer.Indexer
if "%~1"=="dedup" set CLASS=org.apache.nutch.indexer.DeleteDuplicates
if "%~1"=="merge" set CLASS=org.apache.nutch.indexer.IndexMerger
if "%~1"=="plugin" set CLASS=org.apache.nutch.plugin.PluginRepository
rem No quoting around the nested class name: the dollar sign is an ordinary
rem character in cmd, and single quotes would be passed to java literally.
if "%~1"=="server" set CLASS=org.apache.nutch.searcher.DistributedSearch$Server
if not defined CLASS set CLASS=%~1
rem Forward only what follows COMMAND. SHIFT does not alter the all-arguments
rem variable, so the leading COMMAND token is removed by substring
rem replacement; otherwise java would receive COMMAND as its first argument.
set ARGS=%*
set "ARGS=!ARGS:*%1=!"
"%JAVA_HOME%\bin\java" -cp "%CLASSPATH%" %CLASS% %ARGS%
if "%OS%"=="Windows_NT" @endlocal
if "%OS%"=="WINNT" @endlocal
:end
相关文章推荐
- Windows下调用nutch的脚本,可实现自动运行
- shell脚本调用expect命令实现命令行自动交互
- shell脚本调用expect命令实现命令行自动交互
- shell+expect实现多台服务器自动运行脚本。
- Shell脚本中调用expect,实现自动输入密码的功能
- 通过crond自动运行Python脚本实现多台linux服务器的监控
- 用Shell编程写一个能实现自动安装目录和运行游戏的脚本
- Python实现保证只能运行一个脚本实例
- 运行时中函数调用黑魔法swizzle,交换两个方法或者改变一个方法的实现
- shell脚本实现关于可执行exec文件的自动打包
- ubuntu下开机自动运行脚本、定时运行脚本
- 脚本监测多ISP链路实现自动切换(初级脚本)
- 在C#中调用VBScript、javascript等脚本的实现
- CentOS开机自动运行程序的脚本
- linux脚本实现ssh自动登陆远程桌面
- Android实现开机自动运行程序
- 无密钥登录的自动脚本实现(ssh-copy-id、expect免交互输入脚本)
- Android 电话的反射调用机制实现自动接听电话
- 腾讯云cdn自动刷新api调用功能实现-python
- ASP.NET中服务器控件Button调用javascript代码运用正则表达式验证TextBox中输入的是否为正整数或正小数,符合要求则继续运行服务器端代码功能的实现