将以下文本内容保存到 NUTCH_HOME\bin 目录下,文件名可自定(例如 nutch.bat,扩展名须为 .bat 或 .cmd),并设置好 JAVA_HOME 和 NUTCH_HOME,然后在命令行运行 %NUTCH_HOME%\bin\nutch(若使用了其他文件名,请相应替换)。
@echo off
rem ---------------------------------------------------------------------------
rem Windows launcher for Nutch 0.7.2.
rem
rem Usage:  nutch COMMAND [args...]
rem   COMMAND is one of the command names printed by the usage text below, or
rem   a fully qualified Java class name, which is then run directly.
rem
rem Environment:
rem   JAVA_HOME   required; the JVM launched is JAVA_HOME\bin\java.
rem   NUTCH_HOME  optional; defaults to C:\work\nutch-0.7.2 when not defined.
rem
rem Only the first eight arguments after COMMAND are forwarded to Java.
rem Without a COMMAND the usage text is printed and nothing is launched.
rem ---------------------------------------------------------------------------

rem Keep every variable set below local to this script run.
setlocal

set "JAVA_HEAP_MAX=-Xmx512M"

rem A command was supplied: skip the usage text.
if not "%~1"=="" goto INIT

echo Title:欢迎使用北京线点科技 Nutch 运行脚本
echo Author:jaddy0302 mail:jaddy0302@126.com QQ:5622928
echo Site:http://www.xd-tech.com.cn 线点科技 专业垂直搜索引擎产品
echo Nutch Version: 0.7.2
echo Usage: nutch COMMAND
echo where COMMAND is one of:
echo crawl one-step crawler for intranets
echo admin database administration, including creation
echo inject inject new urls into the database
echo generate generate new segments to fetch
echo fetchlist print the fetchlist of a segment
echo fetch fetch a segment's pages
echo parse parse a segment's pages
echo index run the indexer on a segment's fetcher output
echo merge merge several segment indexes
echo dedup remove duplicates from a set of segment indexes
echo updatedb update db from segments after fetching
echo updatesegs update segments with link data from the db
echo mergesegs merge multiple segments into a single segment
echo readdb examine arbitrary fields of the database
echo analyze adjust database link-analysis scoring
echo prune prune segment index(es) of unwanted content
echo segread read, fix and dump segment data
echo segslice append, join and slice segment data
echo server run a search server
echo namenode run the NDFS namenode
echo datanode run an NDFS datanode
echo ndfs run an NDFS admin client
echo jobtracker run the MapReduce job Tracker node
echo tasktracker run a MapReduce task Tracker node
echo or
echo CLASSNAME run the class named CLASSNAME
echo Most commands print help when invoked w/o parameters.
goto end

:INIT
rem Honor a NUTCH_HOME provided by the caller; the hard-coded path is only the
rem default. (Previously it was always overwritten, which also made the old
rem empty-value fallback unreachable.)
if not defined NUTCH_HOME set "NUTCH_HOME=C:\work\nutch-0.7.2"

rem Fail early with a clear message instead of trying to launch "\bin\java".
if not defined JAVA_HOME (
  >&2 echo Error: JAVA_HOME is not set.
  exit /b 1
)

rem Base classpath entries, followed by every nutch-*.jar in NUTCH_HOME and
rem every jar in NUTCH_HOME\lib. The jars are appended through a subroutine
rem because a variable referenced directly in a FOR body is expanded only
rem once, before the loop runs. This replaces the former generated
rem setclasspath.bat, which had to be written into the current directory.
set "CLASSPATH=%NUTCH_HOME%;%NUTCH_HOME%\conf;%NUTCH_HOME%\plugin"
for %%i in ("%NUTCH_HOME%\nutch-*.jar") do call :appendClasspath "%%~i"
for %%i in ("%NUTCH_HOME%\lib\*.jar") do call :appendClasspath "%%~i"

rem Map the command name to its implementing class. CLASS is cleared first so
rem a value inherited from the environment cannot be picked up by accident.
set "CLASS="
if "%~1"=="crawl" set "CLASS=org.apache.nutch.tools.CrawlTool"
if "%~1"=="admin" set "CLASS=org.apache.nutch.tools.WebDBAdminTool"
if "%~1"=="inject" set "CLASS=org.apache.nutch.db.WebDBInjector"
if "%~1"=="generate" set "CLASS=org.apache.nutch.tools.FetchListTool"
if "%~1"=="fetchlist" set "CLASS=org.apache.nutch.pagedb.FetchListEntry"
if "%~1"=="fetch" set "CLASS=org.apache.nutch.fetcher.Fetcher"
if "%~1"=="parse" set "CLASS=org.apache.nutch.tools.ParseSegment"
if "%~1"=="index" set "CLASS=org.apache.nutch.indexer.IndexSegment"
if "%~1"=="merge" set "CLASS=org.apache.nutch.indexer.IndexMerger"
if "%~1"=="dedup" set "CLASS=org.apache.nutch.indexer.DeleteDuplicates"
if "%~1"=="updatedb" set "CLASS=org.apache.nutch.tools.UpdateDatabaseTool"
if "%~1"=="updatesegs" set "CLASS=org.apache.nutch.tools.UpdateSegmentsFromDb"
if "%~1"=="mergesegs" set "CLASS=org.apache.nutch.tools.SegmentMergeTool"
if "%~1"=="readdb" set "CLASS=org.apache.nutch.db.WebDBReader"
if "%~1"=="prune" set "CLASS=org.apache.nutch.tools.PruneIndexTool"
if "%~1"=="segread" set "CLASS=org.apache.nutch.segment.SegmentReader"
if "%~1"=="segslice" set "CLASS=org.apache.nutch.segment.SegmentSlicer"
if "%~1"=="analyze" set "CLASS=org.apache.nutch.tools.LinkAnalysisTool"
if "%~1"=="server" set "CLASS=org.apache.nutch.searcher.DistributedSearch$Server"
if "%~1"=="namenode" set "CLASS=org.apache.nutch.ndfs.NDFS$NameNode"
if "%~1"=="datanode" set "CLASS=org.apache.nutch.ndfs.NDFS$DataNode"
if "%~1"=="ndfs" set "CLASS=org.apache.nutch.fs.TestClient"
if "%~1"=="jobtracker" set "CLASS=org.apache.nutch.mapReduce.JobTracker"
if "%~1"=="tasktracker" set "CLASS=org.apache.nutch.mapReduce.TaskTracker"

rem Not a known command: treat the first argument as the class name to run,
rem as the usage text advertises ("CLASSNAME run the class named CLASSNAME").
if not defined CLASS set "CLASS=%~1"

"%JAVA_HOME%\bin\java" %JAVA_HEAP_MAX% -classpath "%CLASSPATH%" %CLASS% %2 %3 %4 %5 %6 %7 %8 %9
goto end

rem ---------------------------------------------------------------------------
rem :appendClasspath PATH
rem   Appends PATH (surrounding quotes removed) to CLASSPATH after a ";".
rem ---------------------------------------------------------------------------
:appendClasspath
set "CLASSPATH=%CLASSPATH%;%~1"
goto :eof

:end
