一 代码
  
 
  WordCount.java
  
 
  import java.io.IOException;
  
 
  import java.util.StringTokenizer;
  
 
  import org.apache.hadoop.conf.Configuration;
  
 
  import org.apache.hadoop.fs.Path;
  
 
  import org.apache.hadoop.io.IntWritable;
  
 
  import org.apache.hadoop.io.LongWritable;
  
 
  import org.apache.hadoop.io.Text;
  
 
  import org.apache.hadoop.mapreduce.Job;
  
 
  import org.apache.hadoop.mapreduce.Mapper;
  
 
  import org.apache.hadoop.mapreduce.Reducer;
  
 
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  
 
  import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  
 
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  
 
  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  
 
   
  
 
  public class WordCount {
  
 
  public static class WordCountMap extends
  
 
  Mapper<LongWritable, Text, Text, IntWritable> {
  
 
  private final IntWritable one = new IntWritable(1);
  
 
  private Text word = new Text();
  
 
   
  
 
  public void map(LongWritable key, Text value, Context context)
  
 
  throws IOException, InterruptedException {
  
 
  String line = value.toString();
  
 
  StringTokenizer token = new StringTokenizer(line);
  
 
  while (token.hasMoreTokens()) {
  
 
  word.set(token.nextToken());
  
 
  context.write(word, one);
  
 
  }
  
 
  }
  
 
  }
  
 
   
  
 
  public static class WordCountReduce extends
  
 
  Reducer<Text, IntWritable, Text, IntWritable> {
  
 
  public void reduce(Text key, Iterable<IntWritable> values,
  
 
  Context context) throws IOException, InterruptedException {
  
 
  int sum = 0;
  
 
  for (IntWritable val : values) {
  
 
  sum += val.get();
  
 
  }
  
 
  context.write(key, new IntWritable(sum));
  
 
  }
  
 
  }
  
 
   
  
 
  public static void main(String[] args) throws Exception {
  
 
  Configuration conf = new Configuration();
  
 
  Job job = new Job(conf);
  
 
  job.setJarByClass(WordCount.class);
  
 
  job.setJobName("wordcount");
  
 
  job.setOutputKeyClass(Text.class);
  
 
  job.setOutputValueClass(IntWritable.class);
  
 
  job.setMapperClass(WordCountMap.class);
  
 
  job.setReducerClass(WordCountReduce.class);
  
 
  job.setInputFormatClass(TextInputFormat.class);
  
 
  job.setOutputFormatClass(TextOutputFormat.class);
  
 
  FileInputFormat.addInputPath(job, new Path(args[0]));
  
 
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  
 
  job.waitForCompletion(true);
  
 
  }
  
 
  }
  
 
   
  
 
  二 构建运行
  
 
  1、编译
  
 
  [root@localhost word_count]# ll
  
 
  total 4
  
 
  drwxr-xr-x. 2 root root 101 Aug 20 14:27 word_count_class
  
 
  -rwxr-xr-x. 1 root root 2132 Aug 20 14:22 WordCount.java
  
 
  [root@localhost word_count]# javac -classpath /opt/hadoop-1.2.1/hadoop-core-1.2.1.jar:/opt/hadoop-1.2.1/lib/commons-cli-1.2.jar -d word_count_class/ WordCount.java
  
 
  [root@localhost word_count]# cd word_count_class/
  
 
  [root@localhost word_count_class]# ls
  
 
  WordCount.class WordCount$WordCountMap.class WordCount$WordCountReduce.class
  
 
  2、打包
  
 
  [root@localhost word_count_class]# jar -cvf wordcount.jar *.class
  
 
  added manifest
  
 
  adding: WordCount.class(in = 1539) (out= 772)(deflated 49%)
  
 
  adding: WordCount$WordCountMap.class(in = 1829) (out= 767)(deflated 58%)
  
 
  adding: WordCount$WordCountReduce.class(in = 1645) (out= 687)(deflated 58%)
  
 
  [root@localhost word_count_class]# ls
  
 
  WordCount.class wordcount.jar WordCount$WordCountMap.class WordCount$WordCountReduce.class
  
 
  3、准备输入文件file1和输入文件file2
  
 
  [root@localhost input]# ls
  
 
  file1 file2
  
 
  file1的内容:
  
 
  hello world
  
 
  hello hadoop
  
 
  hadoop file system
  
 
  hadoop java api
  
 
  hello java
  
 
  file2的内容:
  
 
  new file
  
 
  hadoop file
  
 
  hadoop new world
  
 
  hadoop free home
  
 
  hadoop free school
  
 
  4、将输入文件上传到HDFS
  
 
  [root@localhost word_count]# hadoop fs -mkdir input_wordcount
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  [root@localhost word_count]# hadoop fs -put input/* input_wordcount/
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
  [root@localhost word_count]# hadoop fs -ls
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  Found 2 items
  
 
  drwxr-xr-x - root supergroup 0 2017-08-20 12:44 /user/root/input
  
 
  drwxr-xr-x - root supergroup 0 2017-08-20 14:41 /user/root/input_wordcount
  
 
  [root@localhost word_count]# hadoop fs -ls input_wordcount
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  Found 2 items
  
 
  -rw-r--r-- 3 root supergroup 71 2017-08-20 14:41 /user/root/input_wordcount/file1
  
 
  -rw-r--r-- 3 root supergroup 74 2017-08-20 14:41 /user/root/input_wordcount/file2
  
 
  [root@localhost word_count]# hadoop fs -cat input_wordcount/file1
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  hello world
  
 
  hello hadoop
  
 
  hadoop file system
  
 
  hadoop java api
  
 
  hello java
  
 
  5、任务提交
  
 
  [root@localhost word_count]# hadoop jar word_count_class/wordcount.jar WordCount input_wordcount output_wordcount
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  17/08/20 14:50:30 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
  
 
  17/08/20 14:50:31 INFO input.FileInputFormat: Total input paths to process : 2
  
 
  17/08/20 14:50:31 INFO util.NativeCodeLoader: Loaded the native-hadoop library
  
 
  17/08/20 14:50:31 WARN snappy.LoadSnappy: Snappy native library not loaded
  
 
  17/08/20 14:50:33 INFO mapred.JobClient: Running job: job_201708201140_0001
  
 
  17/08/20 14:50:34 INFO mapred.JobClient: map 0% reduce 0%
  
 
  17/08/20 14:51:20 INFO mapred.JobClient: map 100% reduce 0%
  
 
  17/08/20 14:51:45 INFO mapred.JobClient: map 100% reduce 100%
  
 
  17/08/20 14:51:51 INFO mapred.JobClient: Job complete: job_201708201140_0001
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Counters: 29
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Job Counters
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Launched reduce tasks=1
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=81389
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Launched map tasks=2
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Data-local map tasks=2
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=24253
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: File Output Format Counters
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Bytes Written=83
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: FileSystemCounters
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: FILE_BYTES_READ=301
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: HDFS_BYTES_READ=381
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: FILE_BYTES_WRITTEN=156847
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=83
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: File Input Format Counters
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Bytes Read=145
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Map-Reduce Framework
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Map output materialized bytes=307
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Map input records=10
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Reduce shuffle bytes=307
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Spilled Records=50
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Map output bytes=245
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Total committed heap usage (bytes)=246751232
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: CPU time spent (ms)=5290
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Combine input records=0
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: SPLIT_RAW_BYTES=236
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Reduce input records=25
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Reduce input groups=11
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Combine output records=0
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Physical memory (bytes) snapshot=382996480
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Reduce output records=11
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Virtual memory (bytes) snapshot=2590666752
  
 
  17/08/20 14:51:52 INFO mapred.JobClient: Map output records=25
  
 
  6、查看结果
  
 
  [root@localhost word_count]# hadoop fs -ls output_wordcount
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  Found 3 items
  
 
  -rw-r--r-- 3 root supergroup 0 2017-08-20 14:51 /user/root/output_wordcount/_SUCCESS
  
 
  drwxr-xr-x - root supergroup 0 2017-08-20 14:50 /user/root/output_wordcount/_logs
  
 
  -rw-r--r-- 3 root supergroup 83 2017-08-20 14:51 /user/root/output_wordcount/part-r-00000
  
 
  [root@localhost word_count]# hadoop fs -cat output_wordcount/part-r-00000
  
 
  Warning: $HADOOP_HOME is deprecated.
  
 
   
  
 
  api 1
  
 
  file 3
  
 
  free 2
  
 
  hadoop 7
  
 
  hello 3
  
 
  home 1
  
 
  java 2
  
 
  new 2
  
 
  school 1
  
 
  system 1
  
 
  world 2