MapReduce编程实例(三),数据去重

2019-12-26  本文已影响0人  Mr_K_

实验内容为:MapReduce编程实例(三),数据去重


输入:
2013-11-01 aa
2013-11-02 bb
2013-11-03 cc
2013-11-04 aa
2013-11-05 dd
2013-11-06 dd
2013-11-07 aa
2013-11-09 cc
2013-11-10 ee

2013-11-01 bb
2013-11-02 33
2013-11-03 cc
2013-11-04 bb
2013-11-05 23
2013-11-06 dd
2013-11-07 99
2013-11-09 99
2013-11-10 ee


导入语句(import):

3.  import java.io.IOException;  
4.  import java.util.HashSet;  
5.  import java.util.StringTokenizer;  
6.    
7.  import org.apache.hadoop.conf.Configuration;  
8.  import org.apache.hadoop.fs.Path;  
9.  import org.apache.hadoop.io.Text;  
10. import org.apache.hadoop.mapreduce.Job;  
11. import org.apache.hadoop.mapreduce.Mapper;  
12. import org.apache.hadoop.mapreduce.Reducer;  
13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  
14. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  
15. import org.apache.hadoop.util.GenericOptionsParser;  

其他部分:

public class Dedup {  
23.   
24.     public static class MyMapper extends Mapper<Object, Text, Text, Text>{  
25.   
26.         @Override  
27.         protected void map(Object key, Text value, Context context)  
28.                 throws IOException, InterruptedException {  
29.                 context.write(value, new Text(""));  
30.         }  
31.     }  
32.       
33.     public static class MyReducer extends Reducer<Text, Text, Text, Text>{  
34.   
35.         @Override  
36.         protected void reduce(Text key, Iterable<Text> value,  
37.                 Context context)  
38.                 throws IOException, InterruptedException {  
39.             context.write(key, new Text(""));  
40.         }  
41.     }  
42.       
43.       
44.     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{  
45.         Configuration conf = new Configuration();  
46.         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();  
47.           
48.         if(otherArgs.length<2){  
49.             System.out.println("parameter errors!");  
50.             System.exit(2);  
51.         }  
52.           
53.         Job job = new org.apache.hadoop.mapreduce.Job(conf, "Dedup");  
54.         job.setJarByClass(Dedup.class);  
55.         job.setMapperClass(MyMapper.class);  
56.         job.setCombinerClass(MyReducer.class);  
57.         job.setReducerClass(MyReducer.class);  
58.         job.setOutputKeyClass(Text.class);  
59.         job.setOutputValueClass(Text.class);  
60.           
61.         FileInputFormat.addInputPath(job,new ath(otherArgs[0]));  
62.         FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));  
63.           
64.         System.exit(job.waitForCompletion(true)?0:1);  
65.           
66.     }  
67.       
68. }  


输出结果:
2013-11-01 aa
2013-11-01 bb
2013-11-02 33
2013-11-02 bb
2013-11-03 cc
2013-11-03 cc
2013-11-04 98
2013-11-04 aa
2013-11-04 bb
2013-11-05 23
2013-11-05 93
2013-11-05 dd
2013-11-06 99
2013-11-06 dd
2013-11-07 92
2013-11-07 99
2013-11-07 aa
2013-11-09 99
2013-11-09 aa
2013-11-09 cc
2013-11-10 ee

上一篇下一篇

猜你喜欢

热点阅读