Hadoop Learning - Day 2 (MapReduce Principles and WordCount)

2018-07-02 · 风笑天2013


1. Using the WordCount program to analyze how MapReduce runs

(Figure: WordCount data-processing flow)
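To make the flow concrete, here is a small worked trace (the two-line input file is an assumed example, not from the original):

Input file:
    hello world
    hello hadoop

1. Split/read: the default TextInputFormat turns each line into a (byte offset, line) record: (0, "hello world"), (12, "hello hadoop").
2. Map: each line is split on spaces and a (word, 1) pair is emitted per token: ("hello", 1), ("world", 1), ("hello", 1), ("hadoop", 1).
3. Shuffle/sort: the framework groups the values by key and sorts the keys: ("hadoop", [1]), ("hello", [1, 1]), ("world", [1]).
4. Reduce: the values for each key are summed and written out: hadoop 1, hello 2, world 1.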

2. The WordCount program

// WordCountMapper.java: the Map phase

package cn.xia.java.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Input: (byte offset of the line, the line text).
// Output: (a word, the count 1).
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused across calls to avoid allocating a new object per record.
    private final IntWritable one = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line on spaces and emit (word, 1) for each token.
        String line = value.toString();
        String[] words = line.split(" ");
        for (String s : words) {
            word.set(s);
            context.write(word, one);
        }
    }
}
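The framework calls map() once per input record; with the default TextInputFormat, the key is the line's starting byte offset in the file (which WordCount ignores) and the value is the line itself. Reusing the word and one objects across calls, instead of allocating new ones, is a common Hadoop idiom, since map() may run millions of times per task.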

// WordCountReducer.java: the Reduce phase

package cn.xia.java.wordcount;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Input: (a word, all the 1s the mappers emitted for it).
// Output: (the word, its total count).
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable num = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for this word.
        int count = 0;
        for (IntWritable val : values) {
            count += val.get();
        }
        num.set(count);
        context.write(key, num);
    }
}
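reduce() is called once per distinct key, and the keys arrive in sorted order, which is why the output is alphabetical by word. Because the reduction here is a plain sum (associative and commutative), the same class can also serve as a combiner that pre-aggregates on the map side; see the note after the driver below.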

// WordCount.java: the driver that assembles and submits the Job

package cn.xia.java.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws Exception {
        // Load the Hadoop configuration from the classpath (core-site.xml, etc.).
        Configuration conf = new Configuration();

        // Delete the Reduce output directory if it already exists; otherwise
        // the job fails with an "output directory already exists" error.
        FileSystem.get(conf).delete(new Path(args[1]), true);

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCount.class);

        // Map phase.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // Reduce phase.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and exit non-zero if it fails.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
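To run the job, package the three classes into a jar and submit it with the hadoop launcher (the jar name and HDFS paths below are illustrative, not from the original):

    hadoop jar wordcount.jar cn.xia.java.wordcount.WordCount /input/words.txt /output/wordcount

An optional one-line refinement, not in the original code: calling job.setCombinerClass(WordCountReducer.class) before submission runs the summing logic on each map task's local output first, shrinking the data shuffled across the network. This is safe for WordCount precisely because the reduce step is a simple sum.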
