倒排索引案例(多Job串联)

2019-03-09  本文已影响0人  bullion

需求

有多个文本文件,统计每个单词在各个文件中出现的次数,生成倒排索引(每个单词一行,后面依次列出"文件名-->出现次数")。

需求分析

第一次处理

OneIndexMapper

public class OneIndexMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    String name;

    @Override

    protected void setup(Context context) throws IOException, InterruptedException {

        // 获取文件名称

        FileSplit inputSplit = (FileSplit) context.getInputSplit();

        name = inputSplit.getPath().getName();

    }

    Text k = new Text();

    IntWritable v = new IntWritable(1);

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split(" ");

        // 3 写出

        for (String word : fields) {

            k.set(word + "--" + name);

            context.write(k, v);

        }

    }

}

OneIndexReducer

public class OneIndexReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override

    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        // 1 累加求和

        int sum = 0;

        for (IntWritable value : values) {

            sum += value.get();

        }

        IntWritable v = new IntWritable();

        v.set(sum);

        // 2 写出

        context.write(key, v);

    }

}

OneIndexDriver

public class OneIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputoneindex", "e:/output5"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(OneIndexDriver.class);

        job.setMapperClass(OneIndexMapper.class);

        job.setReducerClass(OneIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}


第二次处理(输入为第一次处理的输出结果,每行格式为"单词--文件名\t次数")

TwoIndexMapper

public class TwoIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text k = new Text();

    Text v = new Text();

    @Override

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 获取一行

        String line = value.toString();

        // 2 切割

        String[] fields = line.split("--");

        // 3 封装

        k.set(fields[0]);

        v.set(fields[1]);

        // 4 写出

        context.write(k, v);

    }

}

TwoIndexReducer

public class TwoIndexReducer extends Reducer<Text, Text, Text, Text> {

    Text v = new Text();

    @Override

    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

        // 1 拼接字符串

        StringBuffer sb = new StringBuffer();

        for (Text value : values) {

            sb.append(value.toString().replace("\t", "-->") + "\t");

        }

        v.set(sb.toString());

        // 2 写出

        context.write(key, v);

    }

}

TwoIndexDriver

public class TwoIndexDriver {

    public static void main(String[] args) throws Exception {

        args = new String[]{"e:/input/inputtwoindex", "e:/output6"};

        Configuration configuration = new Configuration();

        Job job = Job.getInstance(configuration);

        job.setJarByClass(TwoIndexDriver.class);

        job.setMapperClass(TwoIndexMapper.class);

        job.setReducerClass(TwoIndexReducer.class);

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);

    }

}

上一篇 下一篇

猜你喜欢

热点阅读