MapReduce之多字段排序(String型)
2020-04-02 本文已影响0人
还闹不闹
1、需求描述:输入文件每行有3列(以制表符分隔),依次按第1、2、3列的字符串值对各行进行倒序(从大到小)排序
inputFile:
ab d 1a
b1 d1 1a
c2 a1 1a
d3 e3 1a
d2 a4 1b
b1 a2 1b
c1 b2 1c
2、代码
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * MapReduce job that sorts three-column, tab-separated text lines in descending order.
 *
 * <p>Each input line is turned into a {@link TripleString} composite key by the mapper. The
 * keys are ordered by {@link TripleString#compareTo(TripleString)} and the reducer writes them
 * back out, one output line per input line.
 *
 * <p>Despite the "Int" in the class name, all three columns are compared as strings.
 */
public class TripleIntSortApp {

    /** Counter group under which this job reports its diagnostics. */
    private static final String COUNTER_GROUP = "TripleStringSort";

    /** Counter incremented for every input line that has fewer than three columns. */
    private static final String MALFORMED_LINES = "MALFORMED_LINES";

    /**
     * Configures and runs the sort job, then exits with 0 on success or 1 on failure.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: " + TripleIntSortApp.class.getSimpleName()
                    + " <input path> <output path>");
            System.exit(2);
        }

        // Create the job, named after this class.
        Job job = Job.getInstance(new Configuration(), TripleIntSortApp.class.getSimpleName());
        // Class used to locate the jar that contains the job.
        job.setJarByClass(TripleIntSortApp.class);
        // Input path.
        FileInputFormat.setInputPaths(job, args[0]);
        // Mapper and the key/value types it emits.
        job.setMapperClass(TripleStringSortMapper.class);
        job.setMapOutputKeyClass(TripleString.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reducer and the key/value types it emits.
        job.setReducerClass(TripleStringSortReducer.class);
        job.setOutputKeyClass(TripleString.class);
        job.setOutputValueClass(NullWritable.class);
        // Output path.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and propagate its outcome through the process exit code,
        // so that a failed job is not reported as success to the calling shell.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Parses each tab-separated input line into a {@link TripleString} key with an empty value.
     */
    public static class TripleStringSortMapper
            extends Mapper<LongWritable, Text, TripleString, NullWritable> {

        /** Key instance reused for every record instead of allocating one per line. */
        TripleString k3 = new TripleString();

        /**
         * Emits one key per input line that has at least three columns.
         *
         * <p>Lines with fewer than three columns (for example blank lines) are skipped and
         * counted instead of failing the task with an array index error.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context task context used to emit the key and to report counters
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] splited = value.toString().split("\t");
            if (splited.length < 3) {
                context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
                return;
            }
            k3.set(splited[0], splited[1], splited[2]);
            context.write(k3, NullWritable.get());
            System.out.println("Mapper-----第一个数:"+k3.first+" 第二个数:"+k3.second+" 第三个数:"+k3.third);
        }
    }

    /**
     * Writes the sorted keys to the job output, preserving duplicate rows.
     */
    public static class TripleStringSortReducer
            extends Reducer<TripleString, NullWritable, TripleString, NullWritable> {

        /** Number of the current {@code reduce} call, used only in the diagnostic output. */
        int i = 1;

        /**
         * Emits the key once for every value grouped under it.
         *
         * <p>Identical input rows are grouped into a single call. Writing the key once per
         * value keeps every such row in the output instead of collapsing them into one.
         *
         * @param k3      the current key
         * @param values  one {@link NullWritable} per input row equal to {@code k3}
         * @param context task context used to emit the output
         */
        @Override
        protected void reduce(TripleString k3, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            for (NullWritable ignored : values) {
                context.write(k3, NullWritable.get());
            }
            System.out.println("调用次数"+(i++));
            System.out.println("Reducer-----第一个数:"+k3.first+" 第二个数:"+k3.second+" 第三个数:"+k3.third);
        }
    }

    /**
     * Composite key of three strings, ordered in descending order by first, then second, then
     * third field.
     */
    public static class TripleString implements WritableComparable<TripleString> {
        String first;
        String second;
        String third;

        /**
         * Replaces all three fields.
         *
         * @param s1 new first field
         * @param s2 new second field
         * @param s3 new third field
         */
        public void set(String s1, String s2, String s3) {
            this.first = s1;
            this.second = s2;
            this.third = s3;
        }

        /**
         * Serializes the three fields in order.
         *
         * <p>{@link DataOutput#writeUTF(String)} rejects strings whose encoded form is longer
         * than 65535 bytes, so each field is limited to that size.
         */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(first);
            out.writeUTF(second);
            out.writeUTF(third);
        }

        /** Reads the three fields in the same order in which {@link #write} stores them. */
        @Override
        public void readFields(DataInput in) throws IOException {
            this.first = in.readUTF();
            this.second = in.readUTF();
            this.third = in.readUTF();
        }

        /**
         * Descending order: larger values sort first. Fields are compared in the order first,
         * second, third, and the first difference decides.
         *
         * <p>For ascending order, swap the arguments passed to {@code descending} (or negate
         * the result).
         *
         * @return -1, 0 or 1
         */
        @Override
        public int compareTo(TripleString o) {
            int result = descending(this.first, o.first);
            if (result == 0) {
                result = descending(this.second, o.second);
            }
            if (result == 0) {
                result = descending(this.third, o.third);
            }
            return result;
        }

        /** Compares two strings in reverse natural order, normalized to -1, 0 or 1. */
        private static int descending(String mine, String theirs) {
            return Integer.signum(theirs.compareTo(mine));
        }

        /** Two keys are equal when all three fields are equal, consistent with compareTo. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof TripleString)) {
                return false;
            }
            TripleString other = (TripleString) obj;
            return Objects.equals(first, other.first)
                    && Objects.equals(second, other.second)
                    && Objects.equals(third, other.third);
        }

        /**
         * Hash derived only from the field values, so equal keys produce the same hash in
         * every JVM. This matters when the key's hash is used to choose a partition.
         */
        @Override
        public int hashCode() {
            return Objects.hash(first, second, third);
        }

        /** Tab-separated form of the three fields, matching the input line layout. */
        @Override
        public String toString() {
            return this.first+"\t"+this.second+"\t"+this.third;
        }
    }
}
3、输出结果(按3列依次倒序排列)
d3 e3 1a
d2 a4 1b
c2 a1 1a
c1 b2 1c
b1 d1 1a
b1 a2 1b
ab d 1a