超大数据排序

1、分
内存中维护一个极小的核心缓冲区memBuffer,将大文件bigdata按行读入,搜集到memBuffer满或者大文件读完时,对memBuffer中的数据调用内排进行排序,排序后将有序结果写入磁盘文件bigdata.xxx.part.sorted. 循环利用memBuffer直到大文件处理完毕,得到n个有序的磁盘文件
2、合
现在有了n个有序的小文件,怎么合并成1个有序的大文件?把所有小文件读入内存,然后内排?(⊙o⊙)… no!
利用如下原理进行归并排序:
贴代码:
产生大数据文件
public class Test {
public static void main(String[] args)throws IOException {
File file =new File("E:/排序/source.txt");
int numCount =10000000;
Random r =new Random();
if (file.exists()) file.delete();
FileWriter fw =new FileWriter(file);
for (int i =0; i < numCount; i++) {
fw.write(r.nextInt() +"\n");
}
fw.close();
}
}
2、排序
说明 此类从网上抄写 ,合并有mergeSort 和 merge
mergeSort方法感觉逻辑有点绕, 即另外实现了merge 方法
public class Sort {
public static void main(String[] args)throws IOException {
File file =new File("E:/排序/source.txt");
BufferedReader fr =new BufferedReader(new FileReader(file));//源数据文件读取。
final int SIZE =10000;//这里是定义我们将源文件中以10000条记录作为单位进行分割。
int[] nums =new int[SIZE];//临时存放分割时的记录
List fileNames =new ArrayList();//保存所有分割文件的名称
int index =0;
while (true) {
String num = fr.readLine();//从原文件中读取一条记录
if (num ==null) {//如果读取完毕后,进行一次排序并保存
fileNames.add(sortAndSave(nums, index));
break;
}
nums[index] = Integer.valueOf(num);
index++;
if (index == SIZE) {//当nums里面读的数字到达长度边界时,排序,存储
fileNames.add(sortAndSave(nums, index));//sortAndSave是将nums中前index条记录先快速排序,然后存入文件,最好将文件名返回。
index =0;//重置index
}
}
fr.close();
merge(fileNames);
}
public static void merge(List fileNames)throws IOException {
List list =new ArrayList<>(fileNames.size());
List brs =new ArrayList<>(fileNames.size());
for (String fileName : fileNames) {
File file =new File(fileName);
BufferedReader br =new BufferedReader(new FileReader(file));
String num = br.readLine();
brs.add(br);
list.add(Integer.valueOf(num));
}
File resultFile =new File("E:/排序/result.txt");
BufferedWriter bw =new BufferedWriter(new FileWriter(resultFile));
while (fileNames.size() >0) {
int min = Collections.min(list);
bw.write(min +"\n");
int i = list.indexOf(min);
String num = brs.get(i).readLine();
if (num ==null) {
String fileName = fileNames.get(i);
File file =new File(fileName);
if (file.exists()) {
file.delete();
}
fileNames.remove(i);
brs.get(i).close();
brs.remove(i);
list.remove(i);
}else {
list.set(i, Integer.valueOf(num));
}
}
bw.close();
}
public static StringsortAndSave(int[] nums, int size)throws IOException {
quicksort.sort(nums, 0, size -1);
String fileName ="E:/排序/sort" + System.nanoTime() +".txt";
File rf =new File(fileName);
BufferedWriter bw =new BufferedWriter(new FileWriter(rf));
for (int i =0; i < nums.length; i++) bw.write(nums[i] +"\n");
bw.close();
return fileName;
}
public static void mergeSort(List fileNames)throws IOException {
List tempFileNames =new ArrayList();
for (int i =0; i < fileNames.size(); i++) {
String resultFileName ="E:/排序/sort" + System.nanoTime() +".txt";
File resultFile =new File(resultFileName);
tempFileNames.add(resultFileName);
BufferedWriter bw =new BufferedWriter(new FileWriter(resultFile));
File file1 =new File(fileNames.get(i++));
BufferedReader br1 =new BufferedReader(new FileReader(file1));
if (i < fileNames.size()) {
File file2 =new File(fileNames.get(i));
BufferedReader br2 =new BufferedReader(new FileReader(file2));
int num1 =0;
int num2 =0;
boolean isFirst =true;
boolean firstNext =true;
String numVal1 =null, numVal2 =null;
for (; ; ) {
if (isFirst) {
numVal1 = br1.readLine();
numVal2 = br2.readLine();
num1 = Integer.valueOf(numVal1);
num2 = Integer.valueOf(numVal2);
isFirst =false;
}else if (firstNext)
numVal1 = br1.readLine();
else
numVal2 = br2.readLine();
if (numVal1 !=null && numVal2 !=null) {
if (firstNext) {
num1 = Integer.valueOf(numVal1);
}else {
num2 = Integer.valueOf(numVal2);
}
if (num1 < num2) {
bw.write(num1 +"\n");
firstNext =true;
}else {
bw.write(num2 +"\n");
firstNext =false;
}
}else {
if (numVal1 !=null) bw.write(numVal1 +"\n");
if (numVal2 !=null) bw.write(numVal2 +"\n");
break;
}
}
while (true) {
numVal2 = br2.readLine();
;
if (numVal2 !=null) bw.write(numVal2 +"\n");
else break;
}
br2.close();
file2.delete();
}
while (true) {
String numVal1 = br1.readLine();
if (numVal1 !=null) {
bw.write(numVal1 +"\n");
}else break;
}
br1.close();
file1.delete();
bw.close();
}
int size = tempFileNames.size();
if (size >1) {
mergeSort(tempFileNames);
}else if (size ==1) {
File file =new File(tempFileNames.get(0));
file.renameTo(new File("E:/排序/result.txt"));
}
}
}
//快速排序
class quicksort {
public static void sort(int data[], int low, int hight) {
quicksort qs =new quicksort();
qs.data = data;
qs.sort(low, hight);
}
public int data[];
private int partition(int sortArray[], int low, int hight) {
int key = sortArray[low];
while (low < hight) {
while (low < hight && sortArray[hight] >= key) hight--;
sortArray[low] = sortArray[hight];
while (low < hight && sortArray[low] <= key) low++;
sortArray[hight] = sortArray[low];
}
sortArray[low] = key;
return low;
}
public void sort(int low, int hight) {
if (low < hight) {
int result = partition(data, low, hight);
sort(low, result -1);
sort(result +1, hight);
}
}
}