【算法笔记】各种排序基础(续)
以下内容部分来自极客时间 -王争-数据结构与算法之美 的学习笔记,在上面有延伸拓展
算法和数据结构系列文章:
【算法和数据结构基础知识】各种排序基础
【算法和数据结构基础知识】树和二叉树相关基础
【算法和数据结构基础知识】字符串匹配
【算法和数据结构基础知识】线性表基础
【算法和数据结构基础知识】Some pieces of code
【算法和数据结构基础知识】队列相关基础
【算法和数据结构基础知识】C语言基础
在上次的文章【算法和数据结构基础知识】各种排序基础中讲述了一些基础的排序算法,(冒泡、插入、选择 时间复杂度:O(n^2) 基于比较)本篇将继续进行整理学习,包含(快排、归并 时间复杂度 :O(nlogn) 基于比较)。
一、分治思想
1.分治思想
分治,就是分而治之,将一个大问题分解成小的子问题来解决,小的子问题解决了,大问题也就解决了。
2.分治与递归的区别
分治算法一般都用递归来实现的。分治是一种解决问题的处理思想,递归是一种编程技巧。
二、归并排序
1.算法原理
先把数组从中间分成前后两部分,然后对前后两部分分别进行排序,再将排序好的两部分合并到一起,这样整个数组就有序了。这就是归并排序的核心思想。如何用递归实现归并排序呢?
写递归代码的技巧就是分写得出递推公式,然后找到终止条件,最后将递推公式翻译成递归代码。递推公式怎么写?如下
递推公式:mergeSort(lo…hi) = merge(mergeSort(lo…mid), mergeSort(mid+1…hi))
终止条件:lo >= hi 不用再继续分解
2.代码实现
参考《算法 4》里提供的java实现:自顶向下的递归版本的归并排序,时间复杂度nlog n
这是对应的例子:
/******************************************************************************
* Compilation: javac Merge.java
* Execution: java Merge < input.txt
* Dependencies: StdOut.java StdIn.java
* Data files: https://algs4.cs.princeton.edu/22mergesort/tiny.txt
* https://algs4.cs.princeton.edu/22mergesort/words3.txt
*
* Sorts a sequence of strings from standard input using mergesort.
*
* % more tiny.txt
* S O R T E X A M P L E
*
* % java Merge < tiny.txt
* A E E L M O P R S T X [ one string per line ]
*
* % more words3.txt
* bed bug dad yes zoo ... all bad yet
*
* % java Merge < words3.txt
* all bad bed bug dad ... yes yet zoo [ one string per line ]
*
******************************************************************************/
package com.anthony.algo;
import edu.princeton.cs.algs4.MergeX;
import edu.princeton.cs.algs4.StdIn;
import edu.princeton.cs.algs4.StdOut;
/**
* The {@code Merge} class provides static methods for sorting an
* array using a top-down, recursive version of <em>mergesort</em>.
* <p>
* This implementation takes Θ(<em>n</em> log <em>n</em>) time
* to sort any array of length <em>n</em> (assuming comparisons
* take constant time). It makes between
* ~ ½ <em>n</em> log<sub>2</sub> <em>n</em> and
* ~ 1 <em>n</em> log<sub>2</sub> <em>n</em> compares.
* <p>
* This sorting algorithm is stable.
* It uses Θ(<em>n</em>) extra memory (not including the input array).
* <p>
* For additional documentation, see
* <a href="https://algs4.cs.princeton.edu/22mergesort">Section 2.2</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
* For an optimized version, see {@link MergeX}.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class Merge {
// This class should not be instantiated.
private Merge() { }
// stably merge a[lo .. mid] with a[mid+1 ..hi] using aux[lo .. hi]
private static void merge(Comparable[] a, Comparable[] aux, int lo, int mid, int hi) {
// precondition: a[lo .. mid] and a[mid+1 .. hi] are sorted subarrays
assert isSorted(a, lo, mid);
assert isSorted(a, mid+1, hi);
// copy to aux[]
for (int k = lo; k <= hi; k++) {
aux[k] = a[k];
}
// merge back to a[]
int i = lo, j = mid+1;
for (int k = lo; k <= hi; k++) {
if (i > mid) a[k] = aux[j++]; //左半边用尽(取右边元素)
else if (j > hi) a[k] = aux[i++]; //右半边用尽(取左边元素)
else if (less(aux[j], aux[i])) a[k] = aux[j++];//右半边当前元素小于左半边当前元素(取右边元素)
else a[k] = aux[i++];//右半边当前元素大于等于左半边当前元素(取左边元素)
}
// postcondition: a[lo .. hi] is sorted
assert isSorted(a, lo, hi);
}
// mergesort a[lo..hi] using auxiliary array aux[lo..hi]
private static void mergeSort(Comparable[] a, Comparable[] aux, int lo, int hi) {
if (hi <= lo) return;
int mid = lo + (hi - lo) / 2;
mergeSort(a, aux, lo, mid);
mergeSort(a, aux, mid + 1, hi);
merge(a, aux, lo, mid, hi);
}
/**
* Rearranges the array in ascending order, using the natural order.
* @param a the array to be sorted
*/
public static void mergeSort(Comparable[] a) {
Comparable[] aux = new Comparable[a.length];
mergeSort(a, aux, 0, a.length-1);
assert isSorted(a);
}
/***************************************************************************
* Helper sorting function.
***************************************************************************/
// is v < w ?
private static boolean less(Comparable v, Comparable w) {
return v.compareTo(w) < 0;
}
/***************************************************************************
* Check if array is sorted - useful for debugging.
***************************************************************************/
private static boolean isSorted(Comparable[] a) {
return isSorted(a, 0, a.length - 1);
}
private static boolean isSorted(Comparable[] a, int lo, int hi) {
for (int i = lo + 1; i <= hi; i++)
if (less(a[i], a[i-1])) return false;
return true;
}
/***************************************************************************
* Index mergesort.
***************************************************************************/
// stably merge a[lo .. mid] with a[mid+1 .. hi] using aux[lo .. hi]
private static void merge(Comparable[] a, int[] index, int[] aux, int lo, int mid, int hi) {
// copy to aux[]
for (int k = lo; k <= hi; k++) {
aux[k] = index[k];
}
// merge back to a[]
int i = lo, j = mid+1;
for (int k = lo; k <= hi; k++) {
if (i > mid) index[k] = aux[j++];
else if (j > hi) index[k] = aux[i++];
else if (less(a[aux[j]], a[aux[i]])) index[k] = aux[j++];
else index[k] = aux[i++];
}
}
/**
* Returns a permutation that gives the elements in the array in ascending order.
* @param a the array
* @return a permutation {@code p[]} such that {@code a[p[0]]}, {@code a[p[1]]},
* ..., {@code a[p[N-1]]} are in ascending order
*/
public static int[] indexSort(Comparable[] a) {
int n = a.length;
int[] index = new int[n];
for (int i = 0; i < n; i++)
index[i] = i;
int[] aux = new int[n];
mergeSort(a, index, aux, 0, n-1);
return index;
}
// mergesort a[lo..hi] using auxiliary array aux[lo..hi]
private static void mergeSort(Comparable[] a, int[] index, int[] aux, int lo, int hi) {
if (hi <= lo) return;
int mid = lo + (hi - lo) / 2;
mergeSort(a, index, aux, lo, mid);
mergeSort(a, index, aux, mid + 1, hi);
merge(a, index, aux, lo, mid, hi);
}
// print array to standard output
private static void show(Comparable[] a) {
for (int i = 0; i < a.length; i++) {
StdOut.println(a[i]);
}
}
/**
* Reads in a sequence of strings from standard input; mergesorts them;
* and prints them to standard output in ascending order.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String[] a = StdIn.readAllStrings();
Merge.mergeSort(a);
show(a);
}
}
/******************************************************************************
* Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/
《算法 4》 中还提供了自底向上的归并排序,和优化版的归并排序(当元素个数小于8个的时候,采用插入排序),代码贴在文章底部。
3.性能分析
1)算法稳定性:
归并排序稳不稳定关键要看merge()函数,也就是两个子数组合并成一个有序数组的那部分代码。在合并的过程中,如果 A[lo…mid] 和 A[mid+1…hi] 之间有值相同的元素,那我们就可以像伪代码中那样,先把 A[lo…mid] 中的元素放入tmp数组,这样 就保证了值相同的元素,在合并前后的先后顺序不变。所以,归并排序是一种稳定排序算法。
2)时间复杂度:分析归并排序的时间复杂度就是分析递归代码的时间复杂度
如何分析递归代码的时间复杂度?
递归的适用场景是一个问题a可以分解为多个子问题b、c,那求解问题a就可以分解为求解问题b、c。问题b、c解决之后,我们再把b、c的结果合并成a的结果。若定义求解问题a的时间是T(a),则求解问题b、c的时间分别是T(b)和T(c),那就可以得到这样的递推公式:T(a) = T(b) + T(c) + K,其中K等于将两个子问题b、c的结果合并成问题a的结果所消耗的时间。这里有一个重要的结论:不仅递归求解的问题可以写成递推公式,递归代码的时间复杂度也可以写成递推公式。套用这个公式,那么归并排序的时间复杂度就可以表示为:
T(1) = C; n=1 时,只需要常量级的执行时间,所以表示为 C。
T(n) = 2T(n/2) + n; n>1,其中n就是merge()函数合并两个子数组的的时间复杂度O(n)。
T(n) = 2T(n/2) + n
= 2(2T(n/4) + n/2) + n = 4T(n/4) + 2n
= 4(2T(n/8) + n/4) + 2n = 8T(n/8) + 3n
= 8(2T(n/16) + n/8) + 3n = 16T(n/16) + 4n
......
= 2^k * T(n/2^k) + k * n
......
当T(n/2^k)=T(1) 时,也就是 n/2^k=1,我们得到k=log2n。将k带入上面的公式就得到T(n)=Cn+nlog2n。如用大O表示法,T(n)就等于O(nlogn)。所以,归并排序的是复杂度时间复杂度就是O(nlogn)。
3)空间复杂度:归并排序算法不是原地排序算法,空间复杂度是O(n)
为什么?因为归并排序的合并函数,在合并两个数组为一个有序数组时,需要借助额外的存储空间,比如上方的aux[]。为什么空间复杂度是O(n)而不是O(nlogn)呢?如果我们按照分析递归的时间复杂度的方法,通过递推公式来求解,那整个归并过程需要的空间复杂度就是O(nlogn),但这种分析思路是有问题的!
因为,在实际上,递归代码的空间复杂度并不是像时间复杂度那样累加,而是这样的过程,即在每次合并过程中都需要申请额外的内存空间,但是合并完成后,临时开辟的内存空间就被释放掉了,在任意时刻,CPU只会有一个函数在执行,也就只会有一个临时的内存空间在使用。临时空间再大也不会超过n个数据的大小,所以空间复杂度是O(n)。
三、快速排序
1.算法原理
快排的思想是这样的:如果要排序数组中下标从p到r之间的一组数据,我们选择p到r之间的任意一个数据作为pivot(分区点)。然后遍历p到r之间的数据,将小于pivot的放到左边,将大于pivot的放到右边,将povit放到中间。经过这一步之后,数组p到r之间的数据就分成了3部分,前面p到q-1之间都是小于povit的,中间是povit,后面的q+1到r之间是大于povit的。根据分治、递归的处理思想,我们可以用递归排序下标从p到q-1之间的数据和下标从q+1到r之间的数据,直到区间缩小为1,就说明所有的数据都有序了。
递推公式:quick_sort(p…r) = quick_sort(p…q-1) + quick_sort(q+1, r)
终止条件:p >= r
2.代码实现
来自《算法 4 》
/******************************************************************************
* Compilation: javac Quick.java
* Execution: java Quick < input.txt
* Dependencies: StdOut.java StdIn.java
* Data files: https://algs4.cs.princeton.edu/23quicksort/tiny.txt
* https://algs4.cs.princeton.edu/23quicksort/words3.txt
*
* Sorts a sequence of strings from standard input using quicksort.
*
* % more tiny.txt
* S O R T E X A M P L E
*
* % java Quick < tiny.txt
* A E E L M O P R S T X [ one string per line ]
*
* % more words3.txt
* bed bug dad yes zoo ... all bad yet
*
* % java Quick < words3.txt
* all bad bed bug dad ... yes yet zoo [ one string per line ]
*
*
* Remark: For a type-safe version that uses static generics, see
*
* https://algs4.cs.princeton.edu/23quicksort/QuickPedantic.java
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code Quick} class provides static methods for sorting an
* array and selecting the ith smallest element in an array using quicksort.
* <p>
* For additional documentation,
* see <a href="https://algs4.cs.princeton.edu/23quick">Section 2.3</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class Quick {
// This class should not be instantiated.
private Quick() { }
/**
* Rearranges the array in ascending order, using the natural order.
* @param a the array to be sorted
*/
public static void sort(Comparable[] a) {
StdRandom.shuffle(a);
sort(a, 0, a.length - 1);
assert isSorted(a);
}
// quicksort the subarray from a[lo] to a[hi]
private static void sort(Comparable[] a, int lo, int hi) {
if (hi <= lo) return;
int j = partition(a, lo, hi);
sort(a, lo, j-1);
sort(a, j+1, hi);
assert isSorted(a, lo, hi);
}
// partition the subarray a[lo..hi] so that a[lo..j-1] <= a[j] <= a[j+1..hi]
// and return the index j.
private static int partition(Comparable[] a, int lo, int hi) {
int i = lo;
int j = hi + 1;
Comparable v = a[lo];
while (true) {
// find item on lo to swap
while (less(a[++i], v)) {
if (i == hi) break;
}
// find item on hi to swap
while (less(v, a[--j])) {
if (j == lo) break; // redundant since a[lo] acts as sentinel
}
// check if pointers cross
if (i >= j) break;
exch(a, i, j);
}
// put partitioning item v at a[j]
exch(a, lo, j);
// now, a[lo .. j-1] <= a[j] <= a[j+1 .. hi]
return j;
}
/**
* Rearranges the array so that {@code a[k]} contains the kth smallest key;
* {@code a[0]} through {@code a[k-1]} are less than (or equal to) {@code a[k]}; and
* {@code a[k+1]} through {@code a[n-1]} are greater than (or equal to) {@code a[k]}.
*
* @param a the array
* @param k the rank of the key
* @return the key of rank {@code k}
* @throws IllegalArgumentException unless {@code 0 <= k < a.length}
*/
public static Comparable select(Comparable[] a, int k) {
if (k < 0 || k >= a.length) {
throw new IllegalArgumentException("index is not between 0 and " + a.length + ": " + k);
}
StdRandom.shuffle(a);
int lo = 0, hi = a.length - 1;
while (hi > lo) {
int i = partition(a, lo, hi);
if (i > k) hi = i - 1;
else if (i < k) lo = i + 1;
else return a[i];
}
return a[lo];
}
/***************************************************************************
* Helper sorting functions.
***************************************************************************/
// is v < w ?
private static boolean less(Comparable v, Comparable w) {
if (v == w) return false; // optimization when reference equals
return v.compareTo(w) < 0;
}
// exchange a[i] and a[j]
private static void exch(Object[] a, int i, int j) {
Object swap = a[i];
a[i] = a[j];
a[j] = swap;
}
/***************************************************************************
* Check if array is sorted - useful for debugging.
***************************************************************************/
private static boolean isSorted(Comparable[] a) {
return isSorted(a, 0, a.length - 1);
}
private static boolean isSorted(Comparable[] a, int lo, int hi) {
for (int i = lo + 1; i <= hi; i++)
if (less(a[i], a[i-1])) return false;
return true;
}
// print array to standard output
private static void show(Comparable[] a) {
for (int i = 0; i < a.length; i++) {
StdOut.println(a[i]);
}
}
/**
* Reads in a sequence of strings from standard input; quicksorts them;
* and prints them to standard output in ascending order.
* Shuffles the array and then prints the strings again to
* standard output, but this time, using the select method.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String[] a = StdIn.readAllStrings();
Quick.sort(a);
show(a);
assert isSorted(a);
// shuffle
StdRandom.shuffle(a);
// display results again using select
StdOut.println();
for (int i = 0; i < a.length; i++) {
String ith = (String) Quick.select(a, i);
StdOut.println(ith);
}
}
}
/******************************************************************************
* Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/
3.性能分析
1)算法稳定性:
因为分区过程中涉及交换操作,如果数组中有两个8,其中一个是pivot,经过分区处理后,后面的8就有可能放到了另一个8的前面,先后顺序就颠倒了,所以快速排序是不稳定的排序算法。比如数组[1,2,3,9,8,11,8],取后面的8作为pivot,那么分区后就会将后面的8与9进行交换。
2)时间复杂度:最好、最坏、平均情况
快排也是用递归实现的,所以时间复杂度也可以用递推公式表示。
如果每次分区操作都能正好把数组分成大小接近相等的两个小区间,那快排的时间复杂度递推求解公式跟归并的相同。
T(1) = C; n=1 时,只需要常量级的执行时间,所以表示为 C。
T(n) = 2*T(n/2) + n; n>1
所以,快排的时间复杂度也是O(nlogn)。
如果数组中的元素原来已经有序了,比如1,3,5,6,8,若每次选择最后一个元素作为pivot,那每次分区得到的两个区间都是不均等的,需要进行大约n次的分区,才能完成整个快排过程,而每次分区我们平均要扫描大约n/2个元素,这种情况下,快排的时间复杂度就是O(n^2)。
前面两种情况,一个是分区及其均衡,一个是分区极不均衡,它们分别对应了快排的最好情况时间复杂度和最坏情况时间复杂度。那快排的平均时间复杂度是多少呢?T(n)大部分情况下是O(nlogn),只有在极端情况下才是退化到O(n^2),而且我们也有很多方法将这个概率降低。
3)空间复杂度:快排是一种原地排序算法,空间复杂度是O(1)
四、归并排序与快速排序的区别
归并和快排用的都是分治思想,递推公式和递归代码也非常相似,那它们的区别在哪里呢?
1.归并排序,是先递归调用,再进行合并,合并的时候进行数据的交换。所以它是自下而上的排序方式。何为自下而上?就是先解决子问题,再解决父问题。
2.快速排序,是先分区,在递归调用,分区的时候进行数据的交换。所以它是自上而下的排序方式。何为自上而下?就是先解决父问题,再解决子问题。
五、思考
1.O(n)时间复杂度内求无序数组中第K大元素,比如4,2,5,12,3这样一组数据,第3大元素是4。
我们选择数组区间A[0...n-1]的最后一个元素作为pivot,对数组A[0...n-1]进行原地分区,这样数组就分成了3部分,A[0...p-1]、A[p]、A[p+1...n-1]。
如果如果p+1=K,那A[p]就是要求解的元素;如果K>p+1,说明第K大元素出现在A[p+1...n-1]区间,我们按照上面的思路递归地在A[p+1...n-1]这个区间查找。同理,如果K<p+1,那我们就在A[0...p-1]区间查找。
时间复杂度分析?
第一次分区查找,我们需要对大小为n的数组进行分区操作,需要遍历n个元素。第二次分区查找,我们需要对大小为n/2的数组执行分区操作,需要遍历n/2个元素。依次类推,分区遍历元素的个数分别为n、n/2、n/4、n/8、n/16......直到区间缩小为1。如果把每次分区遍历的元素个数累加起来,就是等比数列求和,结果为2n-1。所以,上述解决问题的思路为O(n)。
2.有10个访问日志文件,每个日志文件大小约为300MB,每个文件里的日志都是按照时间戳从小到大排序的。现在需要将这10个较小的日志文件合并为1个日志文件,合并之后的日志仍然按照时间戳从小到大排列。如果处理上述任务的机器内存只有1GB,你有什么好的解决思路能快速地将这10个日志文件合并?
其他代码
自底向上的归并排序
/******************************************************************************
* Compilation: javac MergeBU.java
* Execution: java MergeBU < input.txt
* Dependencies: StdOut.java StdIn.java
* Data files: https://algs4.cs.princeton.edu/22mergesort/tiny.txt
* https://algs4.cs.princeton.edu/22mergesort/words3.txt
*
* Sorts a sequence of strings from standard input using
* bottom-up mergesort.
*
* % more tiny.txt
* S O R T E X A M P L E
*
* % java MergeBU < tiny.txt
* A E E L M O P R S T X [ one string per line ]
*
* % more words3.txt
* bed bug dad yes zoo ... all bad yet
*
* % java MergeBU < words3.txt
* all bad bed bug dad ... yes yet zoo [ one string per line ]
*
******************************************************************************/
package edu.princeton.cs.algs4;
/**
* The {@code MergeBU} class provides static methods for sorting an
* array using <em>bottom-up mergesort</em>. It is non-recursive.
* <p>
* This implementation takes Θ(<em>n</em> log <em>n</em>) time
* to sort any array of length <em>n</em> (assuming comparisons
* take constant time). It makes between
* ~ ½ <em>n</em> log<sub>2</sub> <em>n</em> and
* ~ 1 <em>n</em> log<sub>2</sub> <em>n</em> compares.
* <p>
* This sorting algorithm is stable.
* It uses Θ(<em>n</em>) extra memory (not including the input array).
* <p>
* For additional documentation, see
* <a href="https://algs4.cs.princeton.edu/21elementary">Section 2.1</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class MergeBU {
// This class should not be instantiated.
private MergeBU() { }
// stably merge a[lo..mid] with a[mid+1..hi] using aux[lo..hi]
private static void merge(Comparable[] a, Comparable[] aux, int lo, int mid, int hi) {
// copy to aux[]
for (int k = lo; k <= hi; k++) {
aux[k] = a[k];
}
// merge back to a[]
int i = lo, j = mid+1;
for (int k = lo; k <= hi; k++) {
if (i > mid) a[k] = aux[j++]; // this copying is unneccessary
else if (j > hi) a[k] = aux[i++];
else if (less(aux[j], aux[i])) a[k] = aux[j++];
else a[k] = aux[i++];
}
}
/**
* Rearranges the array in ascending order, using the natural order.
* @param a the array to be sorted
*/
public static void sort(Comparable[] a) {
int n = a.length;
Comparable[] aux = new Comparable[n];
for (int len = 1; len < n; len *= 2) {
for (int lo = 0; lo < n-len; lo += len+len) {
int mid = lo+len-1;
int hi = Math.min(lo+len+len-1, n-1);
merge(a, aux, lo, mid, hi);
}
}
assert isSorted(a);
}
/***********************************************************************
* Helper sorting functions.
***************************************************************************/
// is v < w ?
private static boolean less(Comparable v, Comparable w) {
return v.compareTo(w) < 0;
}
/***************************************************************************
* Check if array is sorted - useful for debugging.
***************************************************************************/
private static boolean isSorted(Comparable[] a) {
for (int i = 1; i < a.length; i++)
if (less(a[i], a[i-1])) return false;
return true;
}
// print array to standard output
private static void show(Comparable[] a) {
for (int i = 0; i < a.length; i++) {
StdOut.println(a[i]);
}
}
/**
* Reads in a sequence of strings from standard input; bottom-up
* mergesorts them; and prints them to standard output in ascending order.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String[] a = StdIn.readAllStrings();
MergeBU.sort(a);
show(a);
}
}
/******************************************************************************
* Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/
优化版的归并排序(当元素个数小于8个的时候,采用插入排序)
/******************************************************************************
* Compilation: javac MergeX.java
* Execution: java MergeX < input.txt
* Dependencies: StdOut.java StdIn.java
* Data files: https://algs4.cs.princeton.edu/22mergesort/tiny.txt
* https://algs4.cs.princeton.edu/22mergesort/words3.txt
*
* Sorts a sequence of strings from standard input using an
* optimized version of mergesort.
*
* % more tiny.txt
* S O R T E X A M P L E
*
* % java MergeX < tiny.txt
* A E E L M O P R S T X [ one string per line ]
*
* % more words3.txt
* bed bug dad yes zoo ... all bad yet
*
* % java MergeX < words3.txt
* all bad bed bug dad ... yes yet zoo [ one string per line ]
*
******************************************************************************/
package edu.princeton.cs.algs4;
import java.util.Comparator;
/**
* The {@code MergeX} class provides static methods for sorting an
* array using an optimized version of mergesort.
* <p>
* In the worst case, this implementation takes
* Θ(<em>n</em> log <em>n</em>) time to sort an array of
* length <em>n</em> (assuming comparisons take constant time).
* <p>
* This sorting algorithm is stable.
* It uses Θ(<em>n</em>) extra memory (not including the input array).
* <p>
* For additional documentation, see
* <a href="https://algs4.cs.princeton.edu/22mergesort">Section 2.2</a> of
* <i>Algorithms, 4th Edition</i> by Robert Sedgewick and Kevin Wayne.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class MergeX {
private static final int CUTOFF = 7; // cutoff to insertion sort
// This class should not be instantiated.
private MergeX() { }
private static void merge(Comparable[] src, Comparable[] dst, int lo, int mid, int hi) {
// precondition: src[lo .. mid] and src[mid+1 .. hi] are sorted subarrays
assert isSorted(src, lo, mid);
assert isSorted(src, mid+1, hi);
int i = lo, j = mid+1;
for (int k = lo; k <= hi; k++) {
if (i > mid) dst[k] = src[j++];
else if (j > hi) dst[k] = src[i++];
else if (less(src[j], src[i])) dst[k] = src[j++]; // to ensure stability
else dst[k] = src[i++];
}
// postcondition: dst[lo .. hi] is sorted subarray
assert isSorted(dst, lo, hi);
}
private static void sort(Comparable[] src, Comparable[] dst, int lo, int hi) {
// if (hi <= lo) return;
if (hi <= lo + CUTOFF) {
insertionSort(dst, lo, hi);
return;
}
int mid = lo + (hi - lo) / 2;
sort(dst, src, lo, mid);
sort(dst, src, mid+1, hi);
// if (!less(src[mid+1], src[mid])) {
// for (int i = lo; i <= hi; i++) dst[i] = src[i];
// return;
// }
// using System.arraycopy() is a bit faster than the above loop
if (!less(src[mid+1], src[mid])) {
System.arraycopy(src, lo, dst, lo, hi - lo + 1);
return;
}
merge(src, dst, lo, mid, hi);
}
/**
* Rearranges the array in ascending order, using the natural order.
* @param a the array to be sorted
*/
public static void sort(Comparable[] a) {
Comparable[] aux = a.clone();
sort(aux, a, 0, a.length-1);
assert isSorted(a);
}
// sort from a[lo] to a[hi] using insertion sort
private static void insertionSort(Comparable[] a, int lo, int hi) {
for (int i = lo; i <= hi; i++)
for (int j = i; j > lo && less(a[j], a[j-1]); j--)
exch(a, j, j-1);
}
/*******************************************************************
* Utility methods.
*******************************************************************/
// exchange a[i] and a[j]
private static void exch(Object[] a, int i, int j) {
Object swap = a[i];
a[i] = a[j];
a[j] = swap;
}
// is a[i] < a[j]?
private static boolean less(Comparable a, Comparable b) {
return a.compareTo(b) < 0;
}
// is a[i] < a[j]?
private static boolean less(Object a, Object b, Comparator comparator) {
return comparator.compare(a, b) < 0;
}
/*******************************************************************
* Version that takes Comparator as argument.
*******************************************************************/
/**
* Rearranges the array in ascending order, using the provided order.
*
* @param a the array to be sorted
* @param comparator the comparator that defines the total order
*/
public static void sort(Object[] a, Comparator comparator) {
Object[] aux = a.clone();
sort(aux, a, 0, a.length-1, comparator);
assert isSorted(a, comparator);
}
private static void merge(Object[] src, Object[] dst, int lo, int mid, int hi, Comparator comparator) {
// precondition: src[lo .. mid] and src[mid+1 .. hi] are sorted subarrays
assert isSorted(src, lo, mid, comparator);
assert isSorted(src, mid+1, hi, comparator);
int i = lo, j = mid+1;
for (int k = lo; k <= hi; k++) {
if (i > mid) dst[k] = src[j++];
else if (j > hi) dst[k] = src[i++];
else if (less(src[j], src[i], comparator)) dst[k] = src[j++];
else dst[k] = src[i++];
}
// postcondition: dst[lo .. hi] is sorted subarray
assert isSorted(dst, lo, hi, comparator);
}
private static void sort(Object[] src, Object[] dst, int lo, int hi, Comparator comparator) {
// if (hi <= lo) return;
if (hi <= lo + CUTOFF) {
insertionSort(dst, lo, hi, comparator);
return;
}
int mid = lo + (hi - lo) / 2;
sort(dst, src, lo, mid, comparator);
sort(dst, src, mid+1, hi, comparator);
// using System.arraycopy() is a bit faster than the above loop
if (!less(src[mid+1], src[mid], comparator)) {
System.arraycopy(src, lo, dst, lo, hi - lo + 1);
return;
}
merge(src, dst, lo, mid, hi, comparator);
}
// sort from a[lo] to a[hi] using insertion sort
private static void insertionSort(Object[] a, int lo, int hi, Comparator comparator) {
for (int i = lo; i <= hi; i++)
for (int j = i; j > lo && less(a[j], a[j-1], comparator); j--)
exch(a, j, j-1);
}
/***************************************************************************
* Check if array is sorted - useful for debugging.
***************************************************************************/
private static boolean isSorted(Comparable[] a) {
return isSorted(a, 0, a.length - 1);
}
private static boolean isSorted(Comparable[] a, int lo, int hi) {
for (int i = lo + 1; i <= hi; i++)
if (less(a[i], a[i-1])) return false;
return true;
}
private static boolean isSorted(Object[] a, Comparator comparator) {
return isSorted(a, 0, a.length - 1, comparator);
}
private static boolean isSorted(Object[] a, int lo, int hi, Comparator comparator) {
for (int i = lo + 1; i <= hi; i++)
if (less(a[i], a[i-1], comparator)) return false;
return true;
}
// print array to standard output
private static void show(Object[] a) {
for (int i = 0; i < a.length; i++) {
StdOut.println(a[i]);
}
}
/**
* Reads in a sequence of strings from standard input; mergesorts them
* (using an optimized version of mergesort);
* and prints them to standard output in ascending order.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String[] a = StdIn.readAllStrings();
MergeX.sort(a);
show(a);
}
}
/******************************************************************************
* Copyright 2002-2020, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with algs4.jar. If not, see http://www.gnu.org/licenses.
******************************************************************************/