F#中的并行Quicksort

问题描述

使用基于任务的并行机制,在 F# 中将快速排序并行化。

我无法让并行版本的代码比顺序版本运行得更快。“quicksortParallel”函数接受一个深度参数,该参数决定在对应“深度/层级”上的递归调用是顺序运行还是并行运行。传入负数深度即可让代码以顺序方式运行。顺序运行时,对 200 万个数字排序大约需要 9 秒。现在,如果我传入非负的深度值(小于 4),运行时间几乎没有变化;而当深度值大于 4 时,运行时间又开始增加,这是因为并行化的开销超过了并行化所能带来的收益。

我不明白的是,为什么深度参数取 0 到 4 时看不到性能提升?我是在一颗拥有 16 个逻辑核心的 Intel i9 CPU 上运行的。我该如何将它并行化?

open System
open System.Threading.Tasks
module myMod =
    /// Builds a list of `count` integers, each obtained by calling `Next()`
    /// on a single `System.Random` instance created for this call.
    let genRandomNums count =
        let generator = System.Random()
        // The list index supplied by List.init is irrelevant; every slot just
        // takes the next value from the shared generator.
        let nextValue (_: int) = generator.Next()
        List.init count nextValue

    /// Sorts `aList` in ascending order with a list-based (copying) quicksort
    /// and returns the sorted list. The head of the list is used as the pivot;
    /// elements equal to the pivot end up in the "larger" partition.
    ///
    /// `depth` controls parallelism: while it is non-negative, the "smaller"
    /// partition is sorted on a separate task and both recursive calls receive
    /// `depth - 1`; once it is negative the sort runs fully sequentially.
    let rec quicksortParallel depth aList =
        match aList with
        | [] -> []
        | firstElement :: restOfList ->
            let smaller, larger =
                List.partition (fun number -> number < firstElement) restOfList
            if depth < 0 then
                let left  = quicksortParallel depth smaller
                let right = quicksortParallel depth larger
                left @ (firstElement :: right)
            else
                // Fork only one half. The other half is sorted right here on the
                // calling thread, so this thread does useful work instead of
                // starting two tasks and then merely waiting for both of them.
                let leftTask = Task.Run(fun () -> quicksortParallel (depth - 1) smaller)
                let right = quicksortParallel (depth - 1) larger
                // `Result` blocks until the forked half has completed.
                leftTask.Result @ (firstElement :: right)
    
    // Benchmark input: a list of 2,000,000 random integers.
    let sampleNumbers = genRandomNums 2000000
    
    // The stopwatch is started after the input has been generated, so only the
    // sort itself is timed.
    let stopWatch = System.Diagnostics.Stopwatch.StartNew()
    //let sortedSnums = quicksortParallel -1 sampleNumbers //this runs the quicksort sequentially
    // A non-negative depth (4 here) takes the task-based branch of
    // quicksortParallel for the first levels of recursion.
    let sortedSnums = quicksortParallel 4 sampleNumbers
    stopWatch.Stop()

    // Report the same elapsed interval in several units.
    printfn "time taken %A millseconds\n" stopWatch.Elapsed.TotalMilliseconds
    printfn "time taken %A seconds\n" stopWatch.Elapsed.TotalSeconds
    printfn "time taken %A minutes\n" stopWatch.Elapsed.TotalMinutes
    printfn "time taken %A hours\n" stopWatch.Elapsed.TotalHours

并行化之后,C# 中的等效代码(未使用就地分区)运行得更快:

/// <summary>
/// Benchmark driver for a list-based (not in-place) quicksort whose top levels
/// of recursion can be run on tasks.
/// </summary>
class Program
{
    /// <summary>
    /// Builds a list of <paramref name="count"/> integers, each produced by
    /// <c>Random.Next(int.MaxValue)</c> on a single generator.
    /// </summary>
    static List<int> genRandomNums(int count)
    {
        var rnd = new System.Random();
        // One draw per index; the index itself is not used.
        return Enumerable.Range(0, count)
            .Select(i => rnd.Next(int.MaxValue))
            .ToList();
    }

    /// <summary>
    /// Returns a new list holding the elements of <paramref name="values"/> in
    /// ascending order. The input list is not modified. The first element is
    /// the pivot; elements equal to the pivot go to the "larger" side.
    /// </summary>
    /// <param name="values">Elements to sort.</param>
    /// <param name="depth">
    /// While non-negative, the "smaller" partition is sorted on a separate task
    /// and both recursive calls receive <c>depth - 1</c>; a negative value
    /// makes the sort run sequentially.
    /// </param>
    static List<T> QuickSort<T>(List<T> values, int depth)
       where T : IComparable
    {
        if (values.Count == 0)
        {
            return new List<T>();
        }

        T firstElement = values[0];

        // Split everything after the pivot into two fresh lists.
        var smallerElements = new List<T>();
        var largerElements = new List<T>();
        for (int i = 1; i < values.Count; i++)  // i starts at 1, not 0: index 0 is the pivot
        {
            var elem = values[i];
            // The method is CompareTo (capital T); C# member lookup is case-sensitive.
            if (elem.CompareTo(firstElement) < 0)
            {
                smallerElements.Add(elem);
            }
            else
            {
                largerElements.Add(elem);
            }
        }

        // The partition lists are private to this call and the recursive call
        // never mutates its input, so they are passed on without copying.
        List<T> smallList;
        List<T> bigList;
        if (depth < 0)
        {
            smallList = QuickSort(smallerElements, depth);
            bigList = QuickSort(largerElements, depth);
        }
        else
        {
            // Fork one half and sort the other on the calling thread, so this
            // thread does useful work instead of only waiting on two tasks.
            Task<List<T>> smallTask = Task.Run(() => QuickSort(smallerElements, depth - 1));
            bigList = QuickSort(largerElements, depth - 1);
            smallList = smallTask.Result; // blocks until the forked half is done
        }

        // The final size is known, so allocate the result once.
        var result = new List<T>(values.Count);
        result.AddRange(smallList);
        result.Add(firstElement);
        result.AddRange(bigList);
        return result;
    }

    /// <summary>
    /// Sorts 50,000,000 random integers and prints the elapsed time of the sort.
    /// </summary>
    static void Main(string[] args)
    {
        var sampleNumbers = genRandomNums(50000000);

        int depth = 4;//set it to a negative value to run serially
        // Only the sort is timed; input generation happens before the stopwatch starts.
        var stopWatch = System.Diagnostics.Stopwatch.StartNew();
        List<int> sortedList = QuickSort<int>(sampleNumbers, depth);
        stopWatch.Stop();

        Console.WriteLine("time taken {0} seconds\n", stopWatch.Elapsed.TotalSeconds);
        Console.WriteLine("time taken {0} minutes\n", stopWatch.Elapsed.TotalMinutes);
    }
}

使用就地排序/分区的 F# 快速排序才是正确的实现方式,它在以任务并行执行时确实运行得更快。

module myMod =
    
    /// Builds an array of `count` integers, each produced by calling
    /// `Next(System.Int32.MaxValue)` on a single `System.Random` instance.
    let genRandomNums_arr count =
        let rnd = System.Random()
        // F# identifiers are case-sensitive: the constant lives at
        // System.Int32.MaxValue (the lowercase spelling does not resolve).
        Array.init count (fun _ -> rnd.Next(System.Int32.MaxValue))
    
    /// Exchanges, in place, the elements of `aArray` stored at positions
    /// `indexA` and `indexB`.
    let swap (aArray: int array) indexA indexB =
        // Read both slots first, then write each value into the other slot.
        let valueAtA, valueAtB = aArray.[indexA], aArray.[indexB]
        aArray.[indexA] <- valueAtB
        aArray.[indexB] <- valueAtA

    /// Partitions the inclusive index range `first`..`last` of `aArray` in place
    /// around the pivot value `aArray.[last]` and returns the pivot's final index.
    /// Afterwards every element left of that index (within the range) is smaller
    /// than the pivot, and every element right of it is greater than or equal.
    let partition (aArray: int array) first last =
        let pivot = aArray.[last]
        // `boundary` is the next slot that will receive an element smaller than
        // the pivot; everything in first..boundary-1 is already smaller.
        let mutable boundary = first
        for scan in first .. last - 1 do
            if aArray.[scan] < pivot then
                swap aArray boundary scan
                boundary <- boundary + 1
        // Drop the pivot between the "smaller" and "not smaller" regions.
        swap aArray boundary last
        boundary

    /// Sorts the inclusive index range `first`..`last` of `aArray` in place,
    /// in ascending order. Ranges with fewer than two elements are left as is.
    ///
    /// `depth` controls parallelism: while it is non-negative, the left
    /// sub-range is sorted on a separate task and both recursive calls receive
    /// `depth - 1`; once it is negative the recursion is fully sequential.
    let rec quicksortParallelInPlace (aArray: int array) first last depth =
        if ((last - first) >= 1) then
            let pivotposition = partition aArray first last
            if depth < 0 then
                quicksortParallelInPlace aArray first (pivotposition - 1) depth
                quicksortParallelInPlace aArray (pivotposition + 1) last depth
            else
                // The two sub-ranges lie on either side of the pivot index and do
                // not overlap, so they can be sorted concurrently in the same array.
                // Fork only the left one; the right one is sorted on the calling
                // thread so that thread does useful work instead of only waiting.
                let leftTask = Task.Run(fun () -> quicksortParallelInPlace aArray first (pivotposition - 1) (depth - 1))
                quicksortParallelInPlace aArray (pivotposition + 1) last (depth - 1)
                // Do not return until the forked half has finished as well.
                leftTask.Wait()
                        

    /// Sorts the whole of `aArray` in place by delegating to
    /// `quicksortParallelInPlace` over the index range 0..Length-1, passing
    /// `depth` through unchanged.
    let quickSortInPlace (aArray: int array) depth =
        let lastIndex = aArray.Length - 1
        quicksortParallelInPlace aArray 0 lastIndex depth

    // Benchmark input: an array of 50,000,000 random integers.
    let sampleNumbers_arr = genRandomNums_arr 50000000    
    //printfn "un-sorted list %A" sampleNumbers_arr 

    // The stopwatch is started after the input has been generated, so only the
    // in-place sort is timed.
    let stopWatch1 = System.Diagnostics.Stopwatch.StartNew()
    //let sortedSnums = quicksortParallel -1 sampleNumbers //this runs the quicksort sequentially
    quickSortInPlace sampleNumbers_arr 4 //run serially using a negative number
    stopWatch1.Stop()

    //printfn "un-sorted list %A" sampleNumbers_arr

    // Report the same elapsed interval in several units.
    printfn "time taken %A millseconds\n" stopWatch1.Elapsed.TotalMilliseconds
    printfn "time taken %A seconds\n" stopWatch1.Elapsed.TotalSeconds
    printfn "time taken %A minutes\n" stopWatch1.Elapsed.TotalMinutes
    printfn "time taken %A hours\n" stopWatch1.Elapsed.TotalHours        

解决方法

我怀疑性能低下的罪魁祸首实际上是 List.partition(参见此处)。与其反复复制各个分区,不如计算出分区索引,并直接基于索引进行操作,这样可能会更好。