@@ -61,7 +61,7 @@ Indicates that a sorting function should use the paged merge sort
61
61
algorithm. Paged merge sort is a merge sort that uses different
62
62
merge routines to achieve stable sorting with a scratch space of size O(√n).
63
63
The merge routine for merging large subarrays merges
64
- blocks/ pages of size O(√n) almost in place, before reordering them using a page table.
64
+ pages of size O(√n) almost in place, before reordering them using a page table.
65
65
At deeper recursion levels, where the scratch space is big enough,
66
66
normal merging is used, where one input is copied into the scratch space.
67
67
When the scratch space is large enough to hold the complete subarray,
@@ -776,25 +776,28 @@ function merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::O
776
776
end
777
777
end
778
778
779
- # macro used for block management in pagedMerge!
780
- # use next block in A (left subarray) if it is free,
781
- # otherwise use next block in B
782
- macro getNextBlock! ()
783
- quote
784
- if a > nextBlockA * blocksize + lo
785
- currentBlock = nextBlockA
786
- nextBlockA += 1
787
- else
788
- currentBlock = nextBlockB
789
- nextBlockB += 1
790
- end
791
- blockLocation[currentBlockIdx] = currentBlock
792
- currentBlockIdx += 1
793
- end |> esc
779
+ struct Pages
780
+ current:: Int # current page being merged into
781
+ nextA:: Int # next possible page in A
782
+ nextB:: Int # next possible page in B
794
783
end
795
784
796
- # merge v[lo:m] and v[m+1:hi] using buffer buf in O(sqrt(n)) space
797
- function pagedMerge! (v:: AbstractVector{T} , lo:: Integer , m:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , blockLocation:: AbstractVector{<:Integer} ) where T
785
+ next_page_A (pages:: Pages ) = Pages (pages. nextA, pages. nextA + 1 , pages. nextB)
786
+ next_page_B (pages:: Pages ) = Pages (pages. nextB, pages. nextA, pages. nextB + 1 )
787
+
788
+ function next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
789
+ if a > pages. nextA * pagesize + lo
790
+ pages = next_page_A (pages)
791
+ else
792
+ pages = next_page_B (pages)
793
+ end
794
+ pageLocations[currentPageIndex] = pages. current
795
+ currentPageIndex += 1
796
+ pages, currentPageIndex
797
+ end
798
+
799
+ # merge v[lo:m] (A) and v[m+1:hi] (B) using buffer buf in O(sqrt(n)) space
800
+ function paged_merge! (v:: AbstractVector{T} , lo:: Integer , m:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , pageLocations:: AbstractVector{<:Integer} ) where T
798
801
@assert lo < m < hi
799
802
a = lo
800
803
b = m + 1
@@ -813,142 +816,144 @@ function pagedMerge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer,
813
816
end
814
817
815
818
len = lenA + lenB
816
- blocksize = isqrt (len)
817
- nBlocks = len ÷ blocksize
818
- @assert length (buf) >= 3 blocksize
819
- @assert length (blockLocation ) >= nBlocks + 1
819
+ pagesize = isqrt (len)
820
+ nPages = len ÷ pagesize
821
+ @assert length (buf) >= 3 pagesize
822
+ @assert length (pageLocations ) >= nPages + 1
820
823
821
- @inline getBlockOffset (block ) = (block - 1 )* blocksize + lo - 1
824
+ @inline page_offset (page ) = (page - 1 )* pagesize + lo - 1
822
825
823
826
@inbounds begin
824
827
# #################
825
828
# merge
826
829
# #################
827
- # merge into buf until full
828
- a,b,k = merge! ((_,_,k) -> k<= 3 blocksize,buf,v,v,o,a,b,1 )
829
-
830
- nextBlockA = 1
831
- nextBlockB = (m + blocksize- lo) ÷ blocksize + 1
832
- blockLocation .= 0
833
- blockLocation[1 : 3 ] = - 1 : - 1 : - 3
834
-
830
+ # merge the first 3 pages into buf
831
+ a,b,k = merge! ((_,_,k) -> k<= 3 pagesize,buf,v,v,o,a,b,1 )
832
+ # initialize variables for merging into pages
833
+ pageLocations .= 0
834
+ pageLocations[1 : 3 ] = - 1 : - 1 : - 3
835
+ currentPageIndex = 4
836
+ currentPage = 0
837
+ nextPageA = 1
838
+ nextPageB = (m + pagesize- lo) ÷ pagesize + 1
839
+ pages = Pages (currentPage, nextPageA, nextPageB)
835
840
k = 1
836
- currentBlock = 0
837
- currentBlockIdx = 4
838
- # more efficient loop while more than blocksize elements of A and B are remaining
839
- while_condition1 (offset) = (_,_,k) -> k <= offset + blocksize
840
- while a < m- blocksize && b < hi- blocksize
841
- @getNextBlock!
842
- offset = getBlockOffset (currentBlock)
841
+ # more efficient loop, used while more than pagesize elements remain in both A and B
842
+ while_condition1 (offset) = (_,_,k) -> k <= offset + pagesize
843
+ while a < m- pagesize && b < hi- pagesize
844
+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
845
+ offset = page_offset (pages. current)
843
846
a,b,k = merge! (while_condition1 (offset),v,v,v,o,a,b,offset+ 1 )
844
847
end
845
848
# merge until either A or B is empty
846
- while_condition2 (offset) = (a,b,k) -> k <= offset + blocksize && a <= m && b <= hi
849
+ while_condition2 (offset) = (a,b,k) -> k <= offset + pagesize && a <= m && b <= hi
847
850
while a <= m && b <= hi
848
- @getNextBlock!
849
- offset = getBlockOffset (currentBlock )
851
+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
852
+ offset = page_offset (pages . current )
850
853
a,b,k = merge! (while_condition2 (offset),v,v,v,o,a,b,offset+ 1 )
851
854
end
852
- k_block = k - getBlockOffset (currentBlock )
855
+ k_page = k - page_offset (pages . current )
853
856
# copy remaining elements
854
857
# either A or B is empty
855
858
# copy rest of A
856
859
while a <= m
857
- if k_block > blocksize
858
- @getNextBlock!
859
- k_block = 1
860
+ if k_page > pagesize
861
+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
862
+ k_page = 1
860
863
end
861
- offset = getBlockOffset (currentBlock )
862
- while k_block <= blocksize && a <= m
863
- v[offset + k_block ] = v[a]
864
+ offset = page_offset (pages . current )
865
+ while k_page <= pagesize && a <= m
866
+ v[offset + k_page ] = v[a]
864
867
a += 1
865
- k_block += 1
868
+ k_page += 1
866
869
end
867
870
end
868
871
# copy rest of B
869
872
while b <= hi
870
- if k_block > blocksize
871
- @getNextBlock!
872
- k_block = 1
873
+ if k_page > pagesize
874
+ pages, currentPageIndex = next_page! (pageLocations, pages, currentPageIndex, pagesize, lo, a)
875
+ k_page = 1
873
876
end
874
- offset = getBlockOffset (currentBlock )
875
- while k_block <= blocksize && b <= hi
876
- v[offset + k_block ] = v[b]
877
+ offset = page_offset (pages . current )
878
+ while k_page <= pagesize && b <= hi
879
+ v[offset + k_page ] = v[b]
877
880
b += 1
878
- k_block += 1
881
+ k_page += 1
879
882
end
880
883
end
881
- # copy last partial block to end
882
- partialBlockPresent = k_block <= blocksize
883
- if partialBlockPresent
884
- offset = getBlockOffset (currentBlock )
885
- offset2 = nBlocks * blocksize + lo - 1
886
- for j = 1 : k_block - 1
884
+ # copy last partial page to end
885
+ partialPagePresent = k_page <= pagesize
886
+ if partialPagePresent
887
+ offset = page_offset (pages . current )
888
+ offset2 = nPages * pagesize + lo - 1
889
+ for j = 1 : k_page - 1
887
890
v[offset2 + j] = v[offset + j]
888
891
end
889
- blockLocation[currentBlockIdx - 1 ] = 0
892
+ pageLocations[currentPageIndex - 1 ] = 0
890
893
end
891
894
# ########################################
892
- # calculate location of the 3 free blocks
895
+ # calculate location of the 3 free pages
893
896
# ########################################
894
- nFreeBlocksB = nBlocks + 1 - nextBlockB
895
- nFreeBlocksA = 3 - nFreeBlocksB - Int (partialBlockPresent )
896
- freeBlocks = MVector {3,Int} (undef)
897
+ nFreePagesB = nPages + 1 - pages . nextB
898
+ nFreePagesA = 3 - nFreePagesB - Int (partialPagePresent )
899
+ freePages = MVector {3,Int} (undef)
897
900
i = 1
898
- for j = 0 : nFreeBlocksA - 1
899
- freeBlocks [i] = nextBlockA + j
901
+ for j = 0 : nFreePagesA - 1
902
+ freePages [i] = pages . nextA + j
900
903
i += 1
901
904
end
902
- for j = 0 : nFreeBlocksB - 1
903
- freeBlocks [i] = nextBlockB + j
905
+ for j = 0 : nFreePagesB - 1
906
+ freePages [i] = pages . nextB + j
904
907
i += 1
905
908
end
906
- if partialBlockPresent
907
- freeBlocks [i] = currentBlock
909
+ if partialPagePresent
910
+ freePages [i] = pages . current
908
911
end
909
- freeBlocksIdx = 3
910
- doneBlockIdx = 1
911
- currentBlock = freeBlocks[end ]
912
+ freePagesIndex = 3
913
+ donePageIndex = 1
914
+ # use currentPage instead of pages.current because
915
+ # pages.nextA and pages.nextB are no longer needed
916
+ currentPage = freePages[end ]
912
917
# #################
913
- # rearrange blocks
918
+ # rearrange pages
914
919
# #################
915
920
while true
916
- blc = blockLocation[currentBlock ] # index of block with data belonging to currentBlock
917
- if blc > 0
918
- # data for currentBlock is in v
919
- offset = getBlockOffset (currentBlock )
920
- offset2 = getBlockOffset (blc )
921
- for j = 1 : blocksize
921
+ plc = pageLocations[currentPage ] # page with data belonging to currentPage
922
+ if plc > 0
923
+ # data for currentPage is in v
924
+ offset = page_offset (currentPage )
925
+ offset2 = page_offset (plc )
926
+ for j = 1 : pagesize
922
927
v[offset + j] = v[offset2 + j]
923
928
end
924
- blockLocation[currentBlock ] = 0
925
- currentBlock = blc
929
+ pageLocations[currentPage ] = 0
930
+ currentPage = plc
926
931
else
927
- # data for currentBlock is in buf
928
- offset = getBlockOffset (currentBlock )
929
- offset2 = (- blc - 1 )* blocksize
930
- for j = 1 : blocksize
932
+ # data for currentPage is in buf
933
+ offset = page_offset (currentPage )
934
+ offset2 = (- plc - 1 )* pagesize
935
+ for j = 1 : pagesize
931
936
v[offset + j] = buf[offset2 + j]
932
937
end
933
- blockLocation[currentBlock ] = 0
934
- if freeBlocksIdx > 1
935
- # get next free block
936
- freeBlocksIdx -= 1
937
- currentBlock = freeBlocks[freeBlocksIdx ]
938
+ pageLocations[currentPage ] = 0
939
+ if freePagesIndex > 1
940
+ # get next free page
941
+ freePagesIndex -= 1
942
+ currentPage = freePages[freePagesIndex ]
938
943
else
939
- # no free block remains
940
- # make sure that all blocks are done
941
- while blockLocation[doneBlockIdx ] == 0 || blockLocation[doneBlockIdx ] == doneBlockIdx
942
- doneBlockIdx += 1
943
- doneBlockIdx == nBlocks && return
944
+ # no free page remains
945
+ # make sure that all pages are done
946
+ while pageLocations[donePageIndex ] == 0 || pageLocations[donePageIndex ] == donePageIndex
947
+ donePageIndex += 1
948
+ donePageIndex == nPages && return
944
949
end
945
- # copy misplaced block into buf and continue
946
- currentBlock = blockLocation[doneBlockIdx ]
947
- offset = getBlockOffset (currentBlock )
948
- for j = 1 : blocksize
950
+ # copy misplaced page into buf and continue
951
+ currentPage = pageLocations[donePageIndex ]
952
+ offset = page_offset (currentPage )
953
+ for j = 1 : pagesize
949
954
buf[j] = v[offset + j]
950
955
end
951
- blockLocation[doneBlockIdx ] = - 1
956
+ pageLocations[donePageIndex ] = - 1
952
957
end
953
958
end
954
959
end
@@ -959,59 +964,59 @@ end
959
964
# -> redefine for compatibility with earlier versions
960
965
midpoint (lo:: Integer , hi:: Integer ) = lo + ((hi - lo) >>> 0x01 )
961
966
962
- function pagedmergesort! (v:: AbstractVector{T} , lo:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , blockLocation ) where T
967
+ function pagedmergesort! (v:: AbstractVector{T} , lo:: Integer , hi:: Integer , o:: Ordering , buf:: AbstractVector{T} , pageLocations ) where T
963
968
len = hi + 1 - lo
964
969
if len <= Base. SMALL_THRESHOLD
965
970
return Base. Sort. sort! (v, lo, hi, Base. Sort. InsertionSortAlg (), o)
966
971
end
967
972
m = midpoint (lo, hi- 1 ) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
968
- pagedmergesort! (v, lo, m, o, buf, blockLocation )
969
- pagedmergesort! (v, m+ 1 , hi, o, buf, blockLocation )
973
+ pagedmergesort! (v, lo, m, o, buf, pageLocations )
974
+ pagedmergesort! (v, m+ 1 , hi, o, buf, pageLocations )
970
975
if len <= length (buf)
971
976
twoended_merge! (v, lo, m, hi, o, buf)
972
977
else
973
- pagedMerge ! (v, lo, m, hi, o, buf, blockLocation )
978
+ paged_merge ! (v, lo, m, hi, o, buf, pageLocations )
974
979
end
975
980
return v
976
981
end
977
982
978
983
function sort! (v:: AbstractVector , lo:: Integer , hi:: Integer , :: PagedMergeSortAlg , o:: Ordering )
979
984
lo >= hi && return v
980
985
n = hi + 1 - lo
981
- blocksize = isqrt (n)
982
- buf = Vector {eltype(v)} (undef, 3 blocksize )
983
- nBlocks = n ÷ blocksize
984
- blockLocation = Vector {Int} (undef, nBlocks + 1 )
985
- pagedmergesort! (v, lo, hi, o, buf, blockLocation )
986
+ pagesize = isqrt (n)
987
+ buf = Vector {eltype(v)} (undef, 3 pagesize )
988
+ nPages = n ÷ pagesize
989
+ pageLocations = Vector {Int} (undef, nPages + 1 )
990
+ pagedmergesort! (v, lo, hi, o, buf, pageLocations )
986
991
return v
987
992
end
988
993
989
994
Base. @static if VERSION >= v " 1.3"
990
995
const PAGEDMERGESORT_THREADING_THRESHOLD = 2 ^ 13
991
- function threaded_pagedmergesort! (v:: AbstractVector , lo:: Integer , hi:: Integer , o:: Ordering , bufs, blockLocations , c:: Channel , threadingThreshold:: Integer )
996
+ function threaded_pagedmergesort! (v:: AbstractVector , lo:: Integer , hi:: Integer , o:: Ordering , bufs, pageLocations , c:: Channel , threadingThreshold:: Integer )
992
997
len = hi + 1 - lo
993
998
if len <= Base. SMALL_THRESHOLD
994
999
return Base. Sort. sort! (v, lo, hi, Base. Sort. InsertionSortAlg (), o)
995
1000
end
996
1001
m = midpoint (lo, hi- 1 ) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
997
1002
if len > threadingThreshold
998
- thr = Threads. @spawn threaded_pagedmergesort! (v, lo, m, o, bufs, blockLocations , c, threadingThreshold)
999
- threaded_pagedmergesort! (v, m+ 1 , hi, o, bufs, blockLocations , c, threadingThreshold)
1003
+ thr = Threads. @spawn threaded_pagedmergesort! (v, lo, m, o, bufs, pageLocations , c, threadingThreshold)
1004
+ threaded_pagedmergesort! (v, m+ 1 , hi, o, bufs, pageLocations , c, threadingThreshold)
1000
1005
wait (thr)
1001
1006
id = take! (c)
1002
1007
buf = bufs[id]
1003
- blockLocation = blockLocations [id]
1008
+ pageLocations = pageLocations [id]
1004
1009
else
1005
1010
id = take! (c)
1006
1011
buf = bufs[id]
1007
- blockLocation = blockLocations [id]
1008
- pagedmergesort! (v, lo, m, o, buf, blockLocation )
1009
- pagedmergesort! (v, m+ 1 , hi, o, buf, blockLocation )
1012
+ pageLocations = pageLocations [id]
1013
+ pagedmergesort! (v, lo, m, o, buf, pageLocations )
1014
+ pagedmergesort! (v, m+ 1 , hi, o, buf, pageLocations )
1010
1015
end
1011
1016
if len <= length (buf)
1012
1017
twoended_merge! (v, lo, m, hi, o, buf)
1013
1018
else
1014
- pagedMerge ! (v, lo, m, hi, o, buf, blockLocation )
1019
+ paged_merge ! (v, lo, m, hi, o, buf, pageLocations )
1015
1020
end
1016
1021
put! (c, id)
1017
1022
return v
@@ -1022,15 +1027,15 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::ThreadedPagedMerge
1022
1027
nThreads= Threads. nthreads ()
1023
1028
(n < PAGEDMERGESORT_THREADING_THRESHOLD || nThreads < 2 ) && return sort! (v, lo, hi, PagedMergeSortAlg (), o)
1024
1029
threadingThreshold = max (n ÷ 4 nThreads, PAGEDMERGESORT_THREADING_THRESHOLD)
1025
- blocksize = isqrt (n)
1026
- nBlocks = n ÷ blocksize
1027
- bufs = [Vector {eltype(v)} (undef, 3 blocksize ) for _ in 1 : nThreads] # allocate buffer for each thread
1028
- blockLocation = [Vector {Int} (undef, nBlocks + 1 ) for _ in 1 : nThreads]
1030
+ pagesize = isqrt (n)
1031
+ nPages = n ÷ pagesize
1032
+ bufs = [Vector {eltype(v)} (undef, 3 pagesize ) for _ in 1 : nThreads] # allocate buffer for each thread
1033
+ pageLocations = [Vector {Int} (undef, nPages + 1 ) for _ in 1 : nThreads]
1029
1034
c = Channel {Int} (nThreads) # channel holds indices of available buffers
1030
1035
for i= 1 : nThreads
1031
1036
put! (c, i)
1032
1037
end
1033
- threaded_pagedmergesort! (v, lo, hi, o, bufs, blockLocation , c, threadingThreshold)
1038
+ threaded_pagedmergesort! (v, lo, hi, o, bufs, pageLocations , c, threadingThreshold)
1034
1039
return v
1035
1040
end
1036
1041
else
0 commit comments