Skip to content

Commit bb4e706

Browse files
committed
Replace getNextBlock! macro by function
and always use "page" instead of "block"
1 parent f66e799 commit bb4e706

File tree

1 file changed

+131
-126
lines changed

1 file changed

+131
-126
lines changed

src/SortingAlgorithms.jl

Lines changed: 131 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ Indicates that a sorting function should use the paged merge sort
6161
algorithm. Paged merge sort is a merge sort that uses different
6262
merge routines to achieve stable sorting with a scratch space of size O(√n).
6363
The merge routine for merging large subarrays merges
64-
blocks/pages of size O(√n) almost in place, before reordering them using a page table.
64+
pages of size O(√n) almost in place, before reordering them using a page table.
6565
At deeper recursion levels, where the scratch space is big enough,
6666
normal merging is used, where one input is copied into the scratch space.
6767
When the scratch space is large enough to hold the complete subarray,
@@ -776,25 +776,28 @@ function merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::O
776776
end
777777
end
778778

779-
# macro used for block management in pagedMerge!
780-
# use next block in A (left subarray) if it is free,
781-
# otherwise use next block in B
782-
macro getNextBlock!()
783-
quote
784-
if a > nextBlockA * blocksize + lo
785-
currentBlock = nextBlockA
786-
nextBlockA += 1
787-
else
788-
currentBlock = nextBlockB
789-
nextBlockB += 1
790-
end
791-
blockLocation[currentBlockIdx] = currentBlock
792-
currentBlockIdx += 1
793-
end |> esc
779+
struct Pages
780+
current::Int # current page being merged into
781+
nextA::Int # next possible page in A
782+
nextB::Int # next possible page in B
794783
end
795784

796-
# merge v[lo:m] and v[m+1:hi] using buffer buf in O(sqrt(n)) space
797-
function pagedMerge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::Ordering, buf::AbstractVector{T}, blockLocation::AbstractVector{<:Integer}) where T
785+
# Take the next page from A: the former `nextA` becomes the current page,
# `nextA` advances by one, and `nextB` is carried over unchanged.
function next_page_A(pages::Pages)
    taken = pages.nextA
    return Pages(taken, taken + 1, pages.nextB)
end
786+
# Take the next page from B: the former `nextB` becomes the current page,
# `nextB` advances by one, and `nextA` is carried over unchanged.
function next_page_B(pages::Pages)
    taken = pages.nextB
    return Pages(taken, pages.nextA, taken + 1)
end
787+
788+
function next_page!(pageLocations, pages, currentPageIndex, pagesize, lo, a)
789+
if a > pages.nextA * pagesize + lo
790+
pages = next_page_A(pages)
791+
else
792+
pages = next_page_B(pages)
793+
end
794+
pageLocations[currentPageIndex] = pages.current
795+
currentPageIndex += 1
796+
pages, currentPageIndex
797+
end
798+
799+
# merge v[lo:m] (A) and v[m+1:hi] (B) using buffer buf in O(sqrt(n)) space
800+
function paged_merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::Ordering, buf::AbstractVector{T}, pageLocations::AbstractVector{<:Integer}) where T
798801
@assert lo < m < hi
799802
a = lo
800803
b = m + 1
@@ -813,142 +816,144 @@ function pagedMerge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer,
813816
end
814817

815818
len = lenA + lenB
816-
blocksize = isqrt(len)
817-
nBlocks = len ÷ blocksize
818-
@assert length(buf) >= 3blocksize
819-
@assert length(blockLocation) >= nBlocks + 1
819+
pagesize = isqrt(len)
820+
nPages = len ÷ pagesize
821+
@assert length(buf) >= 3pagesize
822+
@assert length(pageLocations) >= nPages + 1
820823

821-
@inline getBlockOffset(block) = (block-1)*blocksize + lo - 1
824+
@inline page_offset(page) = (page-1)*pagesize + lo - 1
822825

823826
@inbounds begin
824827
##################
825828
# merge
826829
##################
827-
# merge into buf until full
828-
a,b,k = merge!((_,_,k) -> k<=3blocksize,buf,v,v,o,a,b,1)
829-
830-
nextBlockA = 1
831-
nextBlockB = (m + blocksize-lo) ÷ blocksize + 1
832-
blockLocation .= 0
833-
blockLocation[1:3] = -1:-1:-3
834-
830+
# merge the first 3 pages into buf
831+
a,b,k = merge!((_,_,k) -> k<=3pagesize,buf,v,v,o,a,b,1)
832+
# initialize variables for merging into pages
833+
pageLocations .= 0
834+
pageLocations[1:3] = -1:-1:-3
835+
currentPageIndex = 4
836+
currentPage = 0
837+
nextPageA = 1
838+
nextPageB = (m + pagesize-lo) ÷ pagesize + 1
839+
pages = Pages(currentPage, nextPageA, nextPageB)
835840
k = 1
836-
currentBlock = 0
837-
currentBlockIdx = 4
838-
# more efficient loop while more than blocksize elements of A and B are remaining
839-
while_condition1(offset) = (_,_,k) -> k <= offset + blocksize
840-
while a < m-blocksize && b < hi-blocksize
841-
@getNextBlock!
842-
offset = getBlockOffset(currentBlock)
841+
# more efficient loop while more than pagesize elements of A and B are remaining
842+
while_condition1(offset) = (_,_,k) -> k <= offset + pagesize
843+
while a < m-pagesize && b < hi-pagesize
844+
pages, currentPageIndex = next_page!(pageLocations, pages, currentPageIndex, pagesize, lo, a)
845+
offset = page_offset(pages.current)
843846
a,b,k = merge!(while_condition1(offset),v,v,v,o,a,b,offset+1)
844847
end
845848
# merge until either A or B is empty
846-
while_condition2(offset) = (a,b,k) -> k <= offset + blocksize && a <= m && b <= hi
849+
while_condition2(offset) = (a,b,k) -> k <= offset + pagesize && a <= m && b <= hi
847850
while a <= m && b <= hi
848-
@getNextBlock!
849-
offset = getBlockOffset(currentBlock)
851+
pages, currentPageIndex = next_page!(pageLocations, pages, currentPageIndex, pagesize, lo, a)
852+
offset = page_offset(pages.current)
850853
a,b,k = merge!(while_condition2(offset),v,v,v,o,a,b,offset+1)
851854
end
852-
k_block = k - getBlockOffset(currentBlock)
855+
k_page = k - page_offset(pages.current)
853856
# copy remaining elements
854857
# either A or B is empty
855858
# copy rest of A
856859
while a <= m
857-
if k_block > blocksize
858-
@getNextBlock!
859-
k_block = 1
860+
if k_page > pagesize
861+
pages, currentPageIndex = next_page!(pageLocations, pages, currentPageIndex, pagesize, lo, a)
862+
k_page = 1
860863
end
861-
offset = getBlockOffset(currentBlock)
862-
while k_block <= blocksize && a <= m
863-
v[offset + k_block] = v[a]
864+
offset = page_offset(pages.current)
865+
while k_page <= pagesize && a <= m
866+
v[offset + k_page] = v[a]
864867
a += 1
865-
k_block += 1
868+
k_page += 1
866869
end
867870
end
868871
# copy rest of B
869872
while b <= hi
870-
if k_block > blocksize
871-
@getNextBlock!
872-
k_block = 1
873+
if k_page > pagesize
874+
pages, currentPageIndex = next_page!(pageLocations, pages, currentPageIndex, pagesize, lo, a)
875+
k_page = 1
873876
end
874-
offset = getBlockOffset(currentBlock)
875-
while k_block <= blocksize && b <= hi
876-
v[offset + k_block] = v[b]
877+
offset = page_offset(pages.current)
878+
while k_page <= pagesize && b <= hi
879+
v[offset + k_page] = v[b]
877880
b += 1
878-
k_block += 1
881+
k_page += 1
879882
end
880883
end
881-
# copy last partial block to end
882-
partialBlockPresent = k_block <= blocksize
883-
if partialBlockPresent
884-
offset = getBlockOffset(currentBlock)
885-
offset2 = nBlocks*blocksize + lo - 1
886-
for j = 1:k_block-1
884+
# copy last partial page to end
885+
partialPagePresent = k_page <= pagesize
886+
if partialPagePresent
887+
offset = page_offset(pages.current)
888+
offset2 = nPages*pagesize + lo - 1
889+
for j = 1:k_page-1
887890
v[offset2 + j] = v[offset + j]
888891
end
889-
blockLocation[currentBlockIdx-1] = 0
892+
pageLocations[currentPageIndex-1] = 0
890893
end
891894
#########################################
892-
# calculate location of the 3 free blocks
895+
# calculate location of the 3 free pages
893896
#########################################
894-
nFreeBlocksB = nBlocks + 1 - nextBlockB
895-
nFreeBlocksA = 3 - nFreeBlocksB - Int(partialBlockPresent)
896-
freeBlocks = MVector{3,Int}(undef)
897+
nFreePagesB = nPages + 1 - pages.nextB
898+
nFreePagesA = 3 - nFreePagesB - Int(partialPagePresent)
899+
freePages = MVector{3,Int}(undef)
897900
i = 1
898-
for j = 0:nFreeBlocksA-1
899-
freeBlocks[i] = nextBlockA + j
901+
for j = 0:nFreePagesA-1
902+
freePages[i] = pages.nextA + j
900903
i += 1
901904
end
902-
for j = 0:nFreeBlocksB-1
903-
freeBlocks[i] = nextBlockB + j
905+
for j = 0:nFreePagesB-1
906+
freePages[i] = pages.nextB + j
904907
i += 1
905908
end
906-
if partialBlockPresent
907-
freeBlocks[i] = currentBlock
909+
if partialPagePresent
910+
freePages[i] = pages.current
908911
end
909-
freeBlocksIdx = 3
910-
doneBlockIdx = 1
911-
currentBlock = freeBlocks[end]
912+
freePagesIndex = 3
913+
donePageIndex = 1
914+
# use currentPage instead of pages.current because
915+
# pages.nextA and pages.nextB are no longer needed
916+
currentPage = freePages[end]
912917
##################
913-
# rearrange blocks
918+
# rearrange pages
914919
##################
915920
while true
916-
blc = blockLocation[currentBlock] # index of block with data belonging to currentBlock
917-
if blc > 0
918-
# data for currentBlock is in v
919-
offset = getBlockOffset(currentBlock)
920-
offset2 = getBlockOffset(blc)
921-
for j = 1:blocksize
921+
plc = pageLocations[currentPage] # page with data belonging to currentPage
922+
if plc > 0
923+
# data for currentPage is in v
924+
offset = page_offset(currentPage)
925+
offset2 = page_offset(plc)
926+
for j = 1:pagesize
922927
v[offset + j] = v[offset2 + j]
923928
end
924-
blockLocation[currentBlock] = 0
925-
currentBlock = blc
929+
pageLocations[currentPage] = 0
930+
currentPage = plc
926931
else
927-
# data for currentBlock is in buf
928-
offset = getBlockOffset(currentBlock)
929-
offset2 = (-blc-1)*blocksize
930-
for j = 1:blocksize
932+
# data for currentPage is in buf
933+
offset = page_offset(currentPage)
934+
offset2 = (-plc-1)*pagesize
935+
for j = 1:pagesize
931936
v[offset + j] = buf[offset2 + j]
932937
end
933-
blockLocation[currentBlock] = 0
934-
if freeBlocksIdx > 1
935-
# get next free block
936-
freeBlocksIdx -= 1
937-
currentBlock = freeBlocks[freeBlocksIdx]
938+
pageLocations[currentPage] = 0
939+
if freePagesIndex > 1
940+
# get next free page
941+
freePagesIndex -= 1
942+
currentPage = freePages[freePagesIndex]
938943
else
939-
# no free block remains
940-
# make sure that all blocks are done
941-
while blockLocation[doneBlockIdx] == 0 || blockLocation[doneBlockIdx] == doneBlockIdx
942-
doneBlockIdx += 1
943-
doneBlockIdx == nBlocks && return
944+
# no free page remains
945+
# make sure that all pages are done
946+
while pageLocations[donePageIndex] == 0 || pageLocations[donePageIndex] == donePageIndex
947+
donePageIndex += 1
948+
donePageIndex == nPages && return
944949
end
945-
# copy misplaced block into buf and continue
946-
currentBlock = blockLocation[doneBlockIdx]
947-
offset = getBlockOffset(currentBlock)
948-
for j = 1:blocksize
950+
# copy misplaced page into buf and continue
951+
currentPage = pageLocations[donePageIndex]
952+
offset = page_offset(currentPage)
953+
for j = 1:pagesize
949954
buf[j] = v[offset + j]
950955
end
951-
blockLocation[doneBlockIdx] = -1
956+
pageLocations[donePageIndex] = -1
952957
end
953958
end
954959
end
@@ -959,59 +964,59 @@ end
959964
# -> redefine for compatibility with earlier versions
960965
# Return `lo` plus half of `hi - lo`, where the halving is an unsigned right
# shift by one bit. The sum `lo + hi` is never formed.
function midpoint(lo::Integer, hi::Integer)
    halfspan = (hi - lo) >>> 0x01
    return lo + halfspan
end
961966

962-
function pagedmergesort!(v::AbstractVector{T}, lo::Integer, hi::Integer, o::Ordering, buf::AbstractVector{T}, blockLocation) where T
967+
function pagedmergesort!(v::AbstractVector{T}, lo::Integer, hi::Integer, o::Ordering, buf::AbstractVector{T}, pageLocations) where T
963968
len = hi + 1 - lo
964969
if len <= Base.SMALL_THRESHOLD
965970
return Base.Sort.sort!(v, lo, hi, Base.Sort.InsertionSortAlg(), o)
966971
end
967972
m = midpoint(lo, hi-1) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
968-
pagedmergesort!(v, lo, m, o, buf, blockLocation)
969-
pagedmergesort!(v, m+1, hi, o, buf, blockLocation)
973+
pagedmergesort!(v, lo, m, o, buf, pageLocations)
974+
pagedmergesort!(v, m+1, hi, o, buf, pageLocations)
970975
if len <= length(buf)
971976
twoended_merge!(v, lo, m, hi, o, buf)
972977
else
973-
pagedMerge!(v, lo, m, hi, o, buf, blockLocation)
978+
paged_merge!(v, lo, m, hi, o, buf, pageLocations)
974979
end
975980
return v
976981
end
977982

978983
function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::PagedMergeSortAlg, o::Ordering)
979984
lo >= hi && return v
980985
n = hi + 1 - lo
981-
blocksize = isqrt(n)
982-
buf = Vector{eltype(v)}(undef, 3blocksize)
983-
nBlocks = n ÷ blocksize
984-
blockLocation = Vector{Int}(undef, nBlocks+1)
985-
pagedmergesort!(v, lo, hi, o, buf, blockLocation)
986+
pagesize = isqrt(n)
987+
buf = Vector{eltype(v)}(undef, 3pagesize)
988+
nPages = n ÷ pagesize
989+
pageLocations = Vector{Int}(undef, nPages+1)
990+
pagedmergesort!(v, lo, hi, o, buf, pageLocations)
986991
return v
987992
end
988993

989994
Base.@static if VERSION >= v"1.3"
990995
const PAGEDMERGESORT_THREADING_THRESHOLD = 2^13
991-
function threaded_pagedmergesort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering, bufs, blockLocations, c::Channel, threadingThreshold::Integer)
996+
function threaded_pagedmergesort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering, bufs, pageLocations, c::Channel, threadingThreshold::Integer)
992997
len = hi + 1 -lo
993998
if len <= Base.SMALL_THRESHOLD
994999
return Base.Sort.sort!(v, lo, hi, Base.Sort.InsertionSortAlg(), o)
9951000
end
9961001
m = midpoint(lo, hi-1) # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
9971002
if len > threadingThreshold
998-
thr = Threads.@spawn threaded_pagedmergesort!(v, lo, m, o, bufs, blockLocations, c, threadingThreshold)
999-
threaded_pagedmergesort!(v, m+1, hi, o, bufs, blockLocations, c, threadingThreshold)
1003+
thr = Threads.@spawn threaded_pagedmergesort!(v, lo, m, o, bufs, pageLocations, c, threadingThreshold)
1004+
threaded_pagedmergesort!(v, m+1, hi, o, bufs, pageLocations, c, threadingThreshold)
10001005
wait(thr)
10011006
id = take!(c)
10021007
buf = bufs[id]
1003-
blockLocation = blockLocations[id]
1008+
pageLocations = pageLocations[id]
10041009
else
10051010
id = take!(c)
10061011
buf = bufs[id]
1007-
blockLocation = blockLocations[id]
1008-
pagedmergesort!(v, lo, m, o, buf, blockLocation)
1009-
pagedmergesort!(v, m+1, hi, o, buf, blockLocation)
1012+
pageLocations = pageLocations[id]
1013+
pagedmergesort!(v, lo, m, o, buf, pageLocations)
1014+
pagedmergesort!(v, m+1, hi, o, buf, pageLocations)
10101015
end
10111016
if len <= length(buf)
10121017
twoended_merge!(v, lo, m, hi, o, buf)
10131018
else
1014-
pagedMerge!(v, lo, m, hi, o, buf, blockLocation)
1019+
paged_merge!(v, lo, m, hi, o, buf, pageLocations)
10151020
end
10161021
put!(c, id)
10171022
return v
@@ -1022,15 +1027,15 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::ThreadedPagedMerge
10221027
nThreads=Threads.nthreads()
10231028
(n < PAGEDMERGESORT_THREADING_THRESHOLD || nThreads < 2) && return sort!(v, lo, hi, PagedMergeSortAlg(), o)
10241029
threadingThreshold = max(n ÷ 4nThreads, PAGEDMERGESORT_THREADING_THRESHOLD)
1025-
blocksize = isqrt(n)
1026-
nBlocks = n ÷ blocksize
1027-
bufs = [Vector{eltype(v)}(undef, 3blocksize) for _ in 1:nThreads] # allocate buffer for each thread
1028-
blockLocation = [Vector{Int}(undef, nBlocks+1) for _ in 1:nThreads]
1030+
pagesize = isqrt(n)
1031+
nPages = n ÷ pagesize
1032+
bufs = [Vector{eltype(v)}(undef, 3pagesize) for _ in 1:nThreads] # allocate buffer for each thread
1033+
pageLocations = [Vector{Int}(undef, nPages+1) for _ in 1:nThreads]
10291034
c = Channel{Int}(nThreads) # channel holds indices of available buffers
10301035
for i=1:nThreads
10311036
put!(c, i)
10321037
end
1033-
threaded_pagedmergesort!(v, lo, hi, o, bufs, blockLocation, c, threadingThreshold)
1038+
threaded_pagedmergesort!(v, lo, hi, o, bufs, pageLocations, c, threadingThreshold)
10341039
return v
10351040
end
10361041
else

0 commit comments

Comments
 (0)