@@ -18,6 +18,7 @@
 #include <hse/util/event_counter.h>
 #include <hse/util/keycmp.h>
 #include <hse/util/perfc.h>
+#include <hse/util/fmt.h>
 
 #include "cn_perfc.h"
 #include "cn_tree.h"
@@ -452,30 +453,6 @@ get_vblk_split_index(
     return v;
 }
 
-static void
-find_max_key_among_overlapping_vblocks(
-    const uint32_t nvgroups,
-    const struct vgroup_split_metadata *metadatav,
-    struct kvset *const ks,
-    struct key_obj *max_key)
-{
-    for (uint32_t i = 0; i < nvgroups; i++) {
-        struct key_obj curr_key = { 0 };
-        const struct vblock_desc *vbd;
-        const struct vgroup_split_metadata *metadata = metadatav + i;
-
-        if (!metadata->overlaps)
-            continue;
-
-        vbd = kvset_get_nth_vblock_desc(ks, metadata->vblk_idx);
-
-        key2kobj(&curr_key, vbd->vbd_mblkdesc->map_base + vbd->vbd_max_koff, vbd->vbd_max_klen);
-
-        if (key_obj_cmp(max_key, &curr_key) < 0)
-            *max_key = curr_key;
-    }
-}
-
 static merr_t
 mark_vgroup_accesses(
     const uint32_t nvgroups,
@@ -545,7 +522,7 @@ mark_vgroup_accesses(
             metadata = metadatav + vgidx;
             if (metadata->offset == -1) {
                 metadata->offset = vboff;
-
+                log_debug("vgidx=%lu vbidx=%u offset=%u", vgidx, vbidx, vboff);
                 /* Exit because we have marked the offset for all vgroups */
                 if (++accesses == nvgroups)
                     goto out;
@@ -580,6 +557,7 @@ vblocks_split(
     struct kvset_split_res *result)
 {
     struct key_obj max_key;
+    char buf[HSE_KVS_KEY_LEN_MAX + 1];
     struct vgroup_split_metadata *metadatav;
     struct vgmap *vgmap_src = ks->ks_vgmap;
     struct vgmap *vgmap_left = work[LEFT].vgmap;
@@ -594,21 +572,18 @@ vblocks_split(
     uint64_t perfc_rwb = 0;
     merr_t err;
 
-    log_debug("splitting");
+    log_debug("splitting %u vgroups", nvgroups);
 
     if (move_left && move_right) {
         assert(nvgroups == 0);
         return 0;
     }
 
-    metadatav = calloc(nvgroups, sizeof(*metadatav));
+    metadatav = malloc(nvgroups * sizeof(*metadatav));
     if (ev(!metadatav))
         return merr(ENOMEM);
-    for (uint32_t i = 0; i < nvgroups; i++) {
-        /* Negative offset implies overlapping vblock was not accessed */
-        metadatav[i].offset = -1;
-    }
 
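+    /* max_key starts at the split key and is raised, in the loop below,
+     * to the largest vblock max key among vgroups that straddle the
+     * split key.
+     */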
+    max_key = *split_key;
     for (uint32_t i = 0; i < nvgroups; i++) {
         uint16_t start, end;
         struct vgroup_split_metadata *metadata = metadatav + i;
@@ -617,22 +592,42 @@ vblocks_split(
         start = vgmap_vbidx_out_start(ks, i);
         end = vgmap_vbidx_out_end(ks, i);
 
+        /* Negative offset implies overlapping vblock was not accessed */
+        metadata->offset = -1;
         if (move_left || move_right) {
+            log_debug("move_left=%d move_right=%d", move_left, move_right);
             metadata->vblk_idx = move_right ? start : end;
             metadata->overlaps = false;
         } else {
             metadata->vblk_idx = get_vblk_split_index(ks, start, end, split_key,
                                                       &metadata->overlaps);
+            if (metadata->overlaps) {
+                struct key_obj curr_key;
+                const struct vblock_desc *vbd;
+
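+                /* Read this vblock's max key so it can widen the key
+                 * range probed by mark_vgroup_accesses() below.
+                 */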
+                vbd = kvset_get_nth_vblock_desc(ks, metadata->vblk_idx);
+
+                key2kobj(&curr_key, vbd->vbd_mblkdesc->map_base + vbd->vbd_max_koff, vbd->vbd_max_klen);
+
+                fmt_hexp(buf, sizeof(buf), curr_key.ko_sfx, curr_key.ko_sfx_len, "0x", 4, "-", "\0");
+                log_debug("vgidx=%u vblkidx=%u maxkey=%s", i, metadata->vblk_idx, buf);
+                if (key_obj_cmp(&max_key, &curr_key) < 0)
+                    max_key = curr_key;
+            }
         }
         assert(metadata->vblk_idx >= start && metadata->vblk_idx <= end + 1);
     }
 
-    max_key = *split_key;
-    find_max_key_among_overlapping_vblocks(nvgroups, metadatav, ks, &max_key);
+    fmt_hexp(buf, sizeof(buf), split_key->ko_sfx, split_key->ko_sfx_len, "0x", 4, "-", "\0");
+    log_debug("split key: %s", buf);
+    fmt_hexp(buf, sizeof(buf), max_key.ko_sfx, max_key.ko_sfx_len, "0x", 4, "-", "\0");
+    log_debug("max key: %s", buf);
 
-    err = mark_vgroup_accesses(nvgroups, metadatav, ks, split_key, &max_key);
-    if (ev(err))
-        goto out;
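+    /* When every vblock moves wholesale to one side there is no
+     * overlapping vblock, so there are no value accesses to mark.
+     */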
+    if (!(move_left || move_right)) {
+        err = mark_vgroup_accesses(nvgroups, metadatav, ks, split_key, &max_key);
+        if (ev(err))
+            goto out;
+    }
 
     for (uint32_t i = 0; i < nvgroups; i++) {
         uint32_t vbcnt = 0;
@@ -649,122 +644,136 @@ vblocks_split(
 
         log_debug("start=%u end=%u split=%u overlaps=%d offset=%jd overlapping_access=%d", start, end, split, metadata->overlaps, metadata->offset, overlapping_access);
 
-        /* Add the vblocks in [boundary, end] to the right kvset */
-        boundary = split;
-        for (uint16_t j = boundary; j <= end; j++) {
-            uint32_t alen;
-            uint64_t mbid;
-
-            mbid = kvset_get_nth_vblock_id(ks, j);
+        if (!move_left) {
+            /* Add the vblocks in [boundary, end] to the right kvset */
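+            /* If the overlapping vblock was accessed, vblock `split` is
+             * shared: its tail is cloned into the right kvset below and
+             * the same tail is later punched out of the original, which
+             * stays in the left kvset. Otherwise the whole vblock goes
+             * left and the right range starts at split + 1.
+             */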
+            boundary = overlapping_access ? split : split + 1;
+            for (uint16_t j = boundary; j <= end; j++) {
+                uint32_t alen;
+                uint64_t mbid;
+
+                mbid = kvset_get_nth_vblock_id(ks, j);
+
+                if (j == split && overlapping_access) {
+                    off_t off;
+                    uint64_t clone_mbid;
+                    struct mblock_props props;
+
+                    /* We want to clone more than enough data in case the
+                     * offset doesn't fall right on a page boundary.
+                     */
+                    off = metadata->offset;
+                    off = off < PAGE_SIZE ? 0 : roundup(off - PAGE_SIZE, PAGE_SIZE);
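+                    /* e.g. with 4 KiB pages, an offset of 10000 clones
+                     * from 8192 and an offset of 3000 clones from 0, so
+                     * the clone always starts at or before the offset.
+                     */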
+
+                    err = mpool_mblock_clone(ks->ks_mp, mbid, off, 0, &clone_mbid);
+                    if (!err) {
+                        err = blk_list_append(&blks_right->vblks, clone_mbid);
+                        if (!err)
+                            err = blk_list_append(result->ks[RIGHT].blks_commit, clone_mbid);
+                    }
+
+                    if (err)
+                        goto out;
 
-            if (j == split && overlapping_access) {
-                off_t off;
-                uint64_t clone_mbid;
-                struct mblock_props props;
+                    log_debug("Cloned mblock (0x%" PRIx64 ") starting at offset %jd", mbid, off);
 
-                off = metadata->offset;
-                off = off < PAGE_SIZE ? 0 : roundup(off - PAGE_SIZE, PAGE_SIZE);
+                    err = mpool_mblock_props_get(ks->ks_mp, clone_mbid, &props);
+                    if (ev(err))
+                        goto out;
 
-                err = mpool_mblock_clone(ks->ks_mp, mbid, off, 0, &clone_mbid);
-                if (!err) {
-                    err = blk_list_append(&blks_right->vblks, clone_mbid);
-                    if (!err)
-                        err = blk_list_append(result->ks[RIGHT].blks_commit, clone_mbid);
-                }
+                    perfc_rwc++;
+                    if (perfc_ison(pc, PERFC_RA_CNCOMP_RBYTES) ||
+                        perfc_ison(pc, PERFC_RA_CNCOMP_WBYTES))
+                        perfc_rwb += props.mpr_write_len - off;
 
-                if (err)
-                    goto out;
+                    alen = props.mpr_alloc_cap - VBLOCK_FOOTER_LEN;
+                    log_debug("right: cloned id=0x%lx vgidx=%u vblkidx=%u alen=%u", clone_mbid, i, j, alen);
+                } else {
+                    err = blk_list_append(&blks_right->vblks, mbid);
+                    if (err)
+                        goto out;
 
-                log_debug("Cloned mblock (0x%" PRIx64 ") starting at offset %jd", mbid, off);
+                    alen = kvset_get_nth_vblock_alen(ks, j);
+                    log_debug("right: moved id=0x%lx vgidx=%u vblkidx=%u alen=%u", mbid, i, j, alen);
+                }
 
-                err = mpool_mblock_props_get(ks->ks_mp, clone_mbid, &props);
-                if (ev(err))
-                    goto out;
+                vbcnt++;
+                blks_right->bl_vtotal += alen;
+            }
 
-                perfc_rwc++;
-                if (perfc_ison(pc, PERFC_RA_CNCOMP_RBYTES) ||
-                    perfc_ison(pc, PERFC_RA_CNCOMP_WBYTES))
-                    perfc_rwb += props.mpr_write_len - off;
+            if (vbcnt > 0) {
+                vbidx_right += vbcnt;
 
-                alen = props.mpr_alloc_cap - VBLOCK_FOOTER_LEN;
-            } else {
-                err = blk_list_append(&blks_right->vblks, mbid);
+                err = vgmap_vbidx_set(vgmap_src, end, vgmap_right, vbidx_right - 1, vgidx_right);
                 if (err)
                     goto out;
 
-                alen = kvset_get_nth_vblock_alen(ks, j);
+                vgidx_right++;
             }
-
-            vbcnt++;
-            blks_right->bl_vtotal += alen;
-        }
-
-        if (vbcnt > 0) {
-            vbidx_right += vbcnt;
-
-            err = vgmap_vbidx_set(vgmap_src, end, vgmap_right, vbidx_right - 1, vgidx_right);
-            if (err)
-                goto out;
-
-            vgidx_right++;
         }
 
-        /* Add vblocks in [start, boundary] to the left kvset
-         */
-        vbcnt = 0; /* reset vbcnt for the right kvset */
-        boundary = overlapping_access ? split : split - 1;
-        for (uint16_t j = start; j <= boundary; j++) {
-            uint32_t alen;
-            uint64_t mbid;
-
-            mbid = kvset_get_nth_vblock_id(ks, j);
-
-            if (j == split) {
-                off_t off;
-                uint32_t wlen;
-                struct mblock_props props;
+        if (!move_right) {
+            /* Add vblocks in [start, boundary] to the left kvset */
+            vbcnt = 0; /* reset vbcnt for the left kvset */
+            boundary = split;
+            for (uint16_t j = start; j <= boundary; j++) {
+                uint32_t alen;
+                uint64_t mbid;
+
+                mbid = kvset_get_nth_vblock_id(ks, j);
+
+                if (j == split && overlapping_access) {
+                    off_t off;
+                    uint32_t wlen;
+                    struct mblock_props props;
+
+                    /* Offset must be page aligned. Punch the rest of the
+                     * vblock, from the page-aligned offset up to the
+                     * vblock footer.
+                     */
+                    off = roundup(metadata->offset, PAGE_SIZE);
+                    wlen = kvset_get_nth_vblock_wlen(ks, j) - off;
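+                    /* e.g. with 4 KiB pages, an offset of 10000 punches
+                     * from 12288 on, keeping every page that may still
+                     * hold left-hand values.
+                     */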
+
+                    err = mpool_mblock_punch(ks->ks_mp, mbid, off, wlen);
+                    if (ev(err))
+                        goto out;
 
-                /* Offset must be page aligned. Punching the rest of the vblock
-                 * from the page aligned offset up to the vblock footer.
-                 */
-                off = roundup(metadata->offset, PAGE_SIZE);
-                wlen = kvset_get_nth_vblock_wlen(ks, j) - off;
+                    log_debug("Punched mblock (0x%" PRIx64 ") starting at offset %jd for %u bytes",
+                              mbid, off, wlen);
 
-                err = mpool_mblock_punch(ks->ks_mp, mbid, off, wlen);
-                if (ev(err))
-                    goto out;
+                    err = mpool_mblock_props_get(ks->ks_mp, mbid, &props);
+                    if (ev(err))
+                        goto out;
 
-                log_debug("Punched mblock (0x%" PRIx64 ") starting at offset %jd for %u bytes",
-                          mbid, off, wlen);
+                    alen = props.mpr_alloc_cap - VBLOCK_FOOTER_LEN;
+                    log_debug("left: punched id=0x%lx vgidx=%u vblkidx=%u alen=%u", mbid, i, j, alen);
+                } else {
+                    alen = kvset_get_nth_vblock_alen(ks, j);
+                    log_debug("left: moved id=0x%lx vgidx=%u vblkidx=%u alen=%u", mbid, i, j, alen);
+                }
 
-                err = mpool_mblock_props_get(ks->ks_mp, mbid, &props);
+                err = blk_list_append(&blks_left->vblks, mbid);
                 if (ev(err))
                     goto out;
 
-                alen = props.mpr_alloc_cap - VBLOCK_FOOTER_LEN;
-            } else {
-                alen = kvset_get_nth_vblock_alen(ks, j);
+                vbcnt++;
+                blks_left->bl_vtotal += alen;
             }
 
-            err = blk_list_append(&blks_left->vblks, mbid);
-            if (ev(err))
-                goto out;
+            if (vbcnt > 0) {
+                vbidx_left += vbcnt;
 
-            vbcnt++;
-            blks_left->bl_vtotal += alen;
-        }
-
-        if (vbcnt > 0) {
-            vbidx_left += vbcnt;
-
-            err = vgmap_vbidx_set(vgmap_src, boundary, vgmap_left, vbidx_left - 1, vgidx_left);
-            if (err)
-                goto out;
+                err = vgmap_vbidx_set(vgmap_src, boundary, vgmap_left, vbidx_left - 1, vgidx_left);
+                if (err)
+                    goto out;
 
-            vgidx_left++;
+                vgidx_left++;
+            }
         }
     }
 
+    log_debug("left: vtotal=%lu vused=%lu", blks_left->bl_vtotal, blks_left->bl_vused);
+    log_debug("right: vtotal=%lu vused=%lu", blks_right->bl_vtotal, blks_right->bl_vused);
+
     /* Sanity check, so that we don't fall into these asserts elsewhere later
      * on.
      */