Skip to content

Commit 23453ae

Browse files
authored
Merge pull request sorintlab#518 from sgotti/consider_only_real_in_sync_standbys
*: syncrepl, consider only really in sync standbys
2 parents e81e5d7 + bb89f80 commit 23453ae

File tree

8 files changed

+678
-28
lines changed

8 files changed

+678
-28
lines changed

cmd/keeper/cmd/keeper.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,22 @@ func parseSynchronousStandbyNames(s string) ([]string, error) {
611611
return entries, nil
612612
}
613613

614+
func (p *PostgresKeeper) GetInSyncStandbys() ([]string, error) {
615+
inSyncStandbysFullName, err := p.pgm.GetSyncStandbys()
616+
if err != nil {
617+
return nil, fmt.Errorf("failed to retrieve current sync standbys status from instance: %v", err)
618+
}
619+
620+
inSyncStandbys := []string{}
621+
for _, s := range inSyncStandbysFullName {
622+
if common.IsStolonName(s) {
623+
inSyncStandbys = append(inSyncStandbys, common.NameFromStolonName(s))
624+
}
625+
}
626+
627+
return inSyncStandbys, nil
628+
}
629+
614630
func (p *PostgresKeeper) GetPGState(pctx context.Context) (*cluster.PostgresState, error) {
615631
p.getPGStateMutex.Lock()
616632
defer p.getPGStateMutex.Unlock()
@@ -644,20 +660,13 @@ func (p *PostgresKeeper) GetPGState(pctx context.Context) (*cluster.PostgresStat
644660
log.Debugw("filtered out managed pg parameters", "filteredPGParameters", filteredPGParameters)
645661
pgState.PGParameters = filteredPGParameters
646662

647-
synchronousStandbyNames, err := parseSynchronousStandbyNames(pgParameters["synchronous_standby_names"])
663+
inSyncStandbys, err := p.GetInSyncStandbys()
648664
if err != nil {
649-
log.Errorw("error parsing synchronous_standby_names", zap.Error(err))
665+
log.Errorw("failed to retrieve current in sync standbys from instance", zap.Error(err))
650666
return pgState, nil
651667
}
652-
synchronousStandbys := []string{}
653-
for _, n := range synchronousStandbyNames {
654-
// pgState.SynchronousStandbys must contain only the internal standbys dbUIDs
655-
if !common.IsStolonName(n) {
656-
continue
657-
}
658-
synchronousStandbys = append(synchronousStandbys, common.NameFromStolonName(n))
659-
}
660-
pgState.SynchronousStandbys = synchronousStandbys
668+
669+
pgState.SynchronousStandbys = inSyncStandbys
661670

662671
sd, err := p.pgm.GetSystemData()
663672
if err != nil {

cmd/sentinel/cmd/sentinel.go

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo clus
306306
db.Status.TimelinesHistory = dbs.TimelinesHistory
307307
db.Status.PGParameters = cluster.PGParameters(dbs.PGParameters)
308308

309-
db.Status.SynchronousStandbys = dbs.SynchronousStandbys
309+
db.Status.CurSynchronousStandbys = dbs.SynchronousStandbys
310310

311311
db.Status.OlderWalFile = dbs.OlderWalFile
312312
} else {
@@ -1202,13 +1202,50 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
12021202
}
12031203
}
12041204

1205+
// if the currently known in sync syncstandbys differ from the required ones, wait for the missing ones to become in sync and remove the ones that are not in a good state
1206+
if !util.CompareStringSliceNoOrder(masterDB.Status.SynchronousStandbys, masterDB.Spec.SynchronousStandbys) {
1207+
1208+
// remove old syncstandbys from current status
1209+
masterDB.Status.SynchronousStandbys = util.CommonElements(masterDB.Status.SynchronousStandbys, masterDB.Spec.SynchronousStandbys)
1210+
1211+
// add reported in sync syncstandbys to the current status
1212+
curSyncStandbys := util.CommonElements(masterDB.Status.CurSynchronousStandbys, masterDB.Spec.SynchronousStandbys)
1213+
toAddSyncStandbys := util.Difference(curSyncStandbys, masterDB.Status.SynchronousStandbys)
1214+
masterDB.Status.SynchronousStandbys = append(masterDB.Status.SynchronousStandbys, toAddSyncStandbys...)
1215+
1216+
// if some of the not yet in sync syncstandbys have failed, set Spec.SynchronousStandbys to the current in sync ones, so others can be added.
1217+
notInSyncSyncStandbys := util.Difference(masterDB.Spec.SynchronousStandbys, masterDB.Status.SynchronousStandbys)
1218+
update := false
1219+
for _, dbUID := range notInSyncSyncStandbys {
1220+
if _, ok := newcd.DBs[dbUID]; !ok {
1221+
log.Infow("one of the new synchronousStandbys has been removed", "db", dbUID, "inSyncStandbys", masterDB.Status.SynchronousStandbys, "synchronousStandbys", masterDB.Spec.SynchronousStandbys)
1222+
update = true
1223+
continue
1224+
}
1225+
if _, ok := goodStandbys[dbUID]; !ok {
1226+
log.Infow("one of the new synchronousStandbys is not in good state", "db", dbUID, "inSyncStandbys", masterDB.Status.SynchronousStandbys, "synchronousStandbys", masterDB.Spec.SynchronousStandbys)
1227+
update = true
1228+
continue
1229+
}
1230+
}
1231+
if update {
1232+
// Use the current known in sync syncStandbys as Spec.SynchronousStandbys
1233+
log.Infow("setting the expected sync-standbys to the current known in sync sync-standbys", "inSyncStandbys", masterDB.Status.SynchronousStandbys, "synchronousStandbys", masterDB.Spec.SynchronousStandbys)
1234+
masterDB.Spec.SynchronousStandbys = masterDB.Status.SynchronousStandbys
1235+
1236+
// Just sort to always have them in the same order and avoid
1237+
// unneeded updates to synchronous_standby_names by the keeper.
1238+
sort.Sort(sort.StringSlice(masterDB.Spec.SynchronousStandbys))
1239+
}
1240+
}
1241+
12051242
// update synchronousStandbys only if the reported
12061243
// SynchronousStandbys are the same as the required ones. In
12071244
// this way, when we have to choose a new master we are sure
12081245
// that there're no intermediate changes between the
12091246
// reported standbys and the required ones.
12101247
if !util.CompareStringSliceNoOrder(masterDB.Status.SynchronousStandbys, masterDB.Spec.SynchronousStandbys) {
1211-
log.Infof("won't update masterDB required synchronous standby since the latest master reported synchronous standbys are different from the db spec ones", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys)
1248+
log.Infow("waiting for new defined synchronous standbys to be in sync", "inSyncStandbys", curMasterDB.Status.SynchronousStandbys, "synchronousStandbys", curMasterDB.Spec.SynchronousStandbys)
12121249
} else {
12131250
addFakeStandby := false
12141251
externalSynchronousStandbys := map[string]struct{}{}
@@ -1358,6 +1395,9 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
13581395
masterDB.Spec.ExternalSynchronousStandbys = append(masterDB.Spec.ExternalSynchronousStandbys, fakeStandbyName)
13591396
}
13601397

1398+
// remove old syncstandbys from current status
1399+
masterDB.Status.SynchronousStandbys = util.CommonElements(masterDB.Status.SynchronousStandbys, masterDB.Spec.SynchronousStandbys)
1400+
13611401
// Just sort to always have them in the same order and avoid
13621402
// unneeded updates to synchronous_standby_names by the keeper.
13631403
sort.Sort(sort.StringSlice(masterDB.Spec.SynchronousStandbys))
@@ -1367,6 +1407,8 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
13671407
masterDB.Spec.SynchronousReplication = false
13681408
masterDB.Spec.SynchronousStandbys = nil
13691409
masterDB.Spec.ExternalSynchronousStandbys = nil
1410+
1411+
masterDB.Status.SynchronousStandbys = nil
13701412
}
13711413

13721414
// NotFailed != Good since there can be some dbs that are converging

0 commit comments

Comments
 (0)