Skip to content

[internal] Renewal of expiring certs; go hook migration; cert alerts #223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 83 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
de2649a
label expiring certs
astef Feb 28, 2025
8eb0835
fix label
astef Feb 28, 2025
434867a
fix ci
astef Feb 28, 2025
a7331ca
fix image
astef Feb 28, 2025
dda72dd
add 1_23 go image
astef Feb 28, 2025
2cf481b
add 1_23 go image
astef Feb 28, 2025
727d224
add alert
astef Feb 28, 2025
b2d4b49
some magic spells to make it work
astef Feb 28, 2025
951d6a6
Update monitoring/prometheus-rules/expiring-certs.yaml
astef Feb 28, 2025
77cd9e6
Update monitoring/prometheus-rules/expiring-certs.yaml
astef Feb 28, 2025
762721f
check all *.crt keys instead of just tls.crt
astef Mar 4, 2025
83cee8d
remove the label, when not expired
astef Mar 4, 2025
2755e48
change startup time
astef Mar 4, 2025
0fbc388
test
astef Mar 4, 2025
a2007f8
fix time to utc
astef Mar 4, 2025
91e0b11
fix crontab
astef Mar 4, 2025
7871824
cron
astef Mar 4, 2025
bcae116
cron
astef Mar 4, 2025
2d6004f
do not retry certificate errors
astef Mar 4, 2025
dbd5fde
debug
astef Mar 5, 2025
de01c70
fixes
astef Mar 5, 2025
dd37ad8
jq filter
astef Mar 5, 2025
466f1ed
debug
astef Mar 5, 2025
fecebcf
debug jq
astef Mar 6, 2025
2386ad1
fix selector
astef Mar 6, 2025
094ece6
fix selector
astef Mar 6, 2025
7554a7c
shotgun debugging
astef Mar 6, 2025
c76d47d
debug
astef Mar 6, 2025
2f06ee4
fixate progress
astef Mar 10, 2025
7a08880
cert renewal draft
astef Mar 11, 2025
ac8e482
changes after review
astef Mar 12, 2025
883cd6e
label secrets with certs
astef Mar 12, 2025
7ba13b7
fix after review
astef Mar 14, 2025
c9d2d1f
ExecuteHookOnSynchronization=true
astef Mar 14, 2025
0b5f44b
invalid error
astef Mar 14, 2025
5c3d3cd
logs
astef Mar 14, 2025
f7a829b
err
astef Mar 14, 2025
75ba072
fix
astef Mar 14, 2025
8d6f4a1
fix keys during restore; handle panics
astef Mar 15, 2025
1e9cbbf
add resetting by removing label
astef Mar 17, 2025
24198b5
patch instead of update
astef Mar 17, 2025
268a36b
fixate progress - generate certificates
astef Mar 20, 2025
d39203d
refactor
astef Mar 20, 2025
ef172af
add global isExpiringAnyCerts check; refactor; remove step.Doc
astef Mar 20, 2025
1c509c4
revert labeling cert secrets
astef Mar 20, 2025
7178318
remove empty line
astef Mar 20, 2025
633342f
rest of certs
astef Mar 21, 2025
a11f622
support force flag
astef Mar 21, 2025
eedecf8
add log
astef Mar 21, 2025
0c9f913
add in progress label
astef Mar 21, 2025
552ccdb
test
astef Mar 21, 2025
fb4dbf4
debug python hook
astef Mar 21, 2025
bf46b84
debug python hook 2
astef Mar 21, 2025
f105a70
support patterns in SANs, pkix extensions
astef Mar 22, 2025
89e0db2
replace spaas certs with go hook
astef Mar 25, 2025
7d15d64
remove spaas hook entirely
astef Mar 25, 2025
bff8328
debug
astef Mar 25, 2025
cf963ef
fix forbidden property
astef Mar 25, 2025
18b353b
playing with magic
astef Mar 26, 2025
2bc34d1
debug
astef Mar 26, 2025
0a19fd7
fix
astef Mar 26, 2025
881705c
fix
astef Mar 26, 2025
068bf89
got rid of python hooks; consolidate codebase with module-sdk
astef Mar 30, 2025
e2982ae
fix b64enc problem
astef Mar 31, 2025
ecab5a3
fix CA secret double- encoding
astef Apr 1, 2025
5a83077
extract cert configs to a separate package
astef Apr 1, 2025
b7376e2
fixate
astef Apr 2, 2025
9fc59ce
refactor
astef Apr 2, 2025
94e2780
remove localhost SANs for all but linstor certs
astef Apr 2, 2025
8fe2334
change hook filenames
astef Apr 3, 2025
13444e0
fix bugs
astef Apr 3, 2025
563fc6a
fix net.IP comparison
astef Apr 3, 2025
bbb3683
fix linter
astef Apr 3, 2025
a2a85a8
event suppression
astef Apr 3, 2025
946f1bd
adjust log level
astef Apr 3, 2025
415653e
add log
astef Apr 3, 2025
aee4791
more logs
astef Apr 3, 2025
2acb30a
docs
astef Apr 8, 2025
14cfb5c
fix webhooks container reload
astef Apr 8, 2025
f41abfd
faq
astef Apr 8, 2025
f0788eb
fix
astef Apr 9, 2025
b29b8f2
polished docs
astef Apr 9, 2025
7b99e32
docs
astef Apr 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .werf/bundle.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,16 @@ import:
add: /lib/python/dist
to: /lib/python/dist
after: setup
# Rendering .werf/go-hooks.yaml is required!
- image: go-hooks-artifact
add: /usr/local/bin/go-hooks
to: /hooks/go-hooks
after: setup
git:
- add: /
to: /
excludePaths:
- hooks/go
includePaths:
- .helmignore
- charts
Expand Down
1 change: 1 addition & 0 deletions .werf/consts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{{- $_ := set $ "BASE_ALPINE_DEV" "registry.deckhouse.io/base_images/dev-alpine:3.16.3@sha256:c706fa83cc129079e430480369a3f062b8178cac9ec89266ebab753a574aca8e" }}
{{- $_ := set $ "BASE_ALT_P11" "registry.deckhouse.io/base_images/alt:p11@sha256:c396cd7348a48f9236413e2ef5569223c15e554c0a3ca37f9d92fb787d4f1893" }}
{{- $_ := set $ "BASE_GOLANG_1_22" "registry.deckhouse.io/base_images/golang:1.22.7-bullseye@sha256:e5dc67bf84590c008338a0e30f56a6ed2092a38e0d2895c797dd501db73a2330" }}
{{- $_ := set $ "BASE_GOLANG_1_23" "registry.deckhouse.io/base_images/golang:1.23.6-alpine3.20@sha256:3058c63e0e2532881949c4186414baa24a0f9a8f9349b1853daa49be816f42e9" }}
{{- $_ := set $ "BASE_PYTHON" "registry.deckhouse.io/base_images/python:3.7.16-alpine3.16@sha256:054c898ee5eacb0b3d85bdb603d6229b93619964cc01be5274acdf3e451e5ef8" }}
{{- $_ := set $ "BASE_SCRATCH" "registry.deckhouse.io/base_images/scratch@sha256:653ae76965c98c8cd1c8c9ff7725316d2983986f896655b30e0f44d2f8b2dd7e" }}

Expand Down
35 changes: 35 additions & 0 deletions docs/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -672,3 +672,38 @@ DRBD with a replica count greater than 1 provides de facto network RAID. Using R
## Why do you recommend using local disks (and not NAS)?

DRBD uses the network for data replication. When using NAS, network load will increase significantly because nodes will synchronize data not only with NAS but also between each other. Similarly, read/write latency will also increase. NAS typically involves using RAID on its side, which also adds overhead.


## How to manually trigger the certificate renewal process?

Although certificate renewal is automated, you may still want to renew certificates manually: a manual renewal can be scheduled for a convenient maintenance window, when restarting the module's objects is acceptable. The automated renewal does not restart any objects.

To manually trigger the certificate renewal process, create a `ConfigMap` named `manualcertrenewal-trigger`:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: manualcertrenewal-trigger
  namespace: d8-sds-replicated-volume
```

The system will stop all necessary module objects, update the certificates, and then restart them.

You can check the operation status using the following command:

```shell
kubectl -n d8-sds-replicated-volume get cm manualcertrenewal-trigger -ojsonpath='{.data.step}'
```
- `Prepared` — health checks have passed successfully, and the downtime window has started.
- `TurnedOffAndRenewedCerts` — the system has been stopped and certificates have been renewed.
- `TurnedOn` — the system has been restarted.
- `Done` — the operation is complete and ready to be repeated.

Certificates are issued for a period of one year and are marked as expiring 30 days before their expiration date. The monitoring system alerts about expiring certificates (see the `D8LinstorCertificateExpiringIn30d` alert).

To repeat the operation, simply remove the label from the trigger using the following command:

```shell
kubectl -n d8-sds-replicated-volume label cm manualcertrenewal-trigger storage.deckhouse.io/sds-replicated-volume-manualcertrenewal-completed-
```
36 changes: 36 additions & 0 deletions docs/FAQ_RU.md
Original file line number Diff line number Diff line change
Expand Up @@ -704,3 +704,39 @@ DRBD с количеством реплик больше 1 предоставл

DRBD использует сеть для репликации данных. При использовании NAS нагрузка на сеть будет расти кратно, так как узлы будут синхронизировать данные не только с NAS, но и между собой. Аналогично будет расти задержка на чтение или запись.
NAS обычно предполагает использование RAID на своей стороне, что также увеличивает дополнительную нагрузку.

## Как вручную инициировать процесс перевыпуска сертификатов?

Несмотря на то, что процесс перевыпуска сертификатов автоматизирован, ручной перевыпуск всё ещё может понадобиться, так как его можно произвести в удобное временное окно, когда допустимо перезапустить объекты модуля. Автоматизированный перевыпуск не перезапускает объекты.

Чтобы вручную инициировать процесс продления сертификатов, создайте `ConfigMap` с именем `manualcertrenewal-trigger`:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: manualcertrenewal-trigger
  namespace: d8-sds-replicated-volume
```

Система остановит все необходимые объекты модуля, обновит сертификаты и затем снова их запустит.

Статус операции можно определить с помощью команды:

```shell
kubectl -n d8-sds-replicated-volume get cm manualcertrenewal-trigger -ojsonpath='{.data.step}'
```
- `Prepared` — проверки состояния успешно пройдены, начато время простоя.
- `TurnedOffAndRenewedCerts` — система остановлена, сертификаты обновлены.
- `TurnedOn` — система снова запущена.
- `Done` — операция завершена и готова к повторению.

Сертификаты выдаются сроком на один год и помечаются как устаревающие за 30 дней до истечения срока действия. Система мониторинга оповещает о наличии устаревающих сертификатов (см. оповещение `D8LinstorCertificateExpiringIn30d`).

Чтобы повторить операцию, достаточно удалить метку с триггера с помощью команды:

```shell
kubectl -n d8-sds-replicated-volume label cm manualcertrenewal-trigger storage.deckhouse.io/sds-replicated-volume-manualcertrenewal-completed-
```


71 changes: 0 additions & 71 deletions hooks/generate_certs.py

This file was deleted.

46 changes: 0 additions & 46 deletions hooks/generate_scheduler_extender_certs.py

This file was deleted.

41 changes: 0 additions & 41 deletions hooks/generate_spaas_certs.py

This file was deleted.

53 changes: 0 additions & 53 deletions hooks/generate_webhook_certs.py

This file was deleted.

79 changes: 79 additions & 0 deletions hooks/go/050-label-expiring-certs/label_expiring_certs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package labelexpiringcerts

import (
"context"
"errors"
"fmt"
"time"

"github.com/deckhouse/module-sdk/pkg"
"github.com/deckhouse/module-sdk/pkg/registry"
"github.com/deckhouse/sds-replicated-volume/hooks/go/consts"
"github.com/deckhouse/sds-replicated-volume/hooks/go/utils"
v1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
	// SecretCertExpire30dLabel is the label key placed on a secret while at
	// least one of its certificates is within SecretExpirationThreshold of
	// expiring.
	SecretCertExpire30dLabel = "storage.deckhouse.io/sds-replicated-volume-cert-expire-in-30d"

	// SecretExpirationThreshold is how far ahead of its expiration date a
	// certificate starts to be treated as expiring (30 days).
	SecretExpirationThreshold = 30 * 24 * time.Hour
)

// Register labelExpiringCerts as a scheduled hook running in the module's
// queue. The blank identifier is used because only the registration side
// effect matters.
var _ = registry.RegisterFunc(
	&pkg.HookConfig{
		Queue: fmt.Sprintf("modules/%s", consts.ModuleName),
		Schedule: []pkg.ScheduleConfig{
			pkg.ScheduleConfig{
				Name: "daily",
				// Once a day at 12:40.
				// NOTE(review): presumably evaluated in UTC by the hook runner — confirm.
				Crontab: "40 12 * * *",
			},
		},
	},
	labelExpiringCerts,
)

// labelExpiringCerts keeps the SecretCertExpire30dLabel label on the secrets
// of consts.ModuleNamespace in sync with the state of their certificates.
//
// For every secret in the namespace it asks utils.AnyCertIsExpiringSoon
// whether any certificate expires within SecretExpirationThreshold, then:
//   - expiring and not labeled: sets the label to "true";
//   - not expiring and labeled: removes the label;
//   - otherwise: leaves the secret untouched.
//
// Errors from the certificate check are logged and the secret is skipped;
// they are never returned. Errors from updating a secret are logged, joined
// together and returned once all secrets have been processed. A failure to
// list secrets is returned immediately.
func labelExpiringCerts(ctx context.Context, input *pkg.HookInput) error {
	cl := input.DC.MustGetK8sClient()

	secrets := &v1.SecretList{}
	if err := cl.List(ctx, secrets, client.InNamespace(consts.ModuleNamespace)); err != nil {
		return fmt.Errorf("listing secrets: %w", err)
	}

	var resultErr error
	// Iterate by index so that each (potentially large) Secret is not copied.
	for i := range secrets.Items {
		secret := &secrets.Items[i]
		log := input.Logger.With("name", secret.Name)

		expiring, err := utils.AnyCertIsExpiringSoon(log, secret, SecretExpirationThreshold)
		if err != nil {
			// do not retry certificate errors, probably just a format problem
			log.Error("error checking certificates", "err", err)
			continue
		}

		// Reading from a nil Labels map is safe and yields "".
		labeled := secret.Labels[SecretCertExpire30dLabel] != ""

		if !expiring {
			log.Info("no expiring certs found")

			if !labeled {
				continue
			}

			log.Info("secret have obsolete label, remove")

			delete(secret.Labels, SecretCertExpire30dLabel)
			if err := cl.Update(ctx, secret); err != nil {
				resultErr = errors.Join(resultErr, fmt.Errorf("error removing label from secret: %w", err))
				log.Error("error removing label from secret", "err", err)
			}

			continue
		}

		if labeled {
			log.Info("cert already have label, skip")
			continue
		}

		// A secret without any labels has a nil Labels map; writing to a nil
		// map panics, so make sure the map exists before adding the label.
		if secret.Labels == nil {
			secret.Labels = make(map[string]string, 1)
		}

		secret.Labels[SecretCertExpire30dLabel] = "true"
		if err := cl.Update(ctx, secret); err != nil {
			resultErr = errors.Join(resultErr, fmt.Errorf("error adding label to secret: %w", err))
			log.Error("error adding label to secret", "err", err)
		}
	}

	return resultErr
}
Loading
Loading