Skip to content

Commit

Permalink
feat: track point in time of most recent successful sync, helpful wit…
Browse files Browse the repository at this point in the history
…h metrics in higher-level code
  • Loading branch information
FlorianLoch committed Jun 12, 2024
1 parent eea907a commit b8ed988
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 27 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ The library supports to continue from where it left off, the `sync` command ment

## API

The API is really simple; one type, holding three methods, is exported (and additionally, typed configuration options):
The API is really simple; one type, providing four methods, is exported (and additionally, typed configuration options):

```go
New(options ...CommonOption) *HIBP
New(options ...CommonOption) (*HIBP, error)
HIBP#Sync(options ...SyncOption) error // Syncs the local copy with the upstream database
HIBP#Export(w io.Writer, options ...ExportOption) error // Writes a continuous, decompressed and "free-of-etags" stream to the given io.Writer with the lines being prefixed by the k-proximity range
HIBP#.Query("ABCDE") (io.ReadClose, error) // Returns the k-proximity API result as the upstream API would (without the k-proximity range as prefix)
HIBP#Query("ABCDE") (io.ReadCloser, error) // Returns the k-proximity API result as the upstream API would (without the k-proximity range as prefix)
HIBP#MostRecentSuccessfulSync() time.Time // Returns the point in time the last successful sync finished
```

All of them operate on disk but, depending on the medium, should provide access times that are probably good enough for all scenarios.
Expand Down
7 changes: 6 additions & 1 deletion cmd/export/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ func main() {
dataDir = os.Args[1]
}

h := hibp.New(hibp.WithDataDir(dataDir))
h, err := hibp.New(hibp.WithDataDir(dataDir))
if err != nil {
_, _ = os.Stderr.WriteString("Failed to init HIBP sync: " + err.Error())

os.Exit(1)
}

if err := h.Export(os.Stdout); err != nil {
_, _ = os.Stderr.WriteString("Failed to export HIBP data: " + err.Error())
Expand Down
5 changes: 4 additions & 1 deletion cmd/sync/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ func run(dataDir string) error {
return nil
}

h := hibp.New(hibp.WithDataDir(dataDir))
h, err := hibp.New(hibp.WithDataDir(dataDir))
if err != nil {
return fmt.Errorf("initialising HIBP sync: %w", err)
}

if err := h.Sync(
hibp.SyncWithProgressFn(updateProgressBar),
Expand Down
80 changes: 65 additions & 15 deletions lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,31 @@ import (
"github.com/hashicorp/go-retryablehttp"
"io"
"os"
"path"
"strconv"
"sync/atomic"
"time"
)

const (
DefaultDataDir = "./.hibp-data"
DefaultStateFileName = "state"
defaultEndpoint = "https://api.pwnedpasswords.com/range/"
defaultWorkers = 50
defaultLastRange = 0xFFFFF
DefaultDataDir = "./.hibp-data"
DefaultStateFileName = "state"
defaultEndpoint = "https://api.pwnedpasswords.com/range/"
defaultWorkers = 50
defaultLastRange = 0xFFFFF
hibpMostRecentSuccessfulSyncPath = ".most_recent_successful_sync"
)

// HIBP bundles the functionality of the HIBP package.
// In order to allow concurrent operations on the local, file-based dataset efficiently and safely, a shared set of
// locks is required - this gets managed by the HIBP type.
type HIBP struct {
store storage
store storage
dataDir string
mostRecentSuccessfulSync atomic.Pointer[time.Time]
}

func New(options ...CommonOption) *HIBP {
func New(options ...CommonOption) (*HIBP, error) {
config := commonConfig{
dataDir: DefaultDataDir,
noCompression: false,
Expand All @@ -39,21 +45,45 @@ func New(options ...CommonOption) *HIBP {

storage := newFSStorage(config.dataDir, config.noCompression)

return &HIBP{
store: storage,
var mostRecentSuccessfulSync time.Time

mostRecentSuccessfulSyncPath := path.Join(config.dataDir, hibpMostRecentSuccessfulSyncPath)
mostRecentSuccessfullSyncBytes, err := os.ReadFile(mostRecentSuccessfulSyncPath)
if err != nil {
// It is ok if the file does not exist
if !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("reading timestamp of most recent successful sync at %q: %w", mostRecentSuccessfulSyncPath, err)
}
} else {
seconds, err := strconv.ParseInt(string(mostRecentSuccessfullSyncBytes), 10, 64)
if err != nil {
return nil, fmt.Errorf("parsing timestamp %q of most recent successful sync from %q: %w", mostRecentSuccessfullSyncBytes, mostRecentSuccessfulSyncPath, err)
}

mostRecentSuccessfulSync = time.Unix(seconds, 0)
}

h := &HIBP{
store: storage,
dataDir: config.dataDir,
}

h.mostRecentSuccessfulSync.Store(&mostRecentSuccessfulSync)

return h, nil
}

// Sync copies the ranges, i.e., the HIBP data, from the upstream API to the local storage.
// The function will start from the lowest prefix and continue until the highest prefix.
// See the set of SyncOption functions for customizing the behavior of the sync operation.
func (h *HIBP) Sync(options ...SyncOption) error {
config := &syncConfig{
ctx: context.Background(),
endpoint: defaultEndpoint,
minWorkers: defaultWorkers,
progressFn: func(_, _, _, _, _ int64) error { return nil },
lastRange: defaultLastRange,
ctx: context.Background(),
endpoint: defaultEndpoint,
minWorkers: defaultWorkers,
progressFn: func(_, _, _, _, _ int64) error { return nil },
lastRange: defaultLastRange,
trackMostRecentSuccessfulSyncInFile: true,
}

for _, option := range options {
Expand Down Expand Up @@ -87,7 +117,22 @@ func (h *HIBP) Sync(options ...SyncOption) error {
// This would cause problems, especially when cancelling the context.
pool := pond.New(config.minWorkers, 0, pond.MinWorkers(config.minWorkers))

return sync(config.ctx, from, config.lastRange+1, client, h.store, pool, config.progressFn)
if err := sync(config.ctx, from, config.lastRange+1, client, h.store, pool, config.progressFn); err != nil {
return err
}

now := time.Now()
h.mostRecentSuccessfulSync.Store(&now)

if config.trackMostRecentSuccessfulSyncInFile {
mostRecentSuccessfulSyncPath := path.Join(h.dataDir, hibpMostRecentSuccessfulSyncPath)

if err := os.WriteFile(mostRecentSuccessfulSyncPath, []byte(strconv.FormatInt(now.Unix(), 10)), 0o644); err != nil {
return fmt.Errorf("writing timestamp of most recent successful sync to %q: %w", mostRecentSuccessfulSyncPath, err)
}
}

return nil
}

// Export writes the dataset to the given writer.
Expand All @@ -113,6 +158,11 @@ func (h *HIBP) Query(prefix string) (io.ReadCloser, error) {
return reader, nil
}

// MostRecentSuccessfulSync returns the point in time at which the most recent successful sync finished.
// The zero time.Time is returned when no timestamp is available, including for an HIBP value on which no
// timestamp has ever been stored; use time.Time.IsZero to detect this case.
func (h *HIBP) MostRecentSuccessfulSync() time.Time {
	// Load yields nil until a value has been stored; dereferencing it blindly would panic.
	if ts := h.mostRecentSuccessfulSync.Load(); ts != nil {
		return *ts
	}

	return time.Time{}
}

func readStateFile(stateFile io.ReadWriteSeeker) (int64, error) {
state, err := io.ReadAll(stateFile)
if err != nil {
Expand Down
5 changes: 4 additions & 1 deletion lib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ func BenchmarkQuery(b *testing.B) {

dataDir := b.TempDir()

h := New(WithDataDir(dataDir))
h, err := New(WithDataDir(dataDir))
if err != nil {
b.Fatalf("initialising hibp sync: %v", err)
}

if err := h.Sync(SyncWithLastRange(lastRange)); err != nil {
b.Fatalf("unexpected error: %v", err)
Expand Down
22 changes: 16 additions & 6 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ func WithoutCompression() CommonOption {
}

type syncConfig struct {
ctx context.Context
endpoint string
minWorkers int
progressFn ProgressFunc
stateFile io.ReadWriteSeeker
lastRange int64
ctx context.Context
endpoint string
minWorkers int
progressFn ProgressFunc
stateFile io.ReadWriteSeeker
lastRange int64
trackMostRecentSuccessfulSyncInFile bool
}

// SyncOption represents a type of function that can be used to customize the behavior of the Sync function.
Expand Down Expand Up @@ -106,3 +107,12 @@ func SyncWithLastRange(to int64) SyncOption {
c.lastRange = to
}
}

// SyncWithoutTrackingMostRecentSuccessfulSyncInFile returns a SyncOption that turns off persisting the
// timestamp of the most recent successful sync to a file.
// Default: tracking is enabled, i.e. the timestamp is written to a file so it is still available after the
// process has ended.
func SyncWithoutTrackingMostRecentSuccessfulSyncInFile() SyncOption {
	disableTracking := func(cfg *syncConfig) {
		cfg.trackMostRecentSuccessfulSyncInFile = false
	}

	return disableTracking
}

0 comments on commit b8ed988

Please sign in to comment.