|
| 1 | +package x |
| 2 | + |
| 3 | +import ( |
| 4 | + "os" |
| 5 | + "strings" |
| 6 | + "time" |
| 7 | + |
| 8 | + "github.com/docker/infrakit/pkg/discovery" |
| 9 | + "github.com/docker/infrakit/pkg/plugin" |
| 10 | + instance_rpc "github.com/docker/infrakit/pkg/rpc/instance" |
| 11 | + "github.com/docker/infrakit/pkg/spi/instance" |
| 12 | + "github.com/docker/infrakit/pkg/types" |
| 13 | + "github.com/spf13/cobra" |
| 14 | +) |
| 15 | + |
| 16 | +func maxlifeCommand(plugins func() discovery.Plugins) *cobra.Command { |
| 17 | + |
| 18 | + cmd := &cobra.Command{ |
| 19 | + Use: "maxlife <instance plugin name>...", |
| 20 | + Short: "Sets max life on the given instances", |
| 21 | + } |
| 22 | + |
| 23 | + //name := cmd.Flags().String("name", "", "Name to use as name of this plugin") |
| 24 | + poll := cmd.Flags().DurationP("poll", "i", 10*time.Second, "Polling interval") |
| 25 | + maxlife := cmd.Flags().DurationP("maxlife", "m", 10*time.Minute, "Max lifetime of the resource") |
| 26 | + flagTags := cmd.Flags().StringSliceP("tag", "t", []string{}, "Tags to filter instance by") |
| 27 | + |
| 28 | + cmd.RunE = func(c *cobra.Command, args []string) error { |
| 29 | + |
| 30 | + if len(args) == 0 { |
| 31 | + cmd.Usage() |
| 32 | + os.Exit(-1) |
| 33 | + } |
| 34 | + |
| 35 | + tags := toTags(*flagTags) |
| 36 | + |
| 37 | + // Now we have a list of instance plugins to maxlife |
| 38 | + plugins, err := getInstancePlugins(plugins, args) |
| 39 | + if err != nil { |
| 40 | + return err |
| 41 | + } |
| 42 | + |
| 43 | + // For each we start a goroutine to poll and kill instances |
| 44 | + stops := []chan struct{}{} |
| 45 | + |
| 46 | + retry := false |
| 47 | + |
| 48 | + loop: |
| 49 | + for { |
| 50 | + for name, plugin := range plugins { |
| 51 | + |
| 52 | + stop := make(chan struct{}) |
| 53 | + stops = append(stops, stop) |
| 54 | + |
| 55 | + described, err := plugin.DescribeInstances(tags, false) |
| 56 | + if err != nil { |
| 57 | + log.Warn("cannot get initial count", "name", name, "err", err) |
| 58 | + retry = true |
| 59 | + } |
| 60 | + |
| 61 | + go ensureMaxlife(name, plugin, stop, *poll, *maxlife, tags, len(described)) |
| 62 | + } |
| 63 | + |
| 64 | + if !retry { |
| 65 | + break loop |
| 66 | + } |
| 67 | + |
| 68 | + // Wait a little bit before trying again -- use the same poll interval |
| 69 | + <-time.After(*poll) |
| 70 | + } |
| 71 | + |
| 72 | + return nil |
| 73 | + } |
| 74 | + |
| 75 | + return cmd |
| 76 | +} |
| 77 | + |
| 78 | +func age(instance instance.Description, now time.Time) (age time.Duration) { |
| 79 | + link := types.NewLinkFromMap(instance.Tags) |
| 80 | + if link.Valid() { |
| 81 | + age = now.Sub(link.Created()) |
| 82 | + } |
| 83 | + return |
| 84 | +} |
| 85 | + |
| 86 | +func maxAge(instances []instance.Description, now time.Time) instance.Description { |
| 87 | + // check to see if the tags of the instances have links. Links have a creation date and |
| 88 | + // we can use it to compute the age |
| 89 | + var max time.Duration |
| 90 | + var found = 0 |
| 91 | + for i, instance := range instances { |
| 92 | + age := age(instance, now) |
| 93 | + if age > max { |
| 94 | + max = age |
| 95 | + found = i |
| 96 | + } |
| 97 | + } |
| 98 | + return instances[found] |
| 99 | +} |
| 100 | + |
| 101 | +func ensureMaxlife(name string, plugin instance.Plugin, stop chan struct{}, poll, maxlife time.Duration, |
| 102 | + tags map[string]string, initialCount int) { |
| 103 | + |
| 104 | + // Count is used to track the steady state... we don't want to keep killing instances |
| 105 | + // if the counts are steadily decreasing. The idea here is that once we terminate a resource |
| 106 | + // another one will be resurrected so we will be back to steady state. |
| 107 | + // Of course it's possible that the size of the cluster actually is decreased. So we'd |
| 108 | + // wait for a few samples to get to steady state before we terminate another instance. |
| 109 | + // Currently we assume damping == 1 or 1 successive samples of delta >= 0 is sufficient to terminate |
| 110 | + // another instance. |
| 111 | + |
| 112 | + last := initialCount |
| 113 | + tick := time.Tick(poll) |
| 114 | +loop: |
| 115 | + for { |
| 116 | + |
| 117 | + select { |
| 118 | + |
| 119 | + case now := <-tick: |
| 120 | + |
| 121 | + described, err := plugin.DescribeInstances(tags, false) |
| 122 | + if err != nil { |
| 123 | + // Transient error? |
| 124 | + log.Warn("error describing instances", "name", name, "err", err) |
| 125 | + continue |
| 126 | + } |
| 127 | + |
| 128 | + // TODO -- we should compute the 2nd derivative wrt time to make sure we |
| 129 | + // are truly in a steady state... |
| 130 | + |
| 131 | + current := len(described) |
| 132 | + delta := current - last |
| 133 | + last = current |
| 134 | + |
| 135 | + if current < 2 { |
| 136 | + log.Info("there are less than 2 instances. No actions.", "name", name) |
| 137 | + continue |
| 138 | + } |
| 139 | + |
| 140 | + if delta < 0 { |
| 141 | + // Don't do anything if there are fewer instances at this iteration |
| 142 | + // than the last. We want to wait until steady state |
| 143 | + log.Info("fewer instances in this round. No actions taken", "name", name) |
| 144 | + continue |
| 145 | + } |
| 146 | + |
| 147 | + // Just pick a single oldest instance per polling cycle. This is so |
| 148 | + // that we don't end up destroying the cluster by taking down too many instances |
| 149 | + // all at once. |
| 150 | + oldest := maxAge(described, now) |
| 151 | + |
| 152 | + // check to make sure the age is over the maxlife |
| 153 | + if age(oldest, now) > maxlife { |
| 154 | + // terminate it and hope the group controller restores with a new intance |
| 155 | + err = plugin.Destroy(oldest.ID) |
| 156 | + if err != nil { |
| 157 | + log.Warn("cannot destroy instance", "name", name, "id", oldest.ID, "err", err) |
| 158 | + continue |
| 159 | + } |
| 160 | + } |
| 161 | + |
| 162 | + case <-stop: |
| 163 | + log.Info("stop requested", "name", name) |
| 164 | + break loop |
| 165 | + } |
| 166 | + } |
| 167 | + |
| 168 | + log.Info("maxlife stopped", "name", name) |
| 169 | + return |
| 170 | +} |
| 171 | + |
| 172 | +func toTags(slice []string) map[string]string { |
| 173 | + tags := map[string]string{} |
| 174 | + |
| 175 | + for _, tag := range slice { |
| 176 | + kv := strings.SplitN(tag, "=", 2) |
| 177 | + if len(kv) != 2 { |
| 178 | + log.Warn("bad format tag", "input", tag) |
| 179 | + continue |
| 180 | + } |
| 181 | + key := strings.TrimSpace(kv[0]) |
| 182 | + val := strings.TrimSpace(kv[1]) |
| 183 | + if key != "" && val != "" { |
| 184 | + tags[key] = val |
| 185 | + } |
| 186 | + } |
| 187 | + return tags |
| 188 | +} |
| 189 | + |
| 190 | +func getInstancePlugins(plugins func() discovery.Plugins, names []string) (map[string]instance.Plugin, error) { |
| 191 | + targets := map[string]instance.Plugin{} |
| 192 | + for _, target := range names { |
| 193 | + endpoint, err := plugins().Find(plugin.Name(target)) |
| 194 | + if err != nil { |
| 195 | + return nil, err |
| 196 | + } |
| 197 | + if p, err := instance_rpc.NewClient(plugin.Name(target), endpoint.Address); err == nil { |
| 198 | + targets[target] = p |
| 199 | + } else { |
| 200 | + return nil, err |
| 201 | + } |
| 202 | + } |
| 203 | + return targets, nil |
| 204 | +} |
0 commit comments