Skip to content

Commit f3d3ab8

Browse files
pdimitra
and
Andrey Slotin
authored
Gcr in process collector (#339)
* initial gcr collector flow * adding the specific google cloud run metrics * small cleanup * adding tests for gcr * add headers to the request * add debug logs for testing * add debug logs for testing * fixing parsing gcr response * update version * changes in regards to PR comments and cleanup * use request mocking in tests * add requests mock in python27 requirements * fixing typo * Update instana/agent/google_cloud_run.py Co-authored-by: Andrey Slotin <[email protected]> * Update instana/agent/google_cloud_run.py Co-authored-by: Andrey Slotin <[email protected]> * Update instana/collector/helpers/google_cloud_run/process.py Co-authored-by: Andrey Slotin <[email protected]> * Update instana/collector/helpers/google_cloud_run/process.py Co-authored-by: Andrey Slotin <[email protected]> * PR review fixes * fix conflict Co-authored-by: Andrey Slotin <[email protected]>
1 parent b7b6882 commit f3d3ab8

26 files changed

+665
-57
lines changed

instana/agent/aws_fargate.py

-11
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,12 @@
1414
from ..version import VERSION
1515

1616

17-
class AWSFargateFrom(object):
    """
    Source identifier for AWSFargateAgent.

    The class attributes act as defaults; keyword arguments passed to the
    constructor are stored on the instance and shadow a default of the same name.
    """
    hl, cp, e = True, "aws", "taskDefinition"

    def __init__(self, **kwds):
        # Copy every supplied keyword straight into the instance namespace
        vars(self).update(kwds)
25-
26-
2717
class AWSFargateAgent(BaseAgent):
2818
""" In-process agent for AWS Fargate """
2919
def __init__(self):
3020
super(AWSFargateAgent, self).__init__()
3121

3222
self.options = AWSFargateOptions()
33-
self.from_ = AWSFargateFrom()
3423
self.collector = None
3524
self.report_headers = None
3625
self._can_send = False

instana/agent/aws_lambda.py

-11
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,11 @@
1414
from ..options import AWSLambdaOptions
1515

1616

17-
class AWSLambdaFrom(object):
    """
    Source identifier for AWSLambdaAgent.

    The class attributes act as defaults; keyword arguments passed to the
    constructor are stored on the instance and shadow a default of the same name.
    """
    hl, cp, e = True, "aws", "qualifiedARN"

    def __init__(self, **kwds):
        # Copy every supplied keyword straight into the instance namespace
        vars(self).update(kwds)
25-
26-
2717
class AWSLambdaAgent(BaseAgent):
2818
""" In-process agent for AWS Lambda """
2919
def __init__(self):
3020
super(AWSLambdaAgent, self).__init__()
3121

32-
self.from_ = AWSLambdaFrom()
3322
self.collector = None
3423
self.options = AWSLambdaOptions()
3524
self.report_headers = None

instana/agent/google_cloud_run.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# (c) Copyright IBM Corp. 2021
2+
# (c) Copyright Instana Inc. 2021
3+
4+
"""
5+
The Instana agent (for GCR) that manages
6+
monitoring state and reporting that data.
7+
"""
8+
import time
9+
from instana.options import GCROptions
10+
from instana.collector.google_cloud_run import GCRCollector
11+
from instana.log import logger
12+
from instana.util import to_json
13+
from instana.agent.base import BaseAgent
14+
from instana.version import VERSION
15+
16+
17+
class GCRAgent(BaseAgent):
    """ In-process agent for Google Cloud Run """

    def __init__(self, service, configuration, revision):
        """
        Set up options and logging and, when the options required for reporting
        are present, create and start the GCR collector.

        @param service: forwarded unchanged to GCRCollector
        @param configuration: forwarded unchanged to GCRCollector
        @param revision: forwarded unchanged to GCRCollector
        """
        super(GCRAgent, self).__init__()

        self.options = GCROptions()
        # Stays None unless the options validate below
        self.collector = None
        # Built lazily on the first call to report_data_payload
        self.report_headers = None
        self._can_send = False

        # Update log level (if INSTANA_LOG_LEVEL was set)
        self.update_log_level()

        logger.info("Stan is on the Google Cloud Run scene. Starting Instana instrumentation version: %s", VERSION)

        if self._validate_options():
            self._can_send = True
            self.collector = GCRCollector(self, service, configuration, revision)
            self.collector.start()
        else:
            logger.warning("Required INSTANA_AGENT_KEY and/or INSTANA_ENDPOINT_URL environment variables not set.  "
                           "We will not be able monitor this GCR cluster.")

    def can_send(self):
        """
        Are we in a state where we can send data?
        @return: Boolean
        """
        return self._can_send

    def get_from_structure(self):
        """
        Retrieves the From data that is reported alongside monitoring data.

        The entity id is taken from the collector; it is None when no collector
        was created (option validation failed) or no instance id is known yet.
        @return: dict()
        """
        instance_id = None
        if self.collector is not None:
            instance_id = self.collector.get_instance_id()
        return {'hl': True, 'cp': 'gcp', 'e': instance_id}

    def report_data_payload(self, payload):
        """
        Used to report metrics and span data to the endpoint URL in self.options.endpoint_url

        @param payload: data serialised with to_json and sent as the request body
        @return: the response returned by self.client.post, or None when an
                 exception was raised before a response was obtained
        """
        response = None
        try:
            if self.report_headers is None:
                # Prepare request headers
                self.report_headers = {
                    "Content-Type": "application/json",
                    "X-Instana-Host": "gcp:cloud-run:revision:{revision}".format(
                        revision=self.collector.revision),
                    "X-Instana-Key": self.options.agent_key
                }

            # round() returns a float on Python 2, whose str() form is not an integer
            # millisecond value; int() makes the header identical on Python 2 and 3
            self.report_headers["X-Instana-Time"] = str(int(round(time.time() * 1000)))

            response = self.client.post(self.__data_bundle_url(),
                                        data=to_json(payload),
                                        headers=self.report_headers,
                                        timeout=self.options.timeout,
                                        verify=self.options.ssl_verify,
                                        proxies=self.options.endpoint_proxy)

            if response.status_code >= 400:
                logger.info("report_data_payload: Instana responded with status code %s", response.status_code)
        except Exception as exc:
            # Best effort: a reporting failure is logged and never propagated to the caller
            logger.debug("report_data_payload: connection error (%s)", type(exc))
        return response

    def _validate_options(self):
        """
        Validate that the options used by this Agent are valid.  e.g. can we report data?
        @return: Boolean - True when both endpoint_url and agent_key are set
        """
        return self.options.endpoint_url is not None and self.options.agent_key is not None

    def __data_bundle_url(self):
        """
        URL that data bundles are posted to: "/bundle" below self.options.endpoint_url
        @return: string
        """
        return "{endpoint_url}/bundle".format(endpoint_url=self.options.endpoint_url)

instana/collector/aws_fargate.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from ..util import DictionaryOfStan, validate_url
1515
from ..singletons import env_is_test
1616

17-
from .helpers.process import ProcessHelper
17+
from .helpers.fargate.process import FargateProcessHelper
1818
from .helpers.runtime import RuntimeHelper
1919
from .helpers.fargate.task import TaskHelper
2020
from .helpers.fargate.docker import DockerHelper
@@ -23,6 +23,7 @@
2323

2424
class AWSFargateCollector(BaseCollector):
2525
""" Collector for AWS Fargate """
26+
2627
def __init__(self, agent):
2728
super(AWSFargateCollector, self).__init__(agent)
2829
logger.debug("Loading AWS Fargate Collector")
@@ -77,7 +78,7 @@ def __init__(self, agent):
7778
# Populate the collection helpers
7879
self.helpers.append(TaskHelper(self))
7980
self.helpers.append(DockerHelper(self))
80-
self.helpers.append(ProcessHelper(self))
81+
self.helpers.append(FargateProcessHelper(self))
8182
self.helpers.append(RuntimeHelper(self))
8283
self.helpers.append(ContainerHelper(self))
8384

@@ -98,7 +99,8 @@ def get_ecs_metadata(self):
9899
return
99100

100101
try:
101-
delta = int(time()) - self.last_ecmu_full_fetch
102+
self.fetching_start_time = int(time())
103+
delta = self.fetching_start_time - self.last_ecmu_full_fetch
102104
if delta > self.ecmu_full_fetch_interval:
103105
# Refetch the ECMU snapshot data
104106
self.last_ecmu_full_fetch = int(time())
@@ -126,10 +128,7 @@ def get_ecs_metadata(self):
126128
logger.debug("AWSFargateCollector.get_ecs_metadata", exc_info=True)
127129

128130
def should_send_snapshot_data(self):
129-
delta = int(time()) - self.snapshot_data_last_sent
130-
if delta > self.snapshot_data_interval:
131-
return True
132-
return False
131+
return int(time()) - self.snapshot_data_last_sent > self.snapshot_data_interval
133132

134133
def prepare_payload(self):
135134
payload = DictionaryOfStan()
@@ -147,7 +146,7 @@ def prepare_payload(self):
147146

148147
plugins = []
149148
for helper in self.helpers:
150-
plugins.extend(helper.collect_metrics(with_snapshot))
149+
plugins.extend(helper.collect_metrics(with_snapshot=with_snapshot))
151150

152151
payload["metrics"]["plugins"] = plugins
153152

@@ -162,6 +161,7 @@ def get_fq_arn(self):
162161
if self._fq_arn is not None:
163162
return self._fq_arn
164163

164+
task_arn = ""
165165
if self.root_metadata is not None:
166166
labels = self.root_metadata.get("Labels", None)
167167
if labels is not None:

instana/collector/base.py

+3
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ def __init__(self, agent):
7070
# Flag to indicate if start/shutdown state
7171
self.started = False
7272

73+
# Start time of fetching metadata
74+
self.fetching_start_time = 0
75+
7376
def is_reporting_thread_running(self):
7477
"""
7578
Indicates if there is a thread running with the name self.THREAD_NAME

instana/collector/google_cloud_run.py

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# (c) Copyright IBM Corp. 2021
2+
# (c) Copyright Instana Inc. 2021
3+
4+
"""
5+
Google Cloud Run Collector: Manages the periodic collection of metrics & snapshot data
6+
"""
7+
import os
8+
from time import time
9+
import requests
10+
11+
from instana.log import logger
12+
from instana.collector.base import BaseCollector
13+
from instana.util import DictionaryOfStan, validate_url
14+
from instana.collector.helpers.google_cloud_run.process import GCRProcessHelper
15+
from instana.collector.helpers.google_cloud_run.instance_entity import InstanceEntityHelper
16+
17+
18+
class GCRCollector(BaseCollector):
    """ Collector of metrics and snapshot data for Google Cloud Run """

    def __init__(self, agent, service, configuration, revision):
        """
        Store the Cloud Run identifiers, derive the metadata URLs and register the helpers.

        @param agent: handed to BaseCollector.__init__
        @param service: stored as self.service
        @param configuration: stored as self.configuration
        @param revision: stored as self.revision
        """
        super(GCRCollector, self).__init__(agent)
        logger.debug("Loading Google Cloud Run Collector")

        # Cleared further down when the metadata endpoint is unusable; start() checks it
        self.ready_to_start = True

        self.service = service
        self.configuration = configuration
        self.revision = revision

        # Base URL of the metadata server, overridable through the environment
        self._gcr_md_uri = os.environ.get("GOOGLE_CLOUD_RUN_METADATA_ENDPOINT", "http://metadata.google.internal")

        endpoint_usable = self._gcr_md_uri != "" and validate_url(self._gcr_md_uri) is not False
        if not endpoint_usable:
            logger.warning("GCRCollector: GOOGLE_CLOUD_RUN_METADATA_ENDPOINT not in environment or invalid URL.  "
                           "Instana will not be able to monitor this environment")
            self.ready_to_start = False

        # Recursive project and instance metadata documents
        self._gcr_md_project_uri = self._gcr_md_uri + '/computeMetadata/v1/project/?recursive=true'
        self._gcr_md_instance_uri = self._gcr_md_uri + '/computeMetadata/v1/instance/?recursive=true'

        # Epoch seconds of the most recent metadata fetch attempt (0 = never)
        self.__last_gcr_md_full_fetch = 0

        # Minimum number of seconds between two metadata fetches
        self.__gcr_md_full_fetch_interval = 300

        # Session object, so connections to the metadata server can be kept alive
        self._http_client = requests.Session()

        # Initialised to None and not assigned anywhere else in this class
        self._gcp_arn = None

        # Parsed JSON of the last successful instance metadata request
        self.instance_metadata = None

        # Parsed JSON of the last successful project metadata request
        self.project_metadata = None

        # Helpers that turn the metadata into plugin payloads
        self.helpers.append(GCRProcessHelper(self))
        self.helpers.append(InstanceEntityHelper(self))

    def start(self):
        """
        Start the collector through BaseCollector.start(), unless the constructor
        flagged it as not ready, in which case only a warning is logged.
        """
        if self.ready_to_start is not False:
            super(GCRCollector, self).start()
            return

        logger.warning("Google Cloud Run Collector is missing requirements and cannot monitor this environment.")

    def __get_project_instance_metadata(self):
        """
        Fetch the project and instance metadata and keep the parsed JSON on
        self.project_metadata / self.instance_metadata.

        Nothing is returned. Any exception is logged at debug level and swallowed,
        leaving the values that were not refreshed untouched.
        """
        try:
            # Record the attempt before the requests are made
            self.__last_gcr_md_full_fetch = int(time())
            request_headers = {"Metadata-Flavor": "Google"}

            # ${GOOGLE_CLOUD_RUN_METADATA_ENDPOINT}/computeMetadata/v1/project/?recursive=true
            project_response = self._http_client.get(self._gcr_md_project_uri, timeout=1,
                                                     headers=request_headers)
            self.project_metadata = project_response.json()

            # ${GOOGLE_CLOUD_RUN_METADATA_ENDPOINT}/computeMetadata/v1/instance/?recursive=true
            instance_response = self._http_client.get(self._gcr_md_instance_uri, timeout=1,
                                                      headers=request_headers)
            self.instance_metadata = instance_response.json()
        except Exception:
            logger.debug("GoogleCloudRunCollector.get_project_instance_metadata", exc_info=True)

    def should_send_snapshot_data(self):
        """
        @return: Boolean - True when more than snapshot_data_interval seconds have
                 passed since snapshot_data_last_sent
        """
        seconds_since_snapshot = int(time()) - self.snapshot_data_last_sent
        return seconds_since_snapshot > self.snapshot_data_interval

    def prepare_payload(self):
        """
        Build the payload to report: queued spans plus, when a metadata fetch is
        due, the plugin data produced by the helpers.

        Any exception is logged at debug level and the payload built so far is returned.
        @return: DictionaryOfStan with "spans" and "metrics" -> "plugins"
        """
        payload = DictionaryOfStan()
        payload["spans"] = []
        payload["metrics"]["plugins"] = []

        try:
            if not self.span_queue.empty():
                payload["spans"] = self.queued_spans()

            self.fetching_start_time = int(time())
            since_last_fetch = self.fetching_start_time - self.__last_gcr_md_full_fetch
            if since_last_fetch < self.__gcr_md_full_fetch_interval:
                # Metadata fetch not due yet: report spans only
                return payload

            with_snapshot = self.should_send_snapshot_data()

            # Refresh the metadata the helpers work from
            self.__get_project_instance_metadata()
            nothing_fetched = self.instance_metadata is None and self.project_metadata is None
            if nothing_fetched:
                return payload

            collected = []
            for helper in self.helpers:
                helper_plugins = helper.collect_metrics(with_snapshot=with_snapshot,
                                                        instance_metadata=self.instance_metadata,
                                                        project_metadata=self.project_metadata)
                collected.extend(helper_plugins)

            payload["metrics"]["plugins"] = collected

            if with_snapshot:
                self.snapshot_data_last_sent = int(time())
        except Exception:
            logger.debug("collect_snapshot error", exc_info=True)

        return payload

    def get_instance_id(self):
        """
        @return: the "id" entry of the instance metadata, or None when no
                 metadata is available or the lookup raised
        """
        instance_id = None
        try:
            if self.instance_metadata:
                instance_id = self.instance_metadata.get("id")
        except Exception:
            logger.debug("get_instance_id error", exc_info=True)
        return instance_id

instana/collector/helpers/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,5 @@ def apply_delta(self, source, previous, new, metric, with_snapshot):
7474
if previous_value != new_value or with_snapshot is True:
7575
previous[dst_metric] = new[dst_metric] = new_value
7676

77-
def collect_metrics(self, with_snapshot=False):
77+
def collect_metrics(self, **kwargs):
7878
logger.debug("BaseHelper.collect_metrics must be overridden")

instana/collector/helpers/fargate/container.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class ContainerHelper(BaseHelper):
1111
""" This class acts as a helper to collect container snapshot and metric information """
12-
def collect_metrics(self, with_snapshot=False):
12+
def collect_metrics(self, **kwargs):
1313
"""
1414
Collect and return metrics (and optionally snapshot data) for every container in this task
1515
@return: list - with one or more plugin entities
@@ -34,7 +34,7 @@ def collect_metrics(self, with_snapshot=False):
3434
plugin_data["data"]["dockerId"] = container.get("DockerId", None)
3535
plugin_data["data"]["taskArn"] = labels.get("com.amazonaws.ecs.task-arn", None)
3636

37-
if with_snapshot is True:
37+
if kwargs.get("with_snapshot"):
3838
plugin_data["data"]["runtime"] = "python"
3939
plugin_data["data"]["dockerName"] = container.get("DockerName", None)
4040
plugin_data["data"]["containerName"] = container.get("Name", None)

0 commit comments

Comments
 (0)