@@ -3,8 +3,9 @@
 from .base import BaseYarnAPI
 from .constants import YarnApplicationState, FinalApplicationStatus
 from .errors import IllegalArgumentError
-from .hadoop_conf import get_resource_manager_host_port, check_is_active_rm, CONF_DIR
-
+from .hadoop_conf import get_resource_manager_host_port, \
+    check_is_active_rm, _get_maximum_container_memory, CONF_DIR
+from collections import deque
 
 class ResourceManager(BaseYarnAPI):
     """
@@ -421,3 +422,71 @@ def cluster_change_application_priority(self, application_id, priority):
         path = '/ws/v1/cluster/apps/{appid}/priority'.format(appid=application_id)
 
         return self.request(path, 'PUT', data={"priority": priority})
+
+    def cluster_node_container_memory(self):
+        """
+        Get the maximum memory that can be allocated to a single
+        container in the cluster, as read from the Hadoop configuration.
+
+        :returns: integer specifying the maximum memory that can be
+            allocated to a single container in the cluster
+        """
+        maximum_container_memory = _get_maximum_container_memory(CONF_DIR)
+        return maximum_container_memory
+
+    def cluster_scheduler_queue(self, yarn_queue_name):
+        """
+        Given a queue name, locate that queue in the object returned by
+        the scheduler endpoint.
+
+        Queues can be nested to arbitrary depth, so the queue hierarchy
+        is traversed with a breadth-first search.
+
+        :param str yarn_queue_name: case-sensitive queue name
+        :return: queue dictionary, or None if not found
+        """
+        scheduler = self.cluster_scheduler().data
+        scheduler_info = scheduler['scheduler']['schedulerInfo']
+
+        bfs_deque = deque([scheduler_info])
+        while bfs_deque:
+            vertex = bfs_deque.popleft()
+            if vertex['queueName'] == yarn_queue_name:
+                return vertex
+            elif 'queues' in vertex:
+                for q in vertex['queues']['queue']:
+                    bfs_deque.append(q)
+
+        return None
+
+    def cluster_scheduler_queue_availability(self, candidate_partition, availability_threshold):
+        """
+        Check whether a queue partition's used capacity is within the
+        given availability threshold.
+
+        With node labelling, a queue's resources can be spread across
+        several partitions. Given the partition data, this reports whether
+        the partition's used capacity exceeds the specified threshold.
+
+        :param dict candidate_partition: partition dictionary, as returned
+            by cluster_queue_partition
+        :param float availability_threshold: percentage between 0 and 100
+        :return: True if the used capacity is within the threshold,
+            otherwise False
+        """
+        return candidate_partition['absoluteUsedCapacity'] <= availability_threshold
+
+    def cluster_queue_partition(self, candidate_queue, cluster_node_label):
+        """
+        A queue can be divided into multiple partitions with different
+        node labels. Given a candidate queue and a partition node label,
+        extract the partition of interest.
+
+        :param dict candidate_queue: queue dictionary
+        :param str cluster_node_label: case-sensitive node label name
+        :return: partition dictionary, or None if not found
+        """
+        for partition in candidate_queue['capacities']['queueCapacitiesByPartition']:
+            if partition['partitionName'] == cluster_node_label:
+                return partition
+        return None
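
For reviewers, here is a small self-contained sketch of the breadth-first lookup that `cluster_scheduler_queue` performs, run against a hand-written dict shaped like the `schedulerInfo` object (the real payload comes from the `/ws/v1/cluster/scheduler` endpoint). The queue names are made up for illustration.

```python
from collections import deque

scheduler_info = {
    'queueName': 'root',
    'queues': {'queue': [
        {'queueName': 'default'},
        {'queueName': 'analytics',
         'queues': {'queue': [{'queueName': 'adhoc'}]}},
    ]},
}

def find_queue(scheduler_info, name):
    # Same traversal as cluster_scheduler_queue: visit queues level by
    # level, enqueueing each vertex's children until the name matches.
    bfs_deque = deque([scheduler_info])
    while bfs_deque:
        vertex = bfs_deque.popleft()
        if vertex['queueName'] == name:
            return vertex
        if 'queues' in vertex:
            bfs_deque.extend(vertex['queues']['queue'])
    return None

print(find_queue(scheduler_info, 'adhoc'))  # found at depth 2
```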
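
Taken together, the new methods support a pre-submission check: look up a queue, pick out the partition for a node label, and verify its used capacity against a threshold. The sketch below is illustrative, not part of the PR: the constructor arguments, the queue name `analytics`, the label `gpu`, and the 75.0 threshold are assumptions, so adjust them to your installed yarn-api-client version and cluster.

```python
from yarn_api_client import ResourceManager

# Constructor arguments are version-dependent and illustrative only.
rm = ResourceManager(['http://localhost:8088'])

# BFS lookup over the scheduler's queue hierarchy.
queue = rm.cluster_scheduler_queue('analytics')
if queue is not None:
    # Pick the partition for the (hypothetical) 'gpu' node label.
    partition = rm.cluster_queue_partition(queue, 'gpu')
    if partition is not None and \
            rm.cluster_scheduler_queue_availability(partition, 75.0):
        # Partition is at or under 75% absolute used capacity; check how
        # large a single container may be (read from the Hadoop config).
        max_mb = rm.cluster_node_container_memory()
        print('Queue has headroom; max container memory: {0}'.format(max_mb))
```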