|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +# this code follows the methodology given in https://doi.org/10.1002/advs.202100707 |
| 5 | + |
| 6 | + |
| 7 | +import os |
| 8 | +import subprocess |
| 9 | +import re |
| 10 | +import sys |
| 11 | +import socket |
| 12 | +import json |
| 13 | + |
| 14 | + |
| 15 | +def human_readable(size, decimal_places=1): |
| 16 | + for unit in ['gCO2e','kgCO2e','Ton CO2e']: |
| 17 | + if size < 1000.0: |
| 18 | + break |
| 19 | + size /= 1000.0 |
| 20 | + return f"{size:.{decimal_places}f} {unit}" |
| 21 | + |
| 22 | +def progress_bar(ratio, size=10): |
| 23 | + nb_bar = int(ratio*size+0.5) |
| 24 | + return nb_bar*"█" + (size-nb_bar)*"─" |
| 25 | + |
| 26 | + |
| 27 | +def duration2sec(duration): |
| 28 | + d = 0 |
| 29 | + h = 0 |
| 30 | + parse = re.match("([0-9]*)-([0-9]{2}):([0-9]{2}):([0-9]{2})", duration) |
| 31 | + if parse: |
| 32 | + d = parse.group(1) |
| 33 | + h = parse.group(2) |
| 34 | + m = parse.group(3) |
| 35 | + s = parse.group(4) |
| 36 | + else: |
| 37 | + parse = re.match("^([0-9]+):([0-9]{2}):([0-9]{2})$", duration) |
| 38 | + if parse: |
| 39 | + h = parse.group(1) |
| 40 | + m = parse.group(2) |
| 41 | + s = parse.group(3) |
| 42 | + else: |
| 43 | + parse = re.match("^([0-9]+):([0-9]{2})$", duration) |
| 44 | + m = parse.group(1) |
| 45 | + s = parse.group(2) |
| 46 | + return int(d)*3600*24 + int(h)*3600 + int(m)*60 + int(s) |
| 47 | + |
| 48 | + |
| 49 | + |
| 50 | +class Cluster(): |
| 51 | + def __init__(self, cluster_filename): |
| 52 | + with open(cluster_filename, "r") as cluster_file: |
| 53 | + self.cluster_info = json.load(cluster_file) |
| 54 | + |
| 55 | + if "efficiency_coefficient" in self.cluster_info: |
| 56 | + self.efficiency_coefficient = self.cluster_info["efficiency_coefficient"] |
| 57 | + else: |
| 58 | + self.efficiency_coefficient = 1.67 |
| 59 | + |
| 60 | + self.hostnames = self.cluster_info["hostnames"] |
| 61 | + self.carbon_intensity = float(self.cluster_info["carbon_intensity"]) |
| 62 | + |
| 63 | + def load_partitions(self): |
| 64 | + self.partition_dict = {} |
| 65 | + for partition in self.cluster_info["partitions"]: |
| 66 | + for partition_name in partition["partition_names"]: |
| 67 | + self.partition_dict[partition_name] = partition |
| 68 | + |
| 69 | + |
| 70 | +class Job(): |
| 71 | + def __init__(self, squeue_line, cluster): |
| 72 | + squeue_line = squeue_line.split(" ") |
| 73 | + self.jobID = squeue_line[0] |
| 74 | + self.status = squeue_line[1] |
| 75 | + self.is_running = (squeue_line[1] == "RUNNING") |
| 76 | + self.ncores = int(squeue_line[2]) |
| 77 | + self.nnode = int(squeue_line[3]) |
| 78 | + self.job_name = squeue_line[4] |
| 79 | + self.total_duration = duration2sec(squeue_line[5]) |
| 80 | + self.elapsed_time = duration2sec(squeue_line[6]) |
| 81 | + self.partition_name = squeue_line[7] |
| 82 | + self.reason = squeue_line[8] |
| 83 | + self.start_time = squeue_line[9] |
| 84 | + self.directory = squeue_line[-1].split("/")[-1] |
| 85 | + |
| 86 | + self.cluster = cluster |
| 87 | + |
| 88 | + if self.partition_name in self.cluster.partition_dict: |
| 89 | + self.partition = self.cluster.partition_dict[self.partition_name] |
| 90 | + else: |
| 91 | + self.partition = self.cluster.partition_dict[self.cluster.default_partition] |
| 92 | + |
| 93 | + def get_co2(self, time): |
| 94 | + t = time/60/60 #time in hours |
| 95 | + nc = self.ncores # number of cores |
| 96 | + Pc = self.partition["TDP_per_core"] # power draw of a computing core |
| 97 | + uc = 1 |
| 98 | + nm = self.partition["RAM_per_core"] # in GB |
| 99 | + Pm = 0.3725 #in W/GB |
| 100 | + PUE = self.cluster.efficiency_coefficient |
| 101 | + E = t*(nc*Pc*uc + nm*Pm)*PUE*0.001 |
| 102 | + |
| 103 | + return E*self.cluster.carbon_intensity |
| 104 | + |
| 105 | + |
| 106 | +hostname = socket.gethostname() |
| 107 | +for cluster_filename in glob.glob(os.oath.dirname(__file__) + "cluster_data/*.json"): |
| 108 | + cluster = Cluster(cluster_filename) |
| 109 | + if hostname in cluster.hostnames: |
| 110 | + cluster.load_partitions() |
| 111 | + break |
| 112 | + |
| 113 | + |
| 114 | +squeue_raw = subprocess.check_output(["squeue", "-S", "-T,u,i", "-h", "-u", os.getlogin(), "-o", "%A %T %C %D %j %l %M %P %R %S %Z"]).splitlines() |
| 115 | +job_list = [ Job(line.decode(sys.stdout.encoding), cluster) for line in squeue_raw ] |
| 116 | + |
| 117 | +#display |
| 118 | + |
| 119 | +column_names = ["Directory", "Job name", "Job-ID", "Cores", "Status", "co2", "total co2"] |
| 120 | + |
| 121 | +output = [column_names, ] |
| 122 | +for job in job_list: |
| 123 | + output_line = [] |
| 124 | + output_line.append(job.directory) |
| 125 | + output_line.append(job.job_name) |
| 126 | + output_line.append(str(job.jobID)) |
| 127 | + output_line.append(str(job.ncores)) |
| 128 | + if job.is_running: |
| 129 | + output_line.append(progress_bar(job.elapsed_time/job.total_duration)) |
| 130 | + output_line.append(human_readable(job.get_co2(job.elapsed_time))) |
| 131 | + else: |
| 132 | + output_line.append(job.status) |
| 133 | + output_line.append("-") |
| 134 | + output_line.append(human_readable(job.get_co2(job.total_duration))) |
| 135 | + |
| 136 | + output.append(output_line) |
| 137 | + |
| 138 | + |
| 139 | +is_running_list = [ job.is_running for job in job_list ] |
| 140 | + |
| 141 | +header = True |
| 142 | +widths = [max(map(len, col)) for col in zip(*output)] |
| 143 | + |
| 144 | +for i,row in enumerate(output): |
| 145 | + output_string = " ".join((val.ljust(width) for val, width in zip(row, widths))) |
| 146 | + if header: |
| 147 | + header = False |
| 148 | + print(output_string) |
| 149 | + print("─"*len(output_string)) |
| 150 | + else: |
| 151 | + if not is_running_list[i-1]: |
| 152 | + print('\033[2m', end="") |
| 153 | + print(output_string, end="") |
| 154 | + print('\033[0m') |
| 155 | + |
| 156 | +print() |
| 157 | + |
| 158 | + |
0 commit comments