#!/usr/bin/env python3
# encoding: utf-8

# Copyright 2022 Elliot Jordan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| 18 | +"""build_index.py |
| 19 | +
|
| 20 | +Clones all active repos in the AutoPkg organization, then builds an index |
| 21 | +based on the recipes' metadata. |
| 22 | +""" |


import json
import os
import plistlib
import re
import subprocess
from glob import glob
from xml.parsers.expat import ExpatError

import requests
import yaml


def get_all_repos():
    """Get API data on all repos in AutoPkg org."""
    repos = []
    url = "https://api.github.com/orgs/autopkg/repos"
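    # NOTE: A GitHub personal access token is expected in the PA_TOKEN
    # environment variable; unauthenticated requests are subject to a much
    # lower API rate limit.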
    headers = {
        "user-agent": "autopkg-search-index/0.0.1",
        "accept": "application/vnd.github.v3+json",
        "authorization": f"token {os.environ['PA_TOKEN']}",
    }

    # Loop through paginated results until there are no more pages
    page = 1
    while True:
        params = {"per_page": "100", "page": page}
        response = requests.get(url, params=params, headers=headers, timeout=30).json()
        if not response:
            break
        repos.extend(response)
        page += 1

    # Filter out repos that are archived, private, or otherwise skippable
    excl_reasons = ("private", "fork", "archived", "disabled", "is_template")
    repos = [x for x in repos if not any(x.get(r) for r in excl_reasons)]
    excl_names = ("autopkg/autopkg", "autopkg/recipe-index")
    repos = [x for x in repos if x["full_name"] not in excl_names]

    return repos


def clone_all_repos(repos):
    """Clone repos that are not private, archived, or otherwise skippable."""
    for repo in repos:
        if os.path.isdir(f"repos/{repo['full_name']}"):
            continue
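        # Shallow clone; indexing recipe files doesn't need the git history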
        clone_cmd = [
            "git",
            "clone",
            "--depth=1",
            repo["clone_url"],
            f"repos/{repo['full_name']}",
        ]
        subprocess.run(clone_cmd, check=True)


def resolve_var(recipe_dict, var_name):
    """Given a variable name wrapped in percents, resolve to the actual variable value."""

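    # For example, "%NAME%" resolves to the recipe's NAME input variable
    # (or None if the recipe doesn't define it):
    #     resolve_var({"Input": {"NAME": "Firefox"}}, "%NAME%")  # -> "Firefox"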
    var_name = var_name.strip("%")
    return recipe_dict.get("Input", {}).get(var_name)


def build_search_index(repos):
    """Given a list of repo info from the GitHub API, build recipe search index."""
    index = {
        "identifiers": {},
        "shortnames": {},
    }
    children = []
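    # children collects (child identifier, parent identifier) pairs so that
    # parent entries can be linked to their children after the scan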
    for repo in repos:
        # Find recipe files up to 2 levels deep
        recipes = []
        for ext in ("recipe", "recipe.plist", "recipe.yaml"):
            recipes += glob(f"repos/{repo['full_name']}/*/*.{ext}")
            recipes += glob(f"repos/{repo['full_name']}/*/*/*.{ext}")

        # Get indexable data from recipe files
        for recipe in recipes:
            index_entry = {}
            if recipe.endswith(".yaml"):
                try:
                    with open(recipe, "rb") as openfile:
                        recipe_dict = yaml.safe_load(openfile)
                except yaml.YAMLError:
                    print(f"WARNING: Unable to parse {recipe} as yaml")
                    continue
            else:
                try:
                    with open(recipe, "rb") as openfile:
                        recipe_dict = plistlib.load(openfile)
                except ExpatError:
                    print(f"WARNING: Unable to parse {recipe} as a plist")
                    continue

            # Generally applicable metadata
            input_dict = recipe_dict.get("Input", {})
            index_entry["name"] = input_dict.get("NAME")
            index_entry["description"] = recipe_dict.get("Description")
            index_entry["repo"] = repo["full_name"]
            index_entry["path"] = os.path.relpath(recipe, f"repos/{repo['full_name']}")
            if recipe_dict.get("ParentRecipe"):
                index_entry["parent"] = recipe_dict["ParentRecipe"]
                children.append(
                    (recipe_dict["Identifier"], recipe_dict["ParentRecipe"])
                )
            if any(
                x.get("Processor") == "DeprecationWarning"
                for x in recipe_dict.get("Process", [{}])
            ):
                index_entry["deprecated"] = True

            # Get inferred type of recipe
            type_pattern = r"\/([\w\- ]+\.([\w\- ]+))\.recipe(\.yaml|\.plist)?$"
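            # e.g. "Mozilla/Firefox.munki.recipe.yaml" yields shortname
            # "Firefox.munki" and inferred type "munki"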
            match = re.search(type_pattern, index_entry["path"])
            if match:
                index_entry["shortname"] = match.group(1)
                index_entry["inferred_type"] = match.group(2)

            # Munki-specific metadata
            if index_entry.get("inferred_type") == "munki":
                pkginfo = input_dict.get("pkginfo", {})
                index_entry["munki_display_name"] = pkginfo.get("display_name")
                index_entry["munki_description"] = pkginfo.get("description")

            # Jamf-specific metadata
            if index_entry.get("inferred_type") in ("jss", "jamf"):
                index_entry["jamf_display_name"] = input_dict.get(
                    "SELF_SERVICE_DISPLAY_NAME"
                )
                index_entry["jamf_description"] = input_dict.get(
                    "SELF_SERVICE_DESCRIPTION"
                )

            # Resolve any substitution variables in the index entry
            for k, v in index_entry.items():
                if isinstance(v, str) and v.startswith("%") and v.endswith("%"):
                    index_entry[k] = resolve_var(recipe_dict, v)

            # Save entry to identifier index
            index["identifiers"][recipe_dict.get("Identifier")] = index_entry

            # Save entry to shortnames index
            if index_entry.get("shortname"):
                index["shortnames"].setdefault(index_entry["shortname"], []).append(
                    recipe_dict.get("Identifier")
                )

    # Add children list to parent recipes' index entries
    for child, parent in children:
        if parent not in index["identifiers"]:
            print(f"WARNING: {child} refers to missing parent recipe {parent}.")
        else:
            index["identifiers"][parent].setdefault("children", []).append(child)

    # Write index file
    with open("index.json", "w", encoding="utf-8") as openfile:
        openfile.write(json.dumps(index, indent=2))


def main():
    """Main process."""

    # Set http.postBuffer to 1 GB
    gitconfig_cmd = ["git", "config", "--global", "http.postBuffer", "1024M"]
    subprocess.run(gitconfig_cmd, check=False)

    # Get repo info from GitHub API
    repos = get_all_repos()

    # Clone all repos
    clone_all_repos(repos)

    # Build and write search index
    build_search_index(repos)


if __name__ == "__main__":
    main()
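
# Usage sketch (assumes a GitHub personal access token in PA_TOKEN):
#     PA_TOKEN=<token> python3 build_index.py
# Clones land in ./repos/ and the index is written to ./index.json.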