Add json output
Output for upload to S3 for querying with Athena is now written as single-line JSON objects.
Hal Wine committed Oct 1, 2020
1 parent 39f38db commit 4284359
Showing 6 changed files with 124 additions and 53 deletions.
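
Note: the single-line JSON referred to above is newline-delimited JSON (one complete JSON object per line), which Athena can query directly from objects in S3. A minimal sketch of the write pattern the commit adopts (the file name and record contents here are illustrative, not taken from the diff):

    import json

    records = [{"name": "main", "prefix": "refs/heads/"}]
    with open("branches.json", "w") as jf:
        for record in records:
            # One compact JSON object per line; no pretty-printing.
            jf.write(f"{json.dumps(record)}\n")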
7 changes: 0 additions & 7 deletions github/branches/conftest.py
@@ -53,14 +53,7 @@ def repos_to_check() -> List[str]:
*in_files,
]

# python 3.6 doesn't support capture_output
status = subprocess.run(cmd, capture_output=True) # nosec
## ## # fmt: off
## ## status = subprocess.run( # nosec
## ## cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE # nosec
## ## )
## ## # fmt:on
## ## # return as array of non-empty, unquoted, "lines"
return [
x.translate({ord('"'): None, ord("'"): None})
for x in status.stdout.decode("utf-8").split("\n")
79 changes: 69 additions & 10 deletions github/branches/retrieve_github_data.py
@@ -6,13 +6,15 @@
protection guideline compliance."""
# TODO add doctests

import csv
from functools import lru_cache
import csv
from github import branches
import logging
import os
from dataclasses import dataclass, field
import json
import sys
from typing import Any, List
from typing import Any, Generator, List

from sgqlc.operation import Operation # noqa: I900
from sgqlc.endpoint.http import HTTPEndpoint # noqa: I900
@@ -39,6 +41,8 @@
class BranchName:
name: str
prefix: str
_type: str = "BranchName"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
Expand All @@ -54,6 +58,12 @@ def csv_row(self) -> List[str]:
self.prefix or None,
]

def flat_json(self) -> Generator:
yield self.as_dict()

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


@dataclass
class BranchProtectionRule:
@@ -64,6 +74,8 @@ class BranchProtectionRule:
rule_conflict_count: int
pattern: str
matching_branches: List[BranchName] = field(default_factory=list)
_type: str = "BranchProtectionRule"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
@@ -96,6 +108,19 @@ def csv_row(self) -> List[str]:
result.append(my_info + BranchName.cvs_null())
return result

def flat_json(self) -> Generator:
exportable_dict = self.as_dict()
del exportable_dict["matching_branches"]
for branch in self.matching_branches:
for match in branch.flat_json():
copy = exportable_dict.copy()
copy.update(match)
assert len(copy) == len(exportable_dict) + len(match)
yield copy

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


@dataclass
class RepoBranchProtections:
@@ -105,6 +130,8 @@ class RepoBranchProtections:
repo_v4id: str
repo_v3id: str
protection_rules: List[BranchProtectionRule] = field(default_factory=list)
_type: str = "RepoBranchProtections"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
@@ -130,6 +157,19 @@ def csv_row(self) -> List[str]:
result.append(my_info + BranchProtectionRule.cvs_null())
return result

def flat_json(self) -> Generator:
exportable_dict = self.as_dict()
del exportable_dict["protection_rules"]
for rule in self.protection_rules:
for d in rule.flat_json():
copy = exportable_dict.copy()
copy.update(d)
assert len(copy) == len(exportable_dict) + len(d)
yield copy

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


def _add_protection_fields(node) -> None:
"""Build in fields we want to query.
@@ -345,6 +385,18 @@ def parse_args():
ap.add_argument(
"--headers", help="Add column headers to csv output", action="store_true"
)
ap.add_argument(
"--no-csv",
help="Do not output to CSV (default True if called via cli).",
action="store_true",
)
ap.add_argument("--no-json", help="Do not output JSON.", action="store_true")
ap.add_argument(
"--json",
help="JSON output file name (default 'org.json')",
type=argparse.FileType("w"),
default=sys.stdout,
)
ap.add_argument(
"repo", nargs="+", help='Repository full name, such as "login/repo".'
)
@@ -419,16 +471,23 @@ def main() -> int:
if "pytest" in sys.modules:
return
args = parse_args()
if args.output:
csv_out = csv.writer(open(args.output, "w"))
else:
csv_out = csv.writer(sys.stdout)
endpoint = get_connection(args.graphql_endpoint, args.token)
if args.headers:
if not args.no_csv:
if args.output:
csv_out = csv.writer(open(args.output, "w"))
else:
csv_out = csv.writer(sys.stdout)
csv_out.writerow(RepoBranchProtections.csv_header())
for repo in args.repo:
row_data = get_repo_branch_protections(endpoint, repo)
csv_output(row_data, csv_writer=csv_out)
for repo in args.repo:
row_data = get_repo_branch_protections(endpoint, repo)
csv_output(row_data, csv_writer=csv_out)

with args.json as jf:
if not args.no_json:
for repo in args.repo:
repo_data = get_repo_branch_protections(endpoint, repo)
for bprs in repo_data.flat_json():
jf.write(f"{json.dumps(bprs)}\n")


if __name__ == "__main__":
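
A note on the flat_json generators added above: each one copies the parent record's fields into one flat dict per child, dropping the parent's list field first, so every emitted JSON line is a self-contained, Athena-friendly row. A simplified sketch of that flattening pattern (field names and values are illustrative only):

    # Parent record with its list field removed, merged into each child record.
    parent = {"repo_name": "login/repo", "pattern": "main"}
    children = [{"name": "main", "prefix": "refs/heads/"}]
    for child in children:
        row = parent.copy()
        row.update(child)  # the code above asserts that no keys collide
        print(row)         # one flat record per matching branch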
3 changes: 3 additions & 0 deletions github/orgs/__init__.py
@@ -0,0 +1,3 @@
from github import github_schema

__all__ = ["github_schema"]
67 changes: 39 additions & 28 deletions github/orgs/retrieve_github_data.py
@@ -6,19 +6,19 @@
protection guideline compliance."""

import csv
from functools import lru_cache
import logging
import os
from dataclasses import dataclass, field
from dataclasses import dataclass
import json
from pathlib import Path
import subprocess # nosec
import sys
from typing import Any, List, Optional, Set

from sgqlc.operation import Operation # noqa: I900
from sgqlc.endpoint.http import HTTPEndpoint # noqa: I900
from sgqlc.operation import Operation
from sgqlc.endpoint.http import HTTPEndpoint

from github import github_schema as schema # noqa: I900
from github import github_schema as schema

DEFAULT_GRAPHQL_ENDPOINT = "https://api.github.com/graphql"

@@ -31,19 +31,21 @@ class OrgInfo:
name: str
login: str
requires_two_factor_authentication: bool
id_: str
database_id: str
org_v4id: str
org_v3id: str
_type: str = "OrgInfo"
_revision: int = 1

@staticmethod
def idfn(val: Any) -> Optional[str]:
"""provide ID for pytest Parametrization."""
if isinstance(val, (OrgInfo,)):
return f"{val.id_}-{val.login}"
return f"{val.org_v4id}-{val.login}"
return None

@classmethod
def csv_header(cls) -> List[str]:
return ["Org Name", "Org Slug", "2FA Required", "v4id", "v3id"]
return ["Org Name", "Org Slug", "2FA Required", "org_v4id", "org_v3id"]

@classmethod
def cvs_null(cls) -> List[Optional[str]]:
@@ -54,10 +56,13 @@ def csv_row(self) -> List[Optional[str]]:
self.name or None,
self.login or None,
str(self.requires_two_factor_authentication) or None,
self.id_ or None,
self.database_id or None,
self.org_v4id or None,
self.org_v3id or None,
]

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


def create_operation(owner):
"""Create the default Query operation.
Expand Down Expand Up @@ -93,8 +98,8 @@ def get_org_info(endpoint: Any, org: str) -> OrgInfo:
name="",
login=org,
requires_two_factor_authentication=False,
id_=None,
database_id=None,
org_v4id=None,
org_v3id=None,
)

orgdata = (op + d).organization
@@ -110,8 +115,8 @@ def extract_org_data(orgdata) -> OrgInfo:
name=orgdata.name,
login=orgdata.login,
requires_two_factor_authentication=orgdata.requires_two_factor_authentication,
id_=orgdata.id,
database_id=orgdata.database_id,
org_v4id=orgdata.id,
org_v3id=orgdata.database_id,
)
return org_data

@@ -140,11 +145,19 @@ def parse_args():
ap.add_argument(
"--verbose", "-v", help="Increase verbosity", action="count", default=0
)
# Default to no headers for common automation case of generating for
# AWS Athena
ap.add_argument(
"--headers", help="Add column headers to csv output", action="store_true"
"--no-csv",
help="Do not output to CSV (default True if called via cli).",
action="store_true",
)
ap.add_argument("--no-json", help="Do not output JSON.", action="store_true")
ap.add_argument(
"--json",
help="JSON output file name (default 'org.json')",
type=argparse.FileType("w"),
default=sys.stdout,
)

ap.add_argument(
"orgs", nargs="*", help='Organization slug name, such as "mozilla".'
)
@@ -205,14 +218,8 @@ def _orgs_to_check() -> Set[str]:
""",
*in_files,
]

# python 3.6 doesn't support capture_output
status = subprocess.run(cmd, capture_output=True) # nosec
## ## # fmt: off
## ## status = subprocess.run( # nosec
## ## cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE # nosec
## ## )
## ## # fmt:on
assert not status.stderr.decode("utf-8")
# return as array of non-empty, unquoted, "lines"
return {
@@ -273,7 +280,7 @@ def get_connection(base_url: str, token: Optional[str]) -> Any:
return endpoint


def main() -> int:
def main() -> None:
# hack to support doctests
if "pytest" in sys.modules:
return
@@ -283,10 +290,14 @@ def main() -> int:
else:
csv_out = csv.writer(sys.stdout)
endpoint = get_connection(args.graphql_endpoint, args.token)
if args.headers:
if not args.no_csv:
csv_out.writerow(OrgInfo.csv_header())
for row in get_all_org_data(endpoint, args.orgs):
csv_output(row, csv_writer=csv_out)
for row in get_all_org_data(endpoint, args.orgs):
csv_output(row, csv_writer=csv_out)
if not args.no_json:
with args.json as jf:
for row in get_all_org_data(endpoint, args.orgs):
jf.write(f"{json.dumps(row.as_dict())}\n")

## csv_out.writerow(OrgInfo.csv_header())
## for org in args.orgs:
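
Because as_dict skips underscore-prefixed attributes, the _type and _revision markers are not part of the exported record; each org is written as a single JSON object per line containing only the public fields. An illustrative output line (all values are made up):

    {"name": "Example Org", "login": "example", "requires_two_factor_authentication": true, "org_v4id": "MDEyOk9yZ2FuaXphdGlvbjE=", "org_v3id": 12345}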
2 changes: 1 addition & 1 deletion github/orgs/test_two_factor_required.py
@@ -4,7 +4,7 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from typing import Any, List, Optional
from typing import Any, List

import pytest

19 changes: 12 additions & 7 deletions github/vscode-debug-wrapper.py
@@ -17,10 +17,15 @@
from github.orgs import retrieve_github_data as org_retrieve_github_data
from github.branches import retrieve_github_data as branch_retrieve_github_data

# org will get metadata orgs if none supplied
org_retrieve_github_data.main()

# branch does not have default, so pass along current command line
# N.B. since that will also happen in pytest's doctest mode, that
# special case is dealt with in the parse_args function
branch_retrieve_github_data.main()
sub_command = sys.argv[1]
del sys.argv[1]
if sub_command == "orgs":
# org will get metadata orgs if none supplied
org_retrieve_github_data.main()
elif sub_command == "branches":
# branch does not have default, so pass along current command line
# N.B. since that will also happen in pytest's doctest mode, that
# special case is dealt with in the parse_args function
branch_retrieve_github_data.main()
else:
raise SystemError(f"Unknown sub command '{sub_command}'")
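
The wrapper now dispatches on its first command-line argument and hands the remaining arguments to the selected module's main(). A usage sketch based on the help strings above (the org and repo names are the placeholder examples from those help strings):

    python github/vscode-debug-wrapper.py orgs mozilla
    python github/vscode-debug-wrapper.py branches login/repo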
