Add json output
Output for upload to S3 for querying with Athena is now written as single-line JSON objects.
Hal Wine committed Oct 1, 2020
1 parent 39f38db commit 4284359
Showing 6 changed files with 124 additions and 53 deletions.
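
Note: the single-line JSON referred to above is newline-delimited JSON (one complete JSON object per line), which Athena can query directly from objects in S3. A minimal sketch of the write pattern the commit adopts (the file name and record contents here are illustrative, not taken from the diff):

    import json

    records = [{"name": "main", "prefix": "refs/heads/"}]
    with open("branches.json", "w") as jf:
        for record in records:
            # One compact JSON object per line; no pretty-printing.
            jf.write(f"{json.dumps(record)}\n")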
7 changes: 0 additions & 7 deletions github/branches/conftest.py
@@ -53,14 +53,7 @@ def repos_to_check() -> List[str]:
*in_files,
]

# python 3.6 doesn't support capture_output
status = subprocess.run(cmd, capture_output=True) # nosec
## ## # fmt: off
## ## status = subprocess.run( # nosec
## ## cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE # nosec
## ## )
## ## # fmt:on
## ## # return as array of non-empty, unquoted, "lines"
return [
x.translate({ord('"'): None, ord("'"): None})
for x in status.stdout.decode("utf-8").split("\n")
79 changes: 69 additions & 10 deletions github/branches/retrieve_github_data.py
@@ -6,13 +6,15 @@
protection guideline compliance."""
# TODO add doctests

import csv
from functools import lru_cache
import csv
from github import branches
import logging
import os
from dataclasses import dataclass, field
import json
import sys
from typing import Any, List
from typing import Any, Generator, List

from sgqlc.operation import Operation # noqa: I900
from sgqlc.endpoint.http import HTTPEndpoint # noqa: I900
@@ -39,6 +41,8 @@
class BranchName:
name: str
prefix: str
_type: str = "BranchName"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
Expand All @@ -54,6 +58,12 @@ def csv_row(self) -> List[str]:
self.prefix or None,
]

def flat_json(self) -> Generator:
yield self.as_dict()

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


@dataclass
class BranchProtectionRule:
@@ -64,6 +74,8 @@ class BranchProtectionRule:
rule_conflict_count: int
pattern: str
matching_branches: List[BranchName] = field(default_factory=list)
_type: str = "BranchProtectionRule"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
@@ -96,6 +108,19 @@ def csv_row(self) -> List[str]:
result.append(my_info + BranchName.cvs_null())
return result

def flat_json(self) -> Generator:
exportable_dict = self.as_dict()
del exportable_dict["matching_branches"]
for branch in self.matching_branches:
for match in branch.flat_json():
copy = exportable_dict.copy()
copy.update(match)
assert len(copy) == len(exportable_dict) + len(match)
yield copy

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


@dataclass
class RepoBranchProtections:
@@ -105,6 +130,8 @@ class RepoBranchProtections:
repo_v4id: str
repo_v3id: str
protection_rules: List[BranchProtectionRule] = field(default_factory=list)
_type: str = "RepoBranchProtections"
_revision: int = 1

@classmethod
def csv_header(cls) -> List[str]:
@@ -130,6 +157,19 @@ def csv_row(self) -> List[str]:
result.append(my_info + BranchProtectionRule.cvs_null())
return result

def flat_json(self) -> Generator:
exportable_dict = self.as_dict()
del exportable_dict["protection_rules"]
for rule in self.protection_rules:
for d in rule.flat_json():
copy = exportable_dict.copy()
copy.update(d)
assert len(copy) == len(exportable_dict) + len(d)
yield copy

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


def _add_protection_fields(node) -> None:
"""Build in fields we want to query.
@@ -345,6 +385,18 @@ def parse_args():
ap.add_argument(
"--headers", help="Add column headers to csv output", action="store_true"
)
ap.add_argument(
"--no-csv",
help="Do not output to CSV (default True if called via cli).",
action="store_true",
)
ap.add_argument("--no-json", help="Do not output JSON.", action="store_true")
ap.add_argument(
"--json",
help="JSON output file name (default 'org.json')",
type=argparse.FileType("w"),
default=sys.stdout,
)
ap.add_argument(
"repo", nargs="+", help='Repository full name, such as "login/repo".'
)
@@ -419,16 +471,23 @@ def main() -> int:
if "pytest" in sys.modules:
return
args = parse_args()
if args.output:
csv_out = csv.writer(open(args.output, "w"))
else:
csv_out = csv.writer(sys.stdout)
endpoint = get_connection(args.graphql_endpoint, args.token)
if args.headers:
if not args.no_csv:
if args.output:
csv_out = csv.writer(open(args.output, "w"))
else:
csv_out = csv.writer(sys.stdout)
csv_out.writerow(RepoBranchProtections.csv_header())
for repo in args.repo:
row_data = get_repo_branch_protections(endpoint, repo)
csv_output(row_data, csv_writer=csv_out)
for repo in args.repo:
row_data = get_repo_branch_protections(endpoint, repo)
csv_output(row_data, csv_writer=csv_out)

with args.json as jf:
if not args.no_json:
for repo in args.repo:
repo_data = get_repo_branch_protections(endpoint, repo)
for bprs in repo_data.flat_json():
jf.write(f"{json.dumps(bprs)}\n")


if __name__ == "__main__":
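
A note on the flat_json generators added above: each one copies the parent record's fields into one flat dict per child, dropping the parent's list field first, so every emitted JSON line is a self-contained, Athena-friendly row. A simplified sketch of that flattening pattern (field names and values are illustrative only):

    # Parent record with its list field removed, merged into each child record.
    parent = {"repo_name": "login/repo", "pattern": "main"}
    children = [{"name": "main", "prefix": "refs/heads/"}]
    for child in children:
        row = parent.copy()
        row.update(child)  # the code above asserts that no keys collide
        print(row)         # one flat record per matching branch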
3 changes: 3 additions & 0 deletions github/orgs/__init__.py
@@ -0,0 +1,3 @@
from github import github_schema

__all__ = ["github_schema"]
67 changes: 39 additions & 28 deletions github/orgs/retrieve_github_data.py
@@ -6,19 +6,19 @@
protection guideline compliance."""

import csv
from functools import lru_cache
import logging
import os
from dataclasses import dataclass, field
from dataclasses import dataclass
import json
from pathlib import Path
import subprocess # nosec
import sys
from typing import Any, List, Optional, Set

from sgqlc.operation import Operation # noqa: I900
from sgqlc.endpoint.http import HTTPEndpoint # noqa: I900
from sgqlc.operation import Operation
from sgqlc.endpoint.http import HTTPEndpoint

from github import github_schema as schema # noqa: I900
from github import github_schema as schema

DEFAULT_GRAPHQL_ENDPOINT = "https://api.github.com/graphql"

@@ -31,19 +31,21 @@ class OrgInfo:
name: str
login: str
requires_two_factor_authentication: bool
id_: str
database_id: str
org_v4id: str
org_v3id: str
_type: str = "OrgInfo"
_revision: int = 1

@staticmethod
def idfn(val: Any) -> Optional[str]:
"""provide ID for pytest Parametrization."""
if isinstance(val, (OrgInfo,)):
return f"{val.id_}-{val.login}"
return f"{val.org_v4id}-{val.login}"
return None

@classmethod
def csv_header(cls) -> List[str]:
return ["Org Name", "Org Slug", "2FA Required", "v4id", "v3id"]
return ["Org Name", "Org Slug", "2FA Required", "org_v4id", "org_v3id"]

@classmethod
def cvs_null(cls) -> List[Optional[str]]:
@@ -54,10 +56,13 @@ def csv_row(self) -> List[Optional[str]]:
self.name or None,
self.login or None,
str(self.requires_two_factor_authentication) or None,
self.id_ or None,
self.database_id or None,
self.org_v4id or None,
self.org_v3id or None,
]

def as_dict(self):
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}


def create_operation(owner):
"""Create the default Query operation.
Expand Down Expand Up @@ -93,8 +98,8 @@ def get_org_info(endpoint: Any, org: str) -> OrgInfo:
name="",
login=org,
requires_two_factor_authentication=False,
id_=None,
database_id=None,
org_v4id=None,
org_v3id=None,
)

orgdata = (op + d).organization
@@ -110,8 +115,8 @@ def extract_org_data(orgdata) -> OrgInfo:
name=orgdata.name,
login=orgdata.login,
requires_two_factor_authentication=orgdata.requires_two_factor_authentication,
id_=orgdata.id,
database_id=orgdata.database_id,
org_v4id=orgdata.id,
org_v3id=orgdata.database_id,
)
return org_data

@@ -140,11 +145,19 @@ def parse_args():
ap.add_argument(
"--verbose", "-v", help="Increase verbosity", action="count", default=0
)
# Default to no headers for common automation case of generating for
# AWS Athena
ap.add_argument(
"--headers", help="Add column headers to csv output", action="store_true"
"--no-csv",
help="Do not output to CSV (default True if called via cli).",
action="store_true",
)
ap.add_argument("--no-json", help="Do not output JSON.", action="store_true")
ap.add_argument(
"--json",
help="JSON output file name (default 'org.json')",
type=argparse.FileType("w"),
default=sys.stdout,
)

ap.add_argument(
"orgs", nargs="*", help='Organization slug name, such as "mozilla".'
)
@@ -205,14 +218,8 @@ def _orgs_to_check() -> Set[str]:
""",
*in_files,
]

# python 3.6 doesn't support capture_output
status = subprocess.run(cmd, capture_output=True) # nosec
## ## # fmt: off
## ## status = subprocess.run( # nosec
## ## cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE # nosec
## ## )
## ## # fmt:on
assert not status.stderr.decode("utf-8")
# return as array of non-empty, unquoted, "lines"
return {
@@ -273,7 +280,7 @@ def get_connection(base_url: str, token: Optional[str]) -> Any:
return endpoint


def main() -> int:
def main() -> None:
# hack to support doctests
if "pytest" in sys.modules:
return
@@ -283,10 +290,14 @@ def main() -> int:
else:
csv_out = csv.writer(sys.stdout)
endpoint = get_connection(args.graphql_endpoint, args.token)
if args.headers:
if not args.no_csv:
csv_out.writerow(OrgInfo.csv_header())
for row in get_all_org_data(endpoint, args.orgs):
csv_output(row, csv_writer=csv_out)
for row in get_all_org_data(endpoint, args.orgs):
csv_output(row, csv_writer=csv_out)
if not args.no_json:
with args.json as jf:
for row in get_all_org_data(endpoint, args.orgs):
jf.write(f"{json.dumps(row.as_dict())}\n")

## csv_out.writerow(OrgInfo.csv_header())
## for org in args.orgs:
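
Because as_dict skips underscore-prefixed attributes, the _type and _revision markers are not part of the exported record; each org is written as a single JSON object per line containing only the public fields. An illustrative output line (all values are made up):

    {"name": "Example Org", "login": "example", "requires_two_factor_authentication": true, "org_v4id": "MDEyOk9yZ2FuaXphdGlvbjE=", "org_v3id": 12345}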
2 changes: 1 addition & 1 deletion github/orgs/test_two_factor_required.py
@@ -4,7 +4,7 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from typing import Any, List, Optional
from typing import Any, List

import pytest

19 changes: 12 additions & 7 deletions github/vscode-debug-wrapper.py
@@ -17,10 +17,15 @@
from github.orgs import retrieve_github_data as org_retrieve_github_data
from github.branches import retrieve_github_data as branch_retrieve_github_data

# org will get metadata orgs if none supplied
org_retrieve_github_data.main()

# branch does not have default, so pass along current command line
# N.B. since that will also happen in pytest's doctest mode, that
# special case is dealt with in the parse_args function
branch_retrieve_github_data.main()
sub_command = sys.argv[1]
del sys.argv[1]
if sub_command == "orgs":
# org will get metadata orgs if none supplied
org_retrieve_github_data.main()
elif sub_command == "branches":
# branch does not have default, so pass along current command line
# N.B. since that will also happen in pytest's doctest mode, that
# special case is dealt with in the parse_args function
branch_retrieve_github_data.main()
else:
raise SystemError(f"Unknown sub command '{sub_command}'")
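
The wrapper now dispatches on its first command-line argument and hands the remaining arguments to the selected module's main(). A usage sketch based on the help strings above (the org and repo names are the placeholder examples from those help strings):

    python github/vscode-debug-wrapper.py orgs mozilla
    python github/vscode-debug-wrapper.py branches login/repo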
