diff --git a/clean_ttl_hook/main.py b/clean_ttl_hook/main.py index 2ea6b41..273590b 100644 --- a/clean_ttl_hook/main.py +++ b/clean_ttl_hook/main.py @@ -1,24 +1,27 @@ import argparse +import os from pathlib import Path import rdflib from rdflib import Graph +from rdfx import File -def clean_ttl(input_file_path: Path): - # get a list of all leading comments in the file +# removes unused namespace entries and re-serializes a graph with the prefixes in sorted order +def clean_ttl(input_file_path:Path): + #get a list of all leading comments in the file comments_list = [] comment_flag = False - with open(input_file_path, "r", encoding="utf-8", errors='ignore') as f: + with open(input_file_path, "r", encoding='utf-8', errors='ignore') as f: for index, line in enumerate(f): - if len(line.strip()) > 0 and line.strip()[0] == "#" and index == 0: - comments_list.append(line) + if len(line.strip()) > 0 and line.strip()[0] == '#' and index == 0: + comments_list.append(line.strip()[1:]) comment_flag = True - elif len(line.strip()) > 0 and line.strip()[0] == "#" and comment_flag: - comments_list.append(line) + elif len(line.strip()) > 0 and line.strip()[0] == '#' and comment_flag: + comments_list.append(line.strip()[1:]) - elif len(line.strip()) > 0 and line.strip()[0] != "#": + elif len(line.strip()) > 0 and line.strip()[0] != '#': comment_flag = False elif not comment_flag: @@ -45,15 +48,9 @@ def clean_ttl(input_file_path: Path): for s, p, o in g: f.add((s, p, o)) - f.serialize(destination=input_file_path, format="turtle") - - with open(input_file_path, "r", encoding="utf-8", errors='ignore') as f: - lines = f.readlines() - - comments_list.extend(lines) - with open(input_file_path, "w") as f: - comments_list = "".join(comments_list) - f.write(comments_list) + os.remove(input_file_path) + ps = File(directory=os.getcwd()) + ps.write(g=g, filename=input_file_path.stem, leading_comments=comments_list) def main(): diff --git a/poetry.lock b/poetry.lock index 59cc7fe..2579001 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,3 +1,123 @@ +[[package]] +name = "anyio" +version = "3.5.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +category = "main" +optional = false +python-versions = ">=3.6.2" + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"] +test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=6.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"] +trio = ["trio (>=0.16)"] + +[[package]] +name = "boto3" +version = "1.21.42" +description = "The AWS SDK for Python" +category = "main" +optional = false +python-versions = ">= 3.6" + +[package.dependencies] +botocore = ">=1.24.42,<1.25.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.5.0,<0.6.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.24.42" +description = "Low-level, data-driven core of boto 3." +category = "main" +optional = false +python-versions = ">= 3.6" + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.13.8)"] + +[[package]] +name = "certifi" +version = "2021.10.8" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "charset-normalizer" +version = "2.0.12" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.5.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + +[[package]] +name = "h11" +version = "0.12.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "httpcore" +version = "0.13.7" +description = "A minimal low-level HTTP client." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +anyio = ">=3.0.0,<4.0.0" +h11 = ">=0.11,<0.13" +sniffio = ">=1.0.0,<2.0.0" + +[package.extras] +http2 = ["h2 (>=3,<5)"] + +[[package]] +name = "httpx" +version = "0.20.0" +description = "The next generation HTTP client." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +certifi = "*" +charset-normalizer = "*" +httpcore = ">=0.13.3,<0.14.0" +rfc3986 = {version = ">=1.3,<2", extras = ["idna2008"]} +sniffio = "*" + +[package.extras] +brotli = ["brotlicffi", "brotli"] +cli = ["click (>=8.0.0,<9.0.0)", "rich (>=10.0.0,<11.0.0)", "pygments (>=2.0.0,<3.0.0)"] +http2 = ["h2 (>=3,<5)"] + +[[package]] +name = "idna" +version = "3.3" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "isodate" version = "0.6.1" @@ -9,6 +129,14 @@ python-versions = "*" [package.dependencies] six = "*" +[[package]] +name = "jmespath" +version = "1.0.0" +description = "JSON Matching Expressions" +category = "main" +optional = false +python-versions = ">=3.7" + [[package]] name = "pyparsing" version = "3.0.7" @@ -20,6 +148,17 @@ python-versions = ">=3.6" [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" + +[package.dependencies] +six = ">=1.5" + [[package]] name = "rdflib" version = "6.1.1" @@ -37,6 +176,69 @@ docs = ["sphinx (<5)", "sphinxcontrib-apidoc"] html = ["html5lib"] tests = ["berkeleydb", "html5lib", "networkx", "pytest", "pytest-cov", "pytest-subtests"] +[[package]] +name = "rdfx" +version = "0.4.4" +description = "Tools for converting, merging, persisting and reading RDF data in different formats." +category = "main" +optional = false +python-versions = ">=3.8,<4.0" + +[package.dependencies] +boto3 = ">=1.20.20,<2.0.0" +httpx = ">=0.20.0,<0.21.0" +rdflib = ">=6.0.2,<7.0.0" +requests = ">=2.26.0,<3.0.0" + +[package.extras] +app = ["streamlit (>=1.2.0,<2.0.0)", "python-dotenv (>=0.19.2,<0.20.0)"] + +[[package]] +name = "requests" +version = "2.27.1" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} +idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] + +[[package]] +name = "rfc3986" +version = "1.5.0" +description = "Validating URI References per RFC 3986" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +idna = {version = "*", optional = true, markers = "extra == \"idna2008\""} + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "s3transfer" +version = "0.5.2" +description = "An Amazon S3 Transfer Manager" +category = "main" +optional = false +python-versions = ">= 3.6" + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + [[package]] name = "six" version = "1.16.0" @@ -45,25 +247,114 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "sniffio" +version = "1.2.0" +description = "Sniff out which async library your code is running under" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "urllib3" +version = "1.26.9" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [metadata] lock-version = "1.1" -python-versions = "^3.9" -content-hash = "ae2dc17562a97ca5c73a4830befe0eb6107f9d86f007eb64d2c1e80c0d4070ca" +python-versions = "^3.8" +content-hash = "ea715c6800ad4d9b22469f58474481c6361e357a9dfab38ac5413cbabc07f436" [metadata.files] +anyio = [ + {file = "anyio-3.5.0-py3-none-any.whl", hash = "sha256:b5fa16c5ff93fa1046f2eeb5bbff2dad4d3514d6cda61d02816dba34fa8c3c2e"}, + {file = "anyio-3.5.0.tar.gz", hash = "sha256:a0aeffe2fb1fdf374a8e4b471444f0f3ac4fb9f5a5b542b48824475e0042a5a6"}, +] +boto3 = [ + {file = "boto3-1.21.42-py3-none-any.whl", hash = "sha256:895fb88c69be78f82cfee58a79c97a3ad8d4a2a1209041a411d7d6b9fc5393e4"}, + {file = "boto3-1.21.42.tar.gz", hash = "sha256:bcb541175a7d190dd919a0af0e807ee6e9d26f135551e741b10d94343f2d7588"}, +] +botocore = [ + {file = "botocore-1.24.42-py3-none-any.whl", hash = "sha256:14aee41c8bf59d2dd2d89e8751fa37d3c95dcb92707d1966aa02697e914c1417"}, + {file = "botocore-1.24.42.tar.gz", hash = "sha256:a2baa9484bbaee96ef312c049b8e360badcab58329e487b57567644a571b5f4a"}, +] +certifi = [ + {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, + {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, +] +charset-normalizer = [ + {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, + {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, +] +h11 = [ + {file = "h11-0.12.0-py3-none-any.whl", hash = "sha256:36a3cb8c0a032f56e2da7084577878a035d3b61d104230d4bd49c0c6b555a9c6"}, + {file = "h11-0.12.0.tar.gz", hash = "sha256:47222cb6067e4a307d535814917cd98fd0a57b6788ce715755fa2b6c28b56042"}, +] +httpcore = [ + {file = "httpcore-0.13.7-py3-none-any.whl", hash = "sha256:369aa481b014cf046f7067fddd67d00560f2f00426e79569d99cb11245134af0"}, + {file = "httpcore-0.13.7.tar.gz", hash = "sha256:036f960468759e633574d7c121afba48af6419615d36ab8ede979f1ad6276fa3"}, +] +httpx = [ + {file = "httpx-0.20.0-py3-none-any.whl", hash = "sha256:33af5aad9bdc82ef1fc89219c1e36f5693bf9cd0ebe330884df563445682c0f8"}, + {file = "httpx-0.20.0.tar.gz", hash = "sha256:09606d630f070d07f9ff28104fbcea429ea0014c1e89ac90b4d8de8286c40e7b"}, +] +idna = [ + {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, + {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, +] isodate = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, ] +jmespath = [ + {file = "jmespath-1.0.0-py3-none-any.whl", hash = "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04"}, + {file = "jmespath-1.0.0.tar.gz", hash = "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e"}, +] pyparsing = [ {file = "pyparsing-3.0.7-py3-none-any.whl", hash = "sha256:a6c06a88f252e6c322f65faf8f418b16213b51bdfaece0524c1c1bc30c63c484"}, {file = "pyparsing-3.0.7.tar.gz", hash = "sha256:18ee9022775d270c55187733956460083db60b37d0d0fb357445f3094eed3eea"}, ] +python-dateutil = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] rdflib = [ {file = "rdflib-6.1.1-py3-none-any.whl", hash = "sha256:fc81cef513cd552d471f2926141396b633207109d0154c8e77926222c70367fe"}, {file = "rdflib-6.1.1.tar.gz", hash = "sha256:8dbfa0af2990b98471dacbc936d6494c997ede92fd8ed693fb84ee700ef6f754"}, ] +rdfx = [ + {file = "rdfx-0.4.4-py3-none-any.whl", hash = "sha256:f8747236921162b634f199ba27193d577546629a11e4654930562c4ea9c4f14c"}, + {file = "rdfx-0.4.4.tar.gz", hash = "sha256:3967439b8859176712b1c1e7208461614e117a7eedf0f748e458250a72cd18e9"}, +] +requests = [ + {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, + {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, +] +rfc3986 = [ + {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, + {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, +] +s3transfer = [ + {file = "s3transfer-0.5.2-py3-none-any.whl", hash = "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971"}, + {file = "s3transfer-0.5.2.tar.gz", hash = "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed"}, +] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +sniffio = [ + {file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"}, + {file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"}, +] +urllib3 = [ + {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, + {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, +] diff --git a/pyproject.toml b/pyproject.toml index 3ba2aed..3d6666e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,9 @@ description = "" authors = ["adamdavis2 "] [tool.poetry.dependencies] -python = "^3.6" +python = "^3.8" rdflib = "^6.1.1" +rdfx = "^0.4.4" [tool.poetry.dev-dependencies]