Skip to content

Commit

Permalink
Remove flake8, add black to pre-commit hook and lint
Browse files Browse the repository at this point in the history
  • Loading branch information
afwolfe committed Jan 1, 2023
1 parent 1a8b0ae commit c99abb2
Show file tree
Hide file tree
Showing 11 changed files with 340 additions and 332 deletions.
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ pytesseract = "~=0.3.9"
[dev-packages]
black = "*"
build = "*"
flake8 = "*"
mypy = "*"
pytest = "*"
types-dateparser = "*"
Expand Down
441 changes: 199 additions & 242 deletions Pipfile.lock

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions cardvisionpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,27 @@

from cardvisionpy import cardvisionpy


def main():
    """Parse command-line options, process the input images and write a CSV.

    ``--input-path`` (default ``images/``) is the directory handed to
    ``cardvisionpy.get_processed_transactions``; ``--output-file`` (default
    ``transactions.csv``) is the file handed to ``cardvisionpy.write_to_csv``.
    Both paths are made absolute relative to the current working directory.
    """
    argparser = argparse.ArgumentParser()
    # Each option is registered exactly once: argparse raises ArgumentError
    # when the same option string is added a second time.
    argparser.add_argument(
        "--input-path",
        default="images/",
        help="The directory to read image files from. Defaults to ./images/",
    )
    argparser.add_argument(
        "--output-file",
        default="transactions.csv",
        help="The CSV file to write transactions to. Defaults to ./transactions.csv",
    )

    args = argparser.parse_args()
    input_path = os.path.abspath(args.input_path)
    output_file = os.path.abspath(args.output_file)

    transactions = cardvisionpy.get_processed_transactions(input_path)
    cardvisionpy.write_to_csv(transactions, output_file)


if __name__ == "__main__":
    main()
26 changes: 16 additions & 10 deletions cardvisionpy/cardvisionpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,47 @@
import os
from csv import DictWriter

import cv2 # type: ignore
import pytesseract # type: ignore
import cv2 # type: ignore
import pytesseract # type: ignore

from cardvisionpy.models.transaction import Transaction
from cardvisionpy.logic.transactionparser import TransactionParser

TESSERACT_CONFIG = r'--oem 3 --psm 11'
TESSERACT_CONFIG = r"--oem 3 --psm 11"

logger = logging.getLogger()


def get_grayscale(image: cv2.Mat):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` code."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return grayscale


def get_processed_transactions(input_path: os.PathLike):
    """OCR every image in ``input_path`` and return the parsed transactions.

    Each directory entry is loaded with OpenCV, converted to grayscale and run
    through Tesseract using ``TESSERACT_CONFIG``. The non-empty, stripped text
    lines are handed to ``TransactionParser`` and its transactions collected.

    Entries OpenCV cannot decode (``cv2.imread`` returns ``None``) are skipped
    with a warning instead of crashing the whole run.

    Returns:
        The collected transactions sorted by their ``date`` attribute.
    """
    transactions: list[Transaction] = []
    # os.listdir order is arbitrary; sorting keeps processing order (and the
    # relative order of same-day transactions) reproducible between runs.
    for file in sorted(os.listdir(input_path)):
        img = cv2.imread(os.path.join(input_path, file))
        if img is None:
            logger.warning("Skipping %s: not a readable image.", file)
            continue
        img = get_grayscale(img)

        text = pytesseract.image_to_string(img, config=TESSERACT_CONFIG)
        # Strip first, then filter, so whitespace-only OCR lines are dropped
        # rather than reaching the parser as empty strings.
        stripped = (line.strip() for line in text.split("\n"))
        strs = [line for line in stripped if line]
        logger.debug(strs)
        tp = TransactionParser(strs)
        transactions += tp.get_transactions()

    # Sort by date
    return sorted(transactions, key=lambda x: getattr(x, "date"))


def write_to_csv(transactions: list[Transaction], output_file: os.PathLike):
    """Write ``transactions`` to ``output_file`` as CSV with a header row.

    The columns are the ``Transaction`` class attributes whose names contain
    no ``__`` and whose values are not callable. Each row is taken from the
    instance's ``__dict__``; a column missing from an instance is written as
    an empty field (``DictWriter``'s default ``restval``).
    """
    # Exclude internal variables and functions from headers
    headers = [
        name
        for name, value in vars(Transaction).items()
        if "__" not in name and not callable(value)
    ]

    # newline="" lets the csv module control line endings (otherwise an extra
    # blank line appears between rows on platforms that translate "\n").
    # UTF-8 keeps non-ASCII OCR output writable regardless of locale.
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        dw = DictWriter(csvfile, fieldnames=headers)
        dw.writeheader()
        dw.writerows(vars(t) for t in transactions)
31 changes: 17 additions & 14 deletions cardvisionpy/logic/transactionparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import cardvisionpy.logic.transactionutil as transactionutil
from cardvisionpy.models.transaction import Transaction


class TransactionParser:
"""Can iterate over and return a processed list of Transactions."""

Expand All @@ -25,27 +26,29 @@ def get_transactions(self) -> list[Transaction]:
if next_transaction:
transactions.append(next_transaction)
return transactions

def next_transaction(self) -> Optional[Transaction]:
"""Processes and removes the next transaction from the list"""
try:
self.logger.debug(f"Beginning next transaction.\nraw_transactions: {self.raw_transactions}")
self.logger.debug(
f"Beginning next transaction.\nraw_transactions: {self.raw_transactions}"
)
new_transaction = Transaction()
next_field = self.raw_transactions.pop(0)

# Sometimes payee names get broken into additional lines
# Keep iterating until we find a valid transaction amount
while new_transaction.amount is None:
while new_transaction.amount is None:
if transactionutil.is_amount(next_field):
new_transaction.amount = transactionutil.amount_in_cents(next_field)
else:
if new_transaction.payee is None:
if new_transaction.payee is None:
new_transaction.payee = next_field
else:
else:
new_transaction.payee += f" {next_field}"
next_field = self.raw_transactions.pop(0)

if new_transaction.payee is None:
if new_transaction.payee is None:
new_transaction.payee = next_field
next_field = self.raw_transactions.pop(0)

Expand All @@ -58,7 +61,7 @@ def next_transaction(self) -> Optional[Transaction]:
time_description = third_ba_line
new_transaction.set_memo(new_transaction.payee)
else:
if "%" in next_field: # Sometimes Daily Cash percent is first.
if "%" in next_field: # Sometimes Daily Cash percent is first.
new_transaction.dailyCash = next_field
next_field = self.raw_transactions.pop(0)
new_transaction.set_memo(next_field)
Expand All @@ -67,7 +70,7 @@ def next_transaction(self) -> Optional[Transaction]:

if new_transaction.is_daily_cash() and new_transaction.dailyCash is None:
daily_cash = next_field
while ("%" not in daily_cash):
while "%" not in daily_cash:
daily_cash = self.raw_transactions.pop(0)
new_transaction.dailyCash = daily_cash
next_field = self.raw_transactions.pop(0)
Expand All @@ -78,27 +81,27 @@ def next_transaction(self) -> Optional[Transaction]:
time_description = next_field
while not transactionutil.is_timestamp(time_description):
time_description += " " + self.raw_transactions.pop(0)
time_description = time_description.replace("-"," ").replace("•"," ")

time_description = time_description.replace("-", " ").replace("•", " ")

# Attempt to remove family member's name from description when using Family Sharing.
# ex. "NAME - Yesterday"
# If the description contains spaces and does not start with a number, it likely starts with the family member's name.
if " " in time_description and re.match("^[0-9]", time_description) == None:
if " " in time_description and re.match("^[0-9]", time_description) == None:
time_description_split = time_description.split(" ", 1)
familyMember = time_description_split[0]
new_transaction.set_memo(f"{familyMember} - {new_transaction.memo}")
time_description = time_description_split[1].strip()

parsed_date = dateparser.parse(time_description)
if parsed_date is None:
self.logger.warn("Exception while parsing date, defaulting to today.")
new_transaction.date = date.today()
else:
new_transaction.date = parsed_date.date()

self.logger.debug(f"New transaction created:\n{new_transaction}")
return new_transaction
except IndexError:
self.logger.error("Ran out of text elements while generating transaction.")
return None
return None
20 changes: 15 additions & 5 deletions cardvisionpy/logic/transactionutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,26 @@ def is_amount(amountCandidate: str) -> bool:
"""Determines if the given amount string is a valid monetary amount"""
return re.search("^\+*\$[\d,]*\.\d\d$", amountCandidate) != None


def is_timestamp(candidate: str) -> bool:
    """Determines if the given string contains a valid timestamp

    A timestamp is any one of: a relative time ("5 hours ago"), an mm/dd/yy
    date stamp, or a day name / "Yesterday" (matched case-insensitively).
    The match may occur anywhere inside ``candidate``.
    """
    patterns = (
        # relative timestamp
        r"[0-9]{1,2} (?:minute|hour)s{0,1} ago",
        # mm/dd/yy date stamp
        r"\d{1,2}\/\d{1,2}\/\d{2}",
        # day of week including Yesterday
        # NOTE(review): the leading "W*" and trailing "[sS]*" match zero
        # characters in practice; they look like "\W*" / "[\s\S]*" with lost
        # backslashes. Kept as-is to preserve matching behavior; confirm.
        r"(?i)W*(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun|Yester)day\b[sS]*",
    )
    return any(re.search(pattern, candidate) is not None for pattern in patterns)


def amount_in_cents(amount: str) -> Optional[int]:
    """Converts a dollar amount to cents

    The "+", "$", "." and "," characters are removed and the remaining digits
    are read as a whole number of cents, so the input is expected to carry
    exactly two decimal places (e.g. "$1,234.56").

    Returns:
        The positive cent value when ``amount`` contains a "+", the negated
        cent value otherwise, or ``None`` when what remains is not a plain
        run of decimal digits.
    """
    # Strip sign, currency symbol and separators in a single pass.
    cents_str = amount.translate(str.maketrans("", "", "+$.,"))
    # isdecimal() (not isnumeric()) so characters such as "²" or "½", which
    # int() cannot parse, yield None instead of raising ValueError.
    if cents_str.isdecimal():
        cents = int(cents_str)
        return cents if "+" in amount else -cents
    return None
6 changes: 3 additions & 3 deletions cardvisionpy/models/transaction.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from datetime import date
from typing import Optional


class Transaction:
"""
Holds a Transaction representing an Apple Card transaction.
"""

# The date of the transaction
date: Optional[date] = None
# The payee (or payer)
Expand Down Expand Up @@ -38,7 +39,6 @@ def is_daily_cash(self):

return not ("refund" in self.memo.lower() or self.is_declined())


def is_declined(self) -> bool:
"""Determines if the given transaction string is 'Declined'"""
if self.memo:
Expand All @@ -52,7 +52,7 @@ def is_pending(self) -> bool:
return "pending" in self.memo.lower()
else:
return False

def __str__(self) -> str:
return f"""Payee: {self.payee}
Amount: {self.amount}
Expand Down
2 changes: 1 addition & 1 deletion install-hook.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh

# Install a git pre-commit hook that runs the black check, mypy and the tests.
# printf is used instead of `echo -e`: under /bin/sh the -e flag is not
# portable (some shells print a literal "-e" as the first line of the hook).
printf '%s\n' \
    '#!/bin/sh' \
    'black --check .' \
    'mypy cardvisionpy' \
    'pytest tests/*test.py' > .git/hooks/pre-commit
chmod +x .git/hooks/pre-commit
97 changes: 53 additions & 44 deletions tests/transactionparsertest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,77 @@

from cardvisionpy.logic.transactionparser import TransactionParser


def test_next_transaction():
    """Payee, amount, memo, Daily Cash percent and date in the usual order."""
    raw_transaction = ["Amazon", "$50.00", "Card Number Used", "1%", "Yesterday"]

    tp = TransactionParser(raw_transaction)
    actual = tp.next_transaction()

    # next_transaction() is Optional; fail clearly rather than with an
    # AttributeError on None.
    assert actual is not None
    assert actual.payee == "Amazon"
    assert actual.amount == -5000
    assert actual.memo == "Card Number Used"
    assert actual.dailyCash == "1%"
    assert actual.is_daily_cash()
    assert not actual.is_declined()
    assert not actual.is_pending()
    assert actual.date == dateparser.parse("Yesterday").date()


def test_transaction_amount_first():
    """The amount line may come before the payee line."""
    raw_transaction = [
        "$100.00",
        "Grocery Store",
        "Card Number Used",
        "1%",
        "5 hours ago",
    ]

    tp = TransactionParser(raw_transaction)
    actual = tp.next_transaction()

    # next_transaction() is Optional; fail clearly rather than with an
    # AttributeError on None.
    assert actual is not None
    assert actual.payee == "Grocery Store"
    assert actual.amount == -10000
    assert actual.memo == "Card Number Used"
    assert actual.dailyCash == "1%"
    assert actual.is_daily_cash()
    assert not actual.is_declined()
    assert not actual.is_pending()
    assert actual.date == dateparser.parse("5 hours ago").date()


def test_transaction_daily_cash_before_memo():
    """The Daily Cash percent may come before the memo line."""
    raw_transaction = [
        "Movie Theater",
        "$25.00",
        "2%",
        "Pending - Card Number Used",
        "5 minutes ago",
    ]

    tp = TransactionParser(raw_transaction)
    actual = tp.next_transaction()

    # next_transaction() is Optional; fail clearly rather than with an
    # AttributeError on None.
    assert actual is not None
    assert actual.payee == "Movie Theater"
    assert actual.amount == -2500
    assert actual.memo == "Pending - Card Number Used"
    assert actual.dailyCash == "2%"
    assert actual.is_daily_cash()
    assert not actual.is_declined()
    assert actual.is_pending()
    assert actual.date == dateparser.parse("5 minutes ago").date()


def test_daily_cash_adjustment():
    """A refund-related adjustment carries no Daily Cash percent line."""
    raw_transaction = ["Daily Cash Adjustment", "$0.25", "From Refund", "Yesterday"]

    tp = TransactionParser(raw_transaction)
    actual = tp.next_transaction()

    # next_transaction() is Optional; fail clearly rather than with an
    # AttributeError on None.
    assert actual is not None
    assert actual.payee == "Daily Cash Adjustment"
    assert actual.amount == -25
    assert actual.memo == "From Refund"
    assert not actual.is_daily_cash()
    assert not actual.is_declined()
    assert not actual.is_pending()
    assert actual.date == dateparser.parse("Yesterday").date()
Loading

0 comments on commit c99abb2

Please sign in to comment.