Skip to content

Commit b747851

Browse files
Add files via upload
1 parent 1ee3618 commit b747851

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

PdfToCsvConversion.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import os
import zipfile

import pandas as pd
import PyPDF2 as pypdf2
5+
6+
# Name of the ZIP file
7+
zip_filename = "DAR_ICMS_NORMAL_AND_FUNCEP_062024_173135.zip"

# Unpack every member of the archive into ./extracted_files.
# The handle is bound to `archive` rather than `zip` so the built-in zip()
# is not shadowed for the remainder of the script.
with zipfile.ZipFile(zip_filename, "r") as archive:
    archive.extractall("extracted_files")
13+
14+
# Read a PDF and return the text lines of its first page (no CSV file is written)
15+
def convert_pdf_to_csv(pdf_file):
    """Return the text of the first page of a PDF as a list of lines.

    Despite the name, no CSV is produced: the function only extracts the
    text of page 0 and splits it on newline characters.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        A list of strings, one per line of extracted first-page text.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_file, "rb") as pdf:
        # PdfFileReader / getPage() / extractText() were removed in
        # PyPDF2 3.0; PdfReader, .pages and extract_text() replace them.
        reader = pypdf2.PdfReader(pdf)
        # Extraction must happen while the file handle is still open.
        return reader.pages[0].extract_text().split("\n")
19+
20+
# Names of the PDF files (generic names)
21+
receipt_transmission_filename, dar_funcep_filename, dar_normal_filename = (
    "RECEIPT_TRANSMISSION_062024.pdf",
    "DAR_FUNCEP_062024.pdf",
    "DAR_ICMS_NORMAL_062024.pdf",
)

# Location of each PDF inside the extraction directory
receipt_transmission_path, dar_funcep_path, dar_normal_path = (
    os.path.join("extracted_files", pdf_name)
    for pdf_name in (
        receipt_transmission_filename,
        dar_funcep_filename,
        dar_normal_filename,
    )
)
29+
30+
# Read the first-page text lines of each extracted PDF
31+
# One call per document, in the same order as the paths are listed.
receipt_transmission_values, dar_funcep_values, dar_normal_values = map(
    convert_pdf_to_csv,
    (receipt_transmission_path, dar_funcep_path, dar_normal_path),
)
34+
35+
# Extract ICMS guide values
36+
def _pipe_field(lines, index):
    """Return field `index` of every "|"-separated line that has one.

    Lines with too few fields (for example the empty string left by a
    trailing newline) are skipped instead of raising IndexError.

    Args:
        lines: Iterable of text lines.
        index: Zero-based position of the wanted field after splitting on "|".

    Returns:
        A list with the selected field of each line that is long enough.
    """
    split_lines = (line.split("|") for line in lines)
    return [fields[index] for fields in split_lines if len(fields) > index]


# The receipt keeps the value in field 7; both DAR documents keep it in field 5.
receipt_transmission_values = _pipe_field(receipt_transmission_values, 7)
dar_funcep_values = _pipe_field(dar_funcep_values, 5)
dar_values = _pipe_field(dar_normal_values, 5)
39+
40+
# Compare the values of the three files
41+
# Whole-list equality reports element mismatches and length mismatches alike.
# The previous index loop raised IndexError when a DAR list was shorter than
# the receipt list and ignored extra values when it was longer.
if not (receipt_transmission_values == dar_funcep_values == dar_values):
    print("Error: ICMS guide values do not match.")
45+
46+
# Write the extracted values to an existing Excel spreadsheet
47+
# Workbook that is read and then overwritten in place
excel_path = 'existing_excel.xlsx'

# New rows: one column per DAR value list
data = dict(dar_funcep_value=dar_funcep_values, dar_icms_value=dar_values)
df = pd.DataFrame(data)

# Current contents of the workbook
existing_df = pd.read_excel(excel_path)

# Append the new rows below the existing ones and renumber the index
result_df = pd.concat(objs=[existing_df, df], ignore_index=True)

# Overwrite the workbook with the combined table, without the index column
result_df.to_excel(excel_path, index=False)

0 commit comments

Comments
 (0)