Skip to content

Commit e61d744

Browse files
madhu
1 parent 29c0d69 commit e61d744

10 files changed

+394
-19
lines changed

awscliv2.zip

27.4 MB
Binary file not shown.

convest.py

+324
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
import cv2
2+
import numpy as np
3+
import pytesseract
4+
import os
5+
import datetime
6+
import base64
7+
8+
# C:\Program Files\Tesseract-OCR\tesseract.exe
9+
# poppler_path=r'D:\downloads\poppler-0.68.0_x86\poppler-0.68.0\bin'
10+
poppler_path=r'D:\downloads\poppler-0.68.0_x86\poppler-0.68.0\bin'
11+
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\madhu\AppData\Local\Programs\Tesseract-OCR\tesseract.exe"
12+
13+
# Mongodb
14+
from pymongo import MongoClient
15+
16+
# output_file = r'D:\projects\pytesseract\images' ## give images folder path
17+
output_file = r'D:\downloads\img'
18+
folder_path = r'D:\downloads\pdf' ## give pdf's folder path
19+
# MongoDB connection setup for the conversion script.
20+
# NOTE: this default-host client is rebound a few lines below by the explicit
# localhost URI, so this first instance is never used.
client = MongoClient()
21+
# Heights of detected rectangles; appended to by img_detect and cleared by
# image_upload before each image.
he = []
22+
import urllib.parse
23+
24+
# NOTE(review): credentials are hard-coded in source. In the visible code they
# are only referenced by the commented-out authenticated URI below; consider
# loading them from the environment or a secrets store instead.
Username = 'devops_admin'
25+
Password = 'Devops1234'
26+
# URL-escape the credentials so they could be embedded in a MongoDB URI.
username = urllib.parse.quote_plus(Username)
27+
password = urllib.parse.quote_plus(Password)
28+
# Connect to the MongoDB server on localhost, port 27017 (URI carries no credentials)
29+
client = MongoClient('mongodb://localhost:27017/')
30+
# client = MongoClient('mongodb://%s:%[email protected]:27017' % (username,password))
31+
32+
# Select the database that holds the converted data
33+
mydatabase = client['Data_conversion']
34+
35+
# Collections used by the script:
#   mycollection  - full per-slip records (file tracker, image name, OCR text, timestamp)
#   mycollection2 - file tracker + image name only
#   myimage       - not referenced anywhere else in the visible code
36+
mycollection = mydatabase['test']
37+
mycollection2 = mydatabase['testimage']
38+
myimage = mydatabase['test2']
39+
40+
# dictionary to be added in the database
41+
rec = {
42+
'title': 'MongoDB and Python',
43+
'description': 'MongoDB is no SQL database',
44+
'tags': ['mongodb', 'database', 'NoSQL'],
45+
'viewers': 104
46+
}
47+
48+
49+
# Timestamp taken once at start-up (stored as 'Created on' in inserted records)
50+
51+
dt = datetime.datetime.now()
52+
print(dt)
53+
lis = ["",""]
54+
55+
# Running index used to give every saved crop a unique file name.
count = 0


def imgetobase(file):
    """Save an image array as ``sample<N>.png`` and return that file name.

    The image is also shown briefly in an OpenCV window named ``'Resized'``.
    ``N`` is the module-level ``count``, which is incremented on every call so
    successive crops do not overwrite each other within one run.

    Args:
        file: Image array accepted by ``cv2.imwrite`` (despite the parameter
            name, this is pixel data, not a path).

    Returns:
        The relative file name the image was written to.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    global count
    name = f"sample{count}.png"

    cv2.imshow('Resized', file)
    # cv2.imwrite reports failure through its boolean return value rather
    # than an exception, so check it instead of silently returning the name
    # of a file that does not exist.
    written = cv2.imwrite(name, file)
    cv2.waitKey(3)
    count += 1
    if not written:
        raise OSError(f"could not write image file {name!r}")

    # TODO: the S3 upload (bucket "ivin-pro-data-conversion") is disabled.
    # When it is re-enabled, create the boto3 client once at module level
    # instead of per call; the previous per-call client plus list_buckets()
    # request did a network round trip for every crop and used none of it.
    return name
73+
74+
# def pagetobase(image):
75+
# image = open(image, 'rb')
76+
# image_read = image.read()
77+
# image_64_encode = base64.encodebytes(image_read) #encodestring also works aswell as decodestring
78+
# # print('This is the image in base64: ' + str(image_64_encode))
79+
# return str(image_64_encode)
80+
81+
82+
83+
def page(image):
    """OCR a whole page and collect its "Assembly" / "Section" lines.

    The recognised text (with every occurrence of ``'Photo'`` removed) is
    written to ``text.txt``, replacing any previous content. Each line of that
    file containing ``"Assembly"`` or ``"Section"`` is then appended, trailing
    newline included, to the module-level list ``lis``.

    Args:
        image: Anything ``pytesseract.image_to_string`` accepts.
    """
    text = pytesseract.pytesseract.image_to_string(image)
    # str.replace returns a new string; without rebinding, the removal of
    # 'Photo' was silently lost.
    text = text.replace('Photo', "")

    with open('text.txt', 'w') as out:
        out.write(text + "\n")

    # Read the file back (rather than splitting ``text`` in memory) so line
    # boundaries are exactly the ones the text-mode file layer produces.
    with open('text.txt', 'r') as src:
        for line in src:
            if "Assembly" in line or "Section" in line:
                lis.append(line)
100+
101+
102+
103+
c = 1
104+
def _extract_slip(x, y, w, h, im2, img_file, *, color, thickness,
                  min_width=None, save_page=False):
    """Crop the left third of a rectangle, OCR it and store the result.

    Shared implementation behind ``covert``, ``covert2`` and ``covert3``,
    which differ only in outline colour/thickness, an optional minimum-width
    filter, and whether the annotated page is saved.

    Args:
        x, y: Top-left corner of the region, in pixels.
        w, h: Width and height of the enclosing rectangle; only the first
            ``int(w / 3)`` columns starting at ``x`` are cropped.
        im2: Page image array. It is modified in place (an outline is drawn).
        img_file: Source file name, stored as ``'Voter_file_tracker'``.
        color: BGR colour of the outline.
        thickness: Outline thickness in pixels.
        min_width: If given, rectangles with ``w <= min_width`` are ignored.
        save_page: If true, write the annotated page to ``cropped.png``.
    """
    if not (400 <= h < 1000):
        return
    if min_width is not None and w <= min_width:
        return

    third = int(w / 3)
    # The outline is drawn on im2 itself, so it is part of the crop below.
    cv2.rectangle(im2, (x, y), (x + third, y + h), color, thickness)

    # Regions anchored at the page origin are outlined but not processed.
    if x == 0 and y == 0:
        return

    cropped = im2[y:y + h, x:x + third]
    cv2.imshow('d', cropped)
    cv2.waitKey(3)
    if save_page:
        cv2.imwrite("cropped.png", im2)

    slip_file = imgetobase(cropped)
    text = pytesseract.image_to_string(cropped)
    # Context manager closes the handle; the earlier code also opened
    # 'test.txt' in append mode and never wrote to or closed it.
    with open('text.txt', 'a') as out:
        out.write(text)

    mycollection2.insert_one({
        'Voter_file_tracker': img_file,
        "Voter_Image": slip_file,
    })
    # NOTE(review): `lis` is seeded with two empty strings and page() only
    # appends, so lis[0] / lis[1] look like they stay empty unless `lis` is
    # reset somewhere outside the visible code - confirm.
    mycollection.insert_one({
        'Voter_file_tracker': img_file,
        "votere_slip": slip_file,
        "Assembly Name": lis[0],
        "Section": lis[1],
        'details': text,
        'Created on': dt,
    })


def covert(x, y, w, h, im2, img_file):
    """Process one slip column with a green, 5 px outline (no width filter)."""
    _extract_slip(x, y, w, h, im2, img_file,
                  color=(0, 255, 0), thickness=5)


def covert2(x, y, w, h, im2, img_file):
    """Process one slip column with a yellow, 3 px outline; requires w > 400."""
    _extract_slip(x, y, w, h, im2, img_file,
                  color=(0, 255, 255), thickness=3, min_width=400)


def covert3(x, y, w, h, im2, img_file):
    """Like ``covert2`` with a white outline; also saves the page to cropped.png."""
    _extract_slip(x, y, w, h, im2, img_file,
                  color=(255, 255, 255), thickness=3, min_width=400,
                  save_page=True)
227+
228+
229+
230+
def ima(x, y, w, h, im2, img_file):
    """Load a page image and run ``covert`` on three side-by-side columns.

    Args:
        x, y: Top-left corner of the detected rectangle.
        w, h: Width and height of the detected rectangle.
        im2: Path of the page image; it is read from disk on every call.
        img_file: Source file name, passed through to ``covert``.
    """
    page_img = cv2.imread(im2)
    # Label the rectangle's corner on the freshly loaded page.
    cv2.putText(page_img, 'Rectangle', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # covert() crops a third of the width itself, so step the origin by a
    # third each time while still passing the full rectangle width.
    third = int(w / 3)
    for column in range(3):
        covert(x + column * third, y, w, h, page_img, img_file)
243+
244+
245+
246+
area = []
247+
value = []
248+
249+
250+
# def img_detect(img_path, img_file):
251+
# lis.clear()
252+
# page(img_path)
253+
#
254+
def img_detect(img_path, img_file):
    """Find non-square four-sided contours on a page and process each one.

    The page is thresholded, its contours are extracted, and every contour
    whose polygon approximation has four vertices and whose bounding box is
    not roughly square (width/height outside 0.9-1.1) is recorded in the
    module-level lists ``area``, ``value`` and ``he``. ``ima`` is then called
    for every entry in ``value``.

    Args:
        img_path: Path of the page image to analyse.
        img_file: Source file name, passed through to ``ima``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None for unreadable or non-image files; skip the
        # file instead of failing inside cvtColor with an opaque error.
        print(img_path, 'could not be read as an image, skipping')
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Named constants equal the literal 0 / 1 / 2 that were passed before.
    _, thresh = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    for cnt in contours:
        approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
        if len(approx) != 4:
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        ratio = float(w) / h
        if 0.9 <= ratio <= 1.1:
            # Near-square boxes are ignored.
            continue
        area.append(h * w)
        value.append((h, w, x, y))
        he.append(h)

    for box_h, box_w, box_x, box_y in value:
        ima(x=box_x, y=box_y, w=box_w, h=box_h, im2=img_path, img_file=img_file)
281+
282+
283+
import os
284+
285+
286+
def image_upload(image_file):
    """Run ``img_detect`` on every file in a directory.

    The module-level accumulators ``area``, ``value`` and ``he`` are cleared
    before each file so detections from one page do not leak into the next.

    Args:
        image_file: Path of the directory containing the page images
            (despite the name, this is a directory, not a single file).
    """
    names = os.listdir(image_file)
    # Count the directory actually being processed; the count used to be
    # taken from the PDF folder while being labelled as images.
    print("count of images :", len(names))

    for k, name in enumerate(names):
        print(k, 'started')
        area.clear()
        value.clear()
        he.clear()
        # os.path.join avoids the invalid "\{" escape in the old f-string and
        # the hard-coded Windows separator.
        img_detect(img_path=os.path.join(image_file, name), img_file=name)
        print(k, 'done')
298+
299+
300+
def converting_pdftoimg(pdf_path):
    """Render every page of a PDF to a PNG file in ``output_file``.

    Pages are rendered at 500 DPI using the poppler binaries at the
    module-level ``poppler_path``. Page ``i`` is saved as
    ``<pdf file name><i>.png`` (the ``.pdf`` suffix is kept in the name).

    Args:
        pdf_path: Path of the PDF to convert.
    """
    from pdf2image import convert_from_path

    images = convert_from_path(pdf_path, 500, poppler_path=poppler_path)
    # basename handles the platform's separators instead of splitting on a
    # literal backslash.
    base = os.path.basename(pdf_path)
    print(base)
    for i, image in enumerate(images):
        print(i, 'is pdf converting into images')
        fname = f'{base}{i}.png'
        # os.path.join replaces the f-string with the invalid "\{" escape.
        image.save(os.path.join(output_file, fname), "PNG")
311+
312+
313+
314+
# Script entry: list the PDF folder (for information only), then process the
# images already present in output_file.
l = os.listdir(folder_path)
315+
print(l)
316+
# PDF -> image conversion step, currently disabled:
# for i in range(len(l)):
317+
# print(l[i])
318+
# converting_pdftoimg(pdf_path=f'{folder_path}\{l[i]}')
319+
# yesy()
320+
321+
# NOTE(review): this appears to be a top-level call (indentation is not
# visible here), in which case it runs whenever the file is executed or imported - confirm.
image_upload(f'{output_file}')
322+
323+
324+
# dt = datetime.datetime.now()

generatepdf/base64data.txt

+1
Large diffs are not rendered by default.
Binary file not shown.
Binary file not shown.

generatepdf/pdf/htmltopdfconverter.py

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from django.http import HttpResponse, StreamingHttpResponse
22
from wsgiref.util import FileWrapper
33
from .s3file import *
4-
5-
64
from .serializers import htmltopdfserializer
75
import pdfkit
86
import os

generatepdf/pdf/s3file.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def upload_file(image_path):
99
'Authorization': f'Bearer {token}'
1010
}
1111
import PyPDF2
12-
12+
1313
pdfFileObj = open(image_path, 'rb')
1414
# pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
1515
files = {'UploadDoc': pdfFileObj}
@@ -25,4 +25,18 @@ def upload_file(image_path):
2525

2626
# image_path = r"D:\projects\New folder (2)\PDF-Generator-Python\generatepdf\Lifeeazy-HealthSummaryl.pdf"
2727
# # image_path = r"D:\projects\New folder (2)\test.txt"
28-
# upload_file(image_path=image_path)
28+
# upload_file(image_path=image_path)
29+
30+
31+
32+
33+
# Ad-hoc S3 upload test.
# NOTE(review): these statements appear to sit at module level (indentation is
# not visible here). If so, they run on every import of this module, including
# via `from .s3file import *` - confirm and consider moving them into a function.
import boto3
34+
b_name = "ivin-pro-data-conversion"
35+
s3 = boto3.client("s3")
36+
# b_res is only consumed by the commented-out loop below.
b_res = s3.list_buckets()
37+
# for i in b_res['Buckets']:
38+
# print(i)
39+
# NOTE: a hard-coded local .png file is uploaded under the key "testfile.jpg".
with open(r"C:\Users\anves\Pictures\as.png",'rb') as img:
40+
s3.upload_fileobj(img,b_name,"testfile.jpg")
41+
#
42+
# s3.download_file(b_name,"testfile.jpg ","download.jpg")

0 commit comments

Comments
 (0)