
My requirement: I need to extract license plates without duplicates, store the images in a folder, and then apply OCR to extract text from the images.

What I have achieved: I am able to detect license plates properly, and with the help of a tracking mechanism I was able to avoid duplicate plates.

Pending item: I am unable to format the image properly, which is preventing the OCR from reading the text.

I have written the code below, which detects license plates on vehicles, stores the images in a folder, and OCRs them to write the text to a separate file. In my case, the image stored in the folder is not straightened/cropped properly after processing, and it also looks dull. Because of this, the OCR is not recognizing the text properly. Please help me resolve this; once the OCR recognizes text from the image my work is done.

[Images: license_plate_after_processing, license_plate_detected]
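For context, here is a minimal preprocessing sketch of the kind that usually helps Tesseract on plate crops. This is illustrative only, not the code under discussion: ocr_plate and plate_crop are hypothetical names, and --psm 7 is an assumption that the crop holds a single line of text.

import cv2
import pytesseract

def ocr_plate(plate_crop):
    # Assumption: plate_crop is a BGR crop containing just the plate.
    gray = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2GRAY)
    # Upscale so the characters are tall enough for Tesseract.
    gray = cv2.resize(gray, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)
    # Otsu binarization gives Tesseract clean black-on-white glyphs.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # --psm 7 treats the image as a single text line, which suits a plate.
    return pytesseract.image_to_string(binary, config='--psm 7')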

from ultralytics import YOLO
import cv2
import pytesseract
import os
import numpy as np
import pandas as pd   # needed for the Excel export at the end
from collections import defaultdict

class_counts = defaultdict(int)
processed_track_ids = set()

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # raw string so \t is not interpreted as a tab

model = YOLO("license_plate_detector.pt")
video_path = 'video.MP4'
cap = cv2.VideoCapture(video_path)   # capital C; cv2.Videocapture does not exist
fps = cap.get(cv2.CAP_PROP_FPS)
delay = int(1000 / fps)

output_dir = 'extracted_dir'
os.makedirs(output_dir, exist_ok=True)

def resize_frame(frame, size=(640, 640)):
    return cv2.resize(frame, size, interpolation=cv2.INTER_LINEAR)

license_plates = []

def straighten_crop_image(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)                      # cv2.canny does not exist; it is cv2.Canny
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)    # cv2.Houghlines -> cv2.HoughLines
    if lines is not None:
        # Note: drawing the detected lines onto the image itself leaves red
        # strokes in the crop and hurts OCR; consider drawing on a copy instead.
        for rho, theta in lines[:, 0]:
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * a)
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * a)
            cv2.line(image, (x1, y1), (x2, y2), (0, 0, 255), 2)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    if len(contours) == 0:
        print("No contours found")
        return image
    license_plate_contour = contours[0]                   # was misspelled license_plate_contours
    x, y, w, h = cv2.boundingRect(license_plate_contour)
    cropped_image = image[y:y + h - 10, x:x + w]
    rect = cv2.minAreaRect(license_plate_contour)
    box = cv2.boxPoints(rect)
    box = np.intp(box)                                    # np.int0 was removed in NumPy 2.0
    width = int(rect[1][0])
    height = int(rect[1][1])
    if abs(width - height) > 10:
        src_pts = box.astype("float32")
        # Caution: box is in full-image coordinates but the warp is applied to
        # cropped_image, so the transform is offset; warp the original image
        # (or subtract (x, y) from src_pts) to straighten correctly.
        dst_pts = np.array([[0, height - 1], [0, 0], [width - 1, 0], [width - 1, height - 1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        straightened_image = cv2.warpPerspective(cropped_image, M, (width, height))
    else:
        straightened_image = cropped_image
    if straightened_image.size == 0:
        print("Straightened image is empty")
        return image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    straightened_image = cv2.dilate(straightened_image, kernel, iterations=1)
    straightened_image = cv2.convertScaleAbs(straightened_image, alpha=1.5, beta=50)
    return straightened_image
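If the contour/warp pipeline above stays brittle, a simpler approach is to rotate the whole crop by the dominant text angle. This is only a sketch of an alternative, not part of the original code, and the angle normalization is an assumption, since cv2.minAreaRect angle conventions differ across OpenCV versions:

def simple_deskew(plate_bgr):
    gray = cv2.cvtColor(plate_bgr, cv2.COLOR_BGR2GRAY)
    # Invert-threshold so the text pixels become the foreground.
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # (row, col) coordinates of all text pixels, cast for minAreaRect.
    coords = np.column_stack(np.where(thresh > 0)).astype(np.float32)
    angle = cv2.minAreaRect(coords)[-1]
    if angle > 45:        # map the complementary angle back toward zero
        angle -= 90
    h, w = plate_bgr.shape[:2]
    M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    return cv2.warpAffine(plate_bgr, M, (w, h),
                          flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)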

def enhance_image(image):   # was misspelled 'enchance_image' but called as enhance_image below
    straightened_image = straighten_crop_image(image)
    gray = cv2.cvtColor(straightened_image, cv2.COLOR_BGR2GRAY)
    return gray

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_resized = resize_frame(frame)
    # Ultralytics accepts the numpy frame directly; the manual tensor was unused,
    # and the original passed an undefined 'frame_sensor' to model.track().
    results = model.track(frame_resized, persist=True, conf=0.4, iou=0.4)

    for result in results:
        if result.boxes.id is not None:
            for bbox, track_id in zip(result.boxes.xyxy, result.boxes.id):
                bbox = bbox.int().tolist()
                track_id = int(track_id)   # convert the tensor so set membership works
                license_plate = frame_resized[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                enhanced_license_plate = enhance_image(license_plate)
                if track_id not in processed_track_ids:
                    ocr_text = pytesseract.image_to_string(enhanced_license_plate)
                    print(f'OCR text: {ocr_text}')
                    processed_track_ids.add(track_id)
                    license_plates.append(ocr_text.strip())   # was never appended, so the Excel export stayed empty
                    plate_filename = os.path.join(output_dir, f'track_id_{track_id}_plate.png')
                    cv2.imwrite(plate_filename, enhanced_license_plate)
                    ocr_filename = os.path.join(output_dir, f'track_{track_id}_plate.txt')
                    with open(ocr_filename, 'w') as f:
                        f.write(ocr_text)
                    print(f'Text saved to: {ocr_filename}')

                cv2.rectangle(frame_resized, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
                cv2.putText(frame_resized, 'License Plate', (bbox[0], bbox[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    cv2.imshow('License plate detection', frame_resized)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

df = pd.DataFrame(license_plates, columns=['License Plate'])
df.to_excel('extracted_license_plates.xlsx', index=False)   # the original had '.' instead of ',' before index


1 Answer

from ultralytics import YOLO
import cv2
import os
import numpy as np
import math
import pandas as pd
import cvzone
from datetime import datetime
from paddleocr import PaddleOCR

# The original only did 'import paddleocr' but then used a global 'ocr' object;
# it has to be instantiated first.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

model = YOLO("license_plate_detector.pt")
video_path = 'video.MP4'
cap = cv2.VideoCapture(video_path)   # capital C; cv2.Videocapture does not exist
with open("c:\\coco1.txt", "r") as f:   # coco1.txt should contain numberplate
    class_names = f.read().splitlines()

output_dir = 'extracted_dir'
os.makedirs(output_dir,exist_ok=True)

def perform_ocr(image_array):
    if image_array is None:
        raise ValueError("Image is none")
    results = ocr.ocr(image_array, rec=True)
    detected_text = []
    if results[0] is not None:
        for result in results[0]:
            text = result[1][0]
            detected_text.append(text)
    return ''.join(detected_text)   # the return was mis-indented outside the function body


license_plates = []

def rotate_image(image, angle):
    if abs(angle) < 1e-6 or angle in [90, 80, 100]:
        return image
    h, w = image.shape[:2]
    # Expand the canvas so nothing is clipped after rotation.
    new_w = int(w * abs(np.cos(np.radians(angle))) + h * abs(np.sin(np.radians(angle))))
    new_h = int(h * abs(np.cos(np.radians(angle))) + w * abs(np.sin(np.radians(angle))))
    rot_mat = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    rot_mat[0, 2] += (new_w - w) / 2
    rot_mat[1, 2] += (new_h - h) / 2
    return cv2.warpAffine(image, rot_mat, (new_w, new_h), flags=cv2.INTER_LINEAR)

def compute_skew(src_img):
    # In the original this def was nested (unreachably) inside rotate_image
    # after its return statement; it must be a top-level function.
    if len(src_img.shape) == 3:
        h, w, _ = src_img.shape
    elif len(src_img.shape) == 2:
        h, w = src_img.shape
    else:
        print('unsupported image type')
        return 0.0
    img = cv2.medianBlur(src_img, 3)
    edges = cv2.Canny(img, threshold1=20, threshold2=100, apertureSize=3, L2gradient=True)
    lines = cv2.HoughLinesP(edges, 1, math.pi / 180, 30,   # 'lines -' was a typo for 'lines ='
                            minLineLength=w / 4.0, maxLineGap=h / 4.0)
    angle = 0.0
    cnt = 0
    if lines is not None:
        for line in lines:
            for x1, y1, x2, y2 in line:
                ang = np.arctan2(y2 - y1, x2 - x1)
                # Only count nearly-horizontal lines (within 30 degrees).
                if math.fabs(ang) <= math.radians(30):
                    angle += ang
                    cnt += 1
    if cnt == 0:
        return 0.0
    return (angle / cnt) * 180 / math.pi

def deskew(src_img):
    return rotate_image(src_img, compute_skew(src_img))

def RGB(event,x,y,flags,param):
    if event == cv2.EVENT_MOUSEMOVE:
        point = [x, y]
        print(point)
cv2.namedWindow('RGB')
cv2.setMouseCallback('RGB',RGB)

count = 0
area = [(540,522),(28,462),(3,507),(553,599)]
counter = []
save_folder = "sav_images"
df = pd.DataFrame(columns=["Date", "License Plate"])



while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.resize(frame, (640, 640))   # the original was missing the closing parenthesis
    # Ultralytics takes the numpy frame directly; the unused tensor and the
    # undefined 'frame_sensor' variable are gone.
    results = model.track(frame, persist=True, imgsz=240)

    if results[0].boxes.id is not None:   # the original checked the same condition twice
        boxes = results[0].boxes.xyxy.int().cpu().tolist()
        class_ids = results[0].boxes.cls.int().cpu().tolist()   # was copied from .xyxy by mistake
        track_ids = results[0].boxes.id.int().cpu().tolist()
        confidences = results[0].boxes.conf.cpu().tolist()
        for box, class_id, track_id, conf in zip(boxes, class_ids, track_ids, confidences):
            c = class_names[class_id]
            x1, y1, x2, y2 = box
            height, width = frame.shape[:2]
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(width, x2)
            y2 = min(height, y2)
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2
            result = cv2.pointPolygonTest(np.array(area, np.int32), (cx, cy), False)
            if result >= 0:
                if track_id not in counter:
                    counter.append(track_id)   # 'apppend' was a typo
                    crop = frame[y1:y2, x1:x2]
                    crop = cv2.resize(crop, (120, 100))
                    # The save/OCR steps are moved inside this branch so they run
                    # once per track and 'crop' is always defined. The stray
                    # pytesseract call on the undefined 'enhanced_license_plate'
                    # (left over from the question) is removed; perform_ocr()
                    # does the recognition via PaddleOCR.
                    plate_filename = os.path.join(output_dir, f'track_id_{track_id}_plate.png')
                    cv2.imwrite(plate_filename, crop)
                    text = perform_ocr(crop)
                    print(text)
                    text = text.replace('(', '').replace(')', '').replace(',', '').replace(']', '').replace('-', ' ')
                    df.loc[len(df)] = [datetime.now().strftime("%Y-%m-%d %H:%M:%S"), text]

    mycounter = len(counter)
    cvzone.putTextRect(frame, f'{mycounter}', (50, 60), 1, 1)
    cv2.polylines(frame, [np.array(area, np.int32)], True, (255, 0, 0), 2)   # 'frames' was a typo
    cv2.imshow('RGB', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


df.to_excel('extracted_license_plates.xlsx', index=False)   # the original had '.' instead of ',' before index