
I am trying to implement a system for counting people in a building using Python and OpenCV, but I have one problem: the tracking doesn't work very well, and it can't keep track of a person the whole time. Here is the code:

from mylib.centroidtracker import CentroidTracker
from mylib.trackableobject import TrackableObject
from imutils.video import VideoStream
from imutils.video import FPS
#from mylib.mailer import Mailer
from mylib import config, thread
import time, schedule, csv
import numpy as np
import argparse, imutils
import dlib, cv2, datetime
from itertools import zip_longest
#from pykalman import KalmanFilter

#python main.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --input videos/video.mp4
#kf = KalmanFilter(initial_state_mean=[0, 0], n_dim_obs=2)
t0 = time.time()

def is_above_boundary_line(x, y, slope, intercept):
    # image y grows downward, so "above" the line means a smaller y value
    return y < (slope * x + intercept)

ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=False,
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
    help="path to Caffe pre-trained model")
ap.add_argument("-i", "--input", type=str,
    help="path to optional input video file")
ap.add_argument("-o", "--output", type=str,
    help="path to optional output video file")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
ap.add_argument("-s", "--skip-frames", type=int, default=4,
    help="# of skip frames between detections")
args = vars(ap.parse_args())

CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor"]

net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

if not args.get("input", False):
    print("[INFO] Starting the live stream..")
    vs = VideoStream(config.url).start()
    time.sleep(2.0)
else:
    print("[INFO] Starting the video..")
    vs = cv2.VideoCapture(args["input"])

writer = None
W = None
H = None

ct = CentroidTracker(maxDisappeared=300, maxDistance=200)
trackers = []
trackableObjects = {}

totalFrames = 0
totalDown = 0
totalUp = 0
x = []
empty = []   # running "up" (exit) totals, logged for the CSV export
empty1 = []  # running "down" (enter) totals, logged for the CSV export

fps = FPS().start()

if config.Thread:
    vs = thread.ThreadingClass(config.url)

while True:
    frame = vs.read()
    frame = frame[1] if args.get("input", False) else frame
    if frame is None:
        break

    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    frame = imutils.resize(frame, width=500)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if W is None or H is None:
        (H, W) = frame.shape[:2]
        # boundary line runs from (0, 0.9*H) at the left edge to (0.85*W, 0) at the top
        x1, y1 = 0, int(H * 0.9)
        x2, y2 = int(W * 0.85), 0
        boundary_line_slope = (y2 - y1) / (x2 - x1)
        boundary_line_intercept = y1 - boundary_line_slope * x1

    if args["output"] is not None and writer is None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 30, (W, H), True)

    status = "Waiting"
    rects = []

    if totalFrames % args["skip_frames"] == 0:
        status = "Detecting"
        trackers = []

        blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
        net.setInput(blob)
        detections = net.forward()

        for i in np.arange(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]

            if confidence > args["confidence"]:
                idx = int(detections[0, 0, i, 1])

                if CLASSES[idx] != "person":
                    continue

                box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                (startX, startY, endX, endY) = box.astype("int")

                tracker = dlib.correlation_tracker()
                rect = dlib.rectangle(startX, startY, endX, endY)
                tracker.start_track(rgb, rect)

                trackers.append(tracker)
    else:
        for tracker in trackers:
            status = &quot;Tracking&quot;
            tracker.update(rgb)
            pos = tracker.get_position()

            startX = int(pos.left())
            startY = int(pos.top())
            endX = int(pos.right())
            endY = int(pos.bottom())

            rects.append((startX, startY, endX, endY))

    cv2.line(frame, (0, int(H * 0.9)), (int(W * 0.85), 0), (255, 0, 0), 3)

    objects = ct.update(rects)

    for (objectID, centroid) in objects.items():
        to = trackableObjects.get(objectID, None)

        if to is None:
            to = TrackableObject(objectID, centroid)
        else:
            #y = [c[1] for c in to.centroids]
            #direction = centroid[1] - np.mean(y)
            to.centroids.append(centroid)

            if not to.counted:
                if len(to.centroids) > 1:
                    prev_position = to.centroids[-2]
                    prev_above = is_above_boundary_line(prev_position[0], prev_position[1],
                        boundary_line_slope, boundary_line_intercept)
                    curr_above = is_above_boundary_line(centroid[0], centroid[1],
                        boundary_line_slope, boundary_line_intercept)
                    if prev_above and not curr_above:
                        totalDown += 1
                        empty1.append(totalDown)
                        to.counted = True
                    elif not prev_above and curr_above:
                        totalUp += 1
                        empty.append(totalUp)
                        to.counted = True

        trackableObjects[objectID] = to

        text = "ID {}".format(objectID)
        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

    info = [
        ("Exit", totalUp),
        ("Enter", totalDown),
       #("Status", status),
    ]

    for (i, (k, v)) in enumerate(info):
        text = "{}: {}".format(k, v)
        cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    if writer is not None:
        writer.write(frame)

    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    if key == ord("q"):
        break

    totalFrames += 1
    fps.update()

fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

if writer is not None:
    writer.release()

if not args.get("input", False):
    vs.stop()
else:
    vs.release()

cv2.destroyAllWindows()

d = [datetime.datetime.now()]
dts = [ts.strftime("%A %d %B %Y %I:%M:%S%p") for ts in d]
export_data = zip_longest(*[dts, empty, empty1], fillvalue='')

with open('Log.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(("End Time", "In", "Out"))
    writer.writerows(export_data)



1 Answer


Why not use a more modern model that is also simpler to work with?

Here, take a look at this code:

from ultralytics import YOLO
import cv2

# load the model (it will download the weights automatically)
model = YOLO('yolov8m.pt')

# open webcam
cap = cv2.VideoCapture(0)

while True:
    # read frame from webcam
    ret, frame = cap.read()
    if not ret:
        break

    # show the image as a mirror
    frame = cv2.flip(frame, 1)

    results = model.predict(
        source=frame,
        conf=0.25             # confidence cut-off
    )

    # COCO class names
    classes_dict = model.names
    for i in range(len(results)):
        boxes = results[i].boxes.xyxy.cpu().numpy()
        confs = results[i].boxes.conf.cpu().numpy()
        classes = results[i].boxes.cls.cpu().numpy()

        # draw bounding boxes on the image
        for j, box in enumerate(boxes):
            x1, y1, x2, y2 = box
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            cv2.putText(frame, f'{classes_dict[int(classes[j])]}: {confs[j]:.2f}',
                (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    cv2.imshow('frame', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
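Since you only care about counting people, you can also restrict predictions to the COCO "person" class (index 0) directly in the call, using the classes argument of predict:

results = model.predict(source=frame, conf=0.25, classes=[0])  # 0 = 'person' in COCO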

I snapped a photo of myself holding a book to test it (screenshot omitted here).

It runs in real time on my laptop (about 30 FPS), which I consider pretty good.
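Since your original problem is losing track of people, note that ultralytics also ships built-in multi-object trackers (ByteTrack / BoT-SORT) behind model.track, which gives each person a persistent ID across frames. Here is a minimal sketch of a line-crossing counter on top of it; the horizontal boundary line_y is a placeholder (your sloped is_above_boundary_line check could be dropped in instead), and the video path is the one from your own command line:

from ultralytics import YOLO
import cv2

model = YOLO('yolov8m.pt')
cap = cv2.VideoCapture('videos/video.mp4')  # or 0 for a webcam

line_y = 250          # placeholder boundary; replace with your sloped line
last_cy = {}          # last centroid y-coordinate per track ID
counted_ids = set()   # IDs that have already been counted
total_down = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # persist=True keeps track IDs stable across frames; classes=[0] keeps only persons
    results = model.track(frame, persist=True, classes=[0], verbose=False)
    boxes = results[0].boxes
    if boxes.id is not None:
        for box, track_id in zip(boxes.xyxy.cpu().numpy(), boxes.id.int().cpu().tolist()):
            cx = int((box[0] + box[2]) / 2)
            cy = int((box[1] + box[3]) / 2)
            prev_cy = last_cy.get(track_id)
            # count a downward crossing of the boundary exactly once per ID
            if (track_id not in counted_ids and prev_cy is not None
                    and prev_cy <= line_y < cy):
                counted_ids.add(track_id)
                total_down += 1
            last_cy[track_id] = cy
            cv2.putText(frame, f'ID {track_id}', (cx - 10, cy - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.line(frame, (0, line_y), (frame.shape[1], line_y), (255, 0, 0), 2)
    cv2.putText(frame, f'Enter: {total_down}', (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Because the tracker carries IDs through short occlusions and missed detections, it tends to be much more stable than matching raw centroids frame-to-frame the way CentroidTracker does.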