Using a Python script to track moving objects

Q: Is it possible to control my Full-HD Pan&Tilt camera with a Python script so that it follows moving objects?

A: You can use the Python library Requests to send CGI commands to your camera that trigger a pan or tilt movement.

In the example below, we use the Face Recognition library (face_recognition) to locate a face in the camera's live image. The camera is then panned and tilted until the face is in the centre of the image.

import face_recognition 
import cv2
import requests
import argparse


def move(direction,ip,user,pw,speed=0):
    """Move the camera one step in `direction`, then stop it.

    Sends two HTTP GET requests to the camera's ``ptzctrl.cgi`` endpoint:
    the first starts the movement, the second issues ``stop``.

    Args:
        direction: Value for the ``-act`` query parameter; this file passes
            "left", "right", "up" and "down".
        ip: Address (host) of the camera.
        user: User name placed in the request URL.
        pw: Password placed in the request URL.
        speed: Value for the ``-speed`` query parameter.
    """
    # For large deviations one could choose a higher speed to reach the
    # target position faster; this simple example does not do that.
    #
    # The URL is built from the function arguments only, so the function
    # does not depend on any module-level variables being defined.
    base_url = "http://"+user+":"+pw+"@"+ip+"/ptzctrl.cgi?-step=&-act="
    requests.get(base_url+direction+"&-speed="+str(speed))
    requests.get(base_url+"stop")

def get_boxes(frame):
    """Return the bounding boxes of the faces detected in `frame`.

    The body of this function can be swapped out to detect other kinds of
    objects, as long as the bounding boxes are returned in the format
    (top, right, bottom, left).
    """
    # 'cnn' gives better results but needs a graphics card.
    # For computation on the CPU use model='hog' instead.
    detection_model = 'cnn'
    face_boxes = face_recognition.face_locations(frame, model=detection_model)
    return face_boxes


if __name__=="__main__":
    # Command-line interface: camera address and credentials are mandatory,
    # the remaining options have defaults.
    ap = argparse.ArgumentParser()
    ap.add_argument("-ip","--IP",required=True,help="address of the camera")
    ap.add_argument("-u","--user",required=True,help="camera user name")
    ap.add_argument("-p","--password",required=True,help="camera password")
    ap.add_argument("-rtsp","--rtsp_port",default=554,help="port used in the RTSP URL")
    ap.add_argument("-res","--resolution",default=12,help="stream path appended to the RTSP URL")
    ap.add_argument("-show","--show",type=int,default=1,help="1 shows the live image with boxes, 0 disables it")
    args = vars(ap.parse_args())

    # NOTE: these are module-level names; other code in this file may read
    # them, so they keep their original spelling.
    username = args["user"]
    pw = args["password"]
    ip_address = args["IP"]

    show = bool(args["show"])

    # How many pixels of deviation from the image centre are tolerated
    # before the camera is moved.
    box_tolerance = 20

    capture = cv2.VideoCapture("rtsp://"+username+":"+pw+"@"+ip_address+":"+str(args["rtsp_port"])+"/"+str(args["resolution"]))

    try:
        while True:
            ret,frame = capture.read()
            # read() signals failure via `ret`; without this check a lost
            # or unopened stream would crash on `frame.shape` below.
            if not ret or frame is None:
                raise SystemExit("Could not read a frame from the RTSP stream.")

            # Target position ("Soll") is the centre of the image.
            h,w = frame.shape[:2]
            midX_Soll = w/2
            midY_Soll = h/2

            boxes = get_boxes(frame)

            if show:
                for (top,right,bottom,left) in boxes:
                    cv2.rectangle(frame,(left,top),(right,bottom),(0,255,0),2)
                cv2.imshow("",frame)
                cv2.waitKey(1)

            if not boxes:
                # Nothing detected: leave the camera where it is.
                continue

            # Only the first detected box is tracked.
            # Boxes are (top, right, bottom, left).
            top,right,bottom,left = boxes[0]
            midX = (right+left)/2
            midY = (bottom+top)/2

            # Horizontal and vertical corrections are independent, so one
            # frame can trigger both a pan and a tilt step.
            if midX-midX_Soll < -box_tolerance:
                move("left",ip_address,username,pw)
            elif midX-midX_Soll > box_tolerance:
                move("right",ip_address,username,pw)
            if midY-midY_Soll < -box_tolerance:
                move("up",ip_address,username,pw)
            elif midY-midY_Soll > box_tolerance:
                move("down",ip_address,username,pw)
    finally:
        # Always free the stream and the preview window, also on Ctrl+C.
        capture.release()
        if show:
            cv2.destroyAllWindows()