Normally we control a computer with a mouse, but we can also do it remotely with our hand.
In Python, this is possible with libraries such as mediapipe, pyautogui and opencv. mediapipe lets us track hand landmarks, which we use for the actions: left click, right click and double click.
pyautogui handles the mouse events themselves, i.e. moving and clicking. The project can still be optimized further. Below is a minimal sketch of the idea, followed by the full script (core/cap.py in the repo).
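Before the full code, here is a minimal sketch of the core loop, just to show how the pieces fit together. It is not the project's actual code: the camera index 0, the "preview" window name, and the choice of landmark 8 (the index fingertip) are illustrative assumptions.

# Minimal sketch: track one hand and move the cursor with the index fingertip.
import cv2
import mediapipe as mp
import pyautogui

hands = mp.solutions.hands.Hands(max_num_hands=1)
screen_w, screen_h = pyautogui.size()
cap = cv2.VideoCapture(0)  # assumed default camera
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # mediapipe expects RGB; OpenCV delivers BGR
    results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if results.multi_hand_landmarks:
        tip = results.multi_hand_landmarks[0].landmark[8]
        # landmark coordinates are normalized to [0, 1], so scale to the screen
        pyautogui.moveTo(tip.x * screen_w, tip.y * screen_h)
    cv2.imshow("preview", frame)
    if cv2.waitKey(1) == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()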
from concurrent.futures import ThreadPoolExecutor
from math import sqrt

import cv2
import mediapipe as mp
import psutil
from win32api import GetSystemMetrics

from entities.mouse import Mouse

grabSize = 50  # margin (in pixels) cropped from each edge of the frame
pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())  # one worker per CPU core
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)
mp_draw = mp.solutions.drawing_utils
# GetSystemMetrics(0) / GetSystemMetrics(1) return the screen width / height in pixels
mouse = Mouse(GetSystemMetrics(0), GetSystemMetrics(1))
def resolution(cap, w, h):
    # property ids 3 and 4 are CAP_PROP_FRAME_WIDTH and CAP_PROP_FRAME_HEIGHT
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, w)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, h)

def isExist(value):
    return bool(value)
def getDistance(a, b, hands_landmarks, scale=False, w=0, h=0):
    # Euclidean distance between landmarks a and b; mediapipe coordinates are
    # normalized to [0, 1], so pass scale=True with the frame size to get pixels
    dx = hands_landmarks[0].landmark[a].x - hands_landmarks[0].landmark[b].x
    dy = hands_landmarks[0].landmark[a].y - hands_landmarks[0].landmark[b].y
    if not scale:
        return sqrt(dx ** 2 + dy ** 2)
    return sqrt((dx * w) ** 2 + (dy * h) ** 2)
def draw(frame, hands_landmarks):
    h, w, c = frame.shape
    # palm-sized margins (in pixels) used to shrink the working area of the frame
    distance_three_to_seventeen = round(getDistance(3, 17, hands_landmarks, True, w, h))
    distance_zero_to_ten = round(getDistance(0, 10, hands_landmarks, True, w, h))
    # normalized fingertip distances that drive the click gestures
    distance_left = getDistance(4, 8, hands_landmarks)           # thumb tip to index tip
    distance_double_click = getDistance(4, 12, hands_landmarks)  # thumb tip to middle tip
    distance_point = getDistance(9, 0, hands_landmarks)          # palm length, the reference
    distance_right = getDistance(16, 4, hands_landmarks)         # ring tip to thumb tip
    distance_width = getDistance(4, 20, hands_landmarks)
    distance_height = getDistance(0, 12, hands_landmarks)
    wrapped_frame = frame[int(distance_zero_to_ten):int(h - distance_zero_to_ten),
                          int(distance_three_to_seventeen):int(w - distance_three_to_seventeen)]
    wh, ww, wc = wrapped_frame.shape
    # hand center: midpoint of the wrist (0) and the middle-finger base (9),
    # shifted into the wrapped frame's coordinates
    wx = w * (hands_landmarks[0].landmark[0].x + hands_landmarks[0].landmark[9].x) / 2 - distance_three_to_seventeen
    wy = h * (hands_landmarks[0].landmark[0].y + hands_landmarks[0].landmark[9].y) / 2 - distance_zero_to_ten
    if wx < 0:
        wx = 1
    if wy < 0:
        wy = 1
    try:
        # scale from wrapped-frame pixels to screen pixels
        x = round(wx * (mouse._swidth / ww))
        y = round(wy * (mouse._sheight / wh))
    except ZeroDivisionError:
        return
    # mirror the x axis so the cursor follows the mirrored webcam view,
    # with a small offset applied on one half of each axis
    mouse.move(mouse._swidth - x + (0 if (mouse._swidth - x) < mouse._swidth / 2 else distance_width),
               y + (0 if (mouse._sheight - y) < mouse._sheight / 2 else distance_height))
    # a gesture fires when a fingertip distance drops below a fifth of the palm length
    if distance_left < distance_point / 5:
        mouse.click()
    elif distance_right < distance_point / 5:
        mouse.click(button="right")
    elif distance_double_click < distance_point / 5:
        mouse.click(button="double")
def start(cam_code=0):
    cap = cv2.VideoCapture(cam_code)
    while True:
        ret, frame = cap.read()
        if not ret:
            print("failed to grab frame")
            break
        # light blur to reduce sensor noise before detection
        frame = cv2.GaussianBlur(frame, (3, 3), 0)
        # crop a fixed margin from every edge
        grappedImage = frame[grabSize:frame.shape[0] - grabSize, grabSize:frame.shape[1] - grabSize]
        # mediapipe expects RGB while OpenCV delivers BGR
        rgb_img = cv2.cvtColor(grappedImage, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_img)
        hands_landmarks = results.multi_hand_landmarks
        if isExist(hands_landmarks):
            try:
                # show a crop around the hand: from the middle fingertip (12) down to
                # the wrist (0), and from the pinky tip (20) across to the thumb tip (4)
                cv2.imshow("hand", grappedImage[
                    int(grappedImage.shape[0] * hands_landmarks[0].landmark[12].y):
                    int(grappedImage.shape[0] * hands_landmarks[0].landmark[0].y),
                    int(grappedImage.shape[1] * hands_landmarks[0].landmark[20].x):
                    int(grappedImage.shape[1] * hands_landmarks[0].landmark[4].x)])
            except Exception:
                pass
            # run tracking and mouse actions off the main thread
            pool.submit(draw, grappedImage, hands_landmarks)
        cv2.imshow("Hand tracer", grappedImage)
        if cv2.waitKey(1) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
cap.py is used for grabbing frames. A frame holds the pixel color values (OpenCV delivers them in BGR order, which is why the code converts to RGB before passing them to mediapipe), and hands_landmarks holds the detected hand and its landmark points. A thread pool is used here to keep the render loop fast, but this may not be the best solution and could be improved.
pool = ThreadPoolExecutor(max_workers=psutil.cpu_count())
The thread pool size is set to the number of CPU cores reported by psutil.cpu_count(). One possible improvement is sketched below.
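Since unbounded pool.submit calls can pile up faster than frames are processed, one alternative is a single worker that always handles only the most recent frame. This is a sketch, not the project's code; it assumes draw is the same function shown above.

# Sketch of an alternative to the thread pool: one worker, latest frame only.
import queue
import threading

latest = queue.Queue(maxsize=1)

def worker():
    while True:
        frame, landmarks = latest.get()
        draw(frame, landmarks)  # assumed to be the draw() defined above

threading.Thread(target=worker, daemon=True).start()

def submit_latest(frame, landmarks):
    # drop the stale frame, if any, so the worker never falls behind
    try:
        latest.get_nowait()
    except queue.Empty:
        pass
    latest.put((frame, landmarks))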
Euclidean distance is central here: I used it to compute the distance between hand landmarks, and those distances drive the mouse actions. A small worked example follows.
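For instance, with illustrative normalized coordinates (mediapipe reports landmark x and y in the [0, 1] range; these numbers are made up), the distance between the thumb tip (landmark 4) and the index tip (landmark 8) works out like this:

from math import sqrt

# Illustrative normalized coordinates, not real tracking output.
thumb_tip = (0.42, 0.55)   # landmark 4
index_tip = (0.47, 0.43)   # landmark 8

dx = thumb_tip[0] - index_tip[0]   # -0.05
dy = thumb_tip[1] - index_tip[1]   # 0.12
print(sqrt(dx ** 2 + dy ** 2))     # 0.13 -> a pinch if below the palm-length threshold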
if distance_left < distance_point / 5:
    mouse.click()
elif distance_right < distance_point / 5:
    mouse.click(button="right")
elif distance_double_click < distance_point / 5:
    mouse.click(button="double")
This threshold comparison is how the decision is made: a click fires when a fingertip-to-thumb distance drops below a fifth of the palm length. The on-screen mouse position is obtained by scaling the computed hand point from frame coordinates to screen coordinates, as in the worked example below.
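Here is the scaling step with made-up numbers: a hand center at pixel (320, 180) inside an assumed 640x360 wrapped frame, mapped onto an assumed 1920x1080 screen.

# Illustrative numbers only.
wx, wy = 320, 180
ww, wh = 640, 360
screen_w, screen_h = 1920, 1080

x = round(wx * (screen_w / ww))   # 960
y = round(wy * (screen_h / wh))   # 540 -> the center of the screen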
import pyautogui
import threading
import time

class Mouse():
    def __init__(self, screen_width, screen_height):
        self._swidth = screen_width
        self._sheight = screen_height

    def move(self, x, y):
        # only move while the target stays inside the screen bounds
        if 0 < x < self._swidth and 0 < y < self._sheight:
            pyautogui.moveTo((x, y))

    def start_click(self, button):
        # click at the current cursor position if it is still on screen
        if pyautogui.position()[0] < self._swidth and pyautogui.position()[1] < self._sheight:
            if button == "left":
                pyautogui.click(pyautogui.position()[0], pyautogui.position()[1])
            elif button == "right":
                pyautogui.click(pyautogui.position()[0], pyautogui.position()[1], button="right")
            else:
                pyautogui.doubleClick(pyautogui.position()[0], pyautogui.position()[1])

    def click(self, button="left"):
        # brief pause so the cursor settles, then click on a separate thread
        time.sleep(0.15)
        thread = threading.Thread(target=self.start_click, args=(button,), daemon=False)
        thread.start()
def move(self, x, y):
    if 0 < x < self._swidth and 0 < y < self._sheight:
        pyautogui.moveTo((x, y))
The position computed from the frame is passed to move here. The other actions, such as left and right click, work the same way through start_click.
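A minimal usage sketch of the class (the 1920x1080 screen size is just an example):

mouse = Mouse(1920, 1080)
mouse.move(960, 540)          # move to the center of the screen
mouse.click()                 # left click
mouse.click(button="right")   # right click
mouse.click(button="double")  # double click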
import cv2

def cam_control(code):
    # grab a single frame and display it to verify the camera works
    cap = cv2.VideoCapture(code)
    res, frame = cap.read()
    cv2.imshow("test", frame)
    cv2.waitKey(0)  # without waiting, the window would close before it is drawn
    cv2.destroyAllWindows()
    cap.release()
You can use this function to check whether your camera is working.
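For example, assuming the default camera sits at index 0:

cam_control(0)  # press any key to close the test window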
from core.cap import start

if __name__ == '__main__':
    start(0)
Finally, we can start the application. It can still be improved: there are problems such as accidental clicks firing on their own, or clicks being difficult to trigger in some hand positions. One possible mitigation is sketched below.
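As a sketch on top of the Mouse class above (not part of the original project, and the cooldown value is a guess to tune experimentally), a simple cooldown can suppress accidental repeated clicks:

import time

# Sketch: ignore clicks that arrive too soon after the previous one.
_last_click = 0.0
CLICK_COOLDOWN = 0.5  # seconds; tune experimentally

def click_debounced(mouse, button="left"):
    global _last_click
    now = time.monotonic()
    if now - _last_click >= CLICK_COOLDOWN:
        _last_click = now
        mouse.click(button=button)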
Python Computer Vision - Mouse Control Remotely
I showed you that it is possible to use the mouse with our hands. There is also a different choice: it is eyes, with…
Example Video:
https://www.youtube.com/shorts/8_0IBCoxzPg
https://github.com/gokhanergen-tech/Remote-Computer-Control-With-Hand