1+ import os
12import tensorflow as tf
23from object_detection .utils import config_util
3- import os
44from object_detection .utils import label_map_util
55from object_detection .utils import visualization_utils as viz_utils
66from object_detection .builders import model_builder
7- import cv2
7+ import cv2
88import numpy as np
9+ import keyboard
910from playsound import playsound
1011
1112WORKSPACE_PATH = 'RealTimeObjectDetection/Tensorflow/workspace'
1718PRETRAINED_MODEL_PATH = WORKSPACE_PATH + '/pre-trained-models'
1819CONFIG_PATH = MODEL_PATH + '/my_ssd_mobnet/pipeline.config'
1920CHECKPOINT_PATH = MODEL_PATH + '/my_ssd_mobnet/'
20- CUSTOM_MODEL_NAME = 'my_ssd_mobnet'
21+ CUSTOM_MODEL_NAME = 'my_ssd_mobnet'
2122CONFIG_PATH = MODEL_PATH + '/' + CUSTOM_MODEL_NAME + '/pipeline.config'
2223
2324config = config_util .get_configs_from_pipeline_file (CONFIG_PATH )
3031height = int (cap .get (cv2 .CAP_PROP_FRAME_HEIGHT ))
3132print ("camera created" )
3233
def read_label_map(label_map_path):
    """Parse a ``label_map.pbtxt`` file into a ``{id - 1: name}`` dict.

    The file is read line by line.  Each line is split on its first ``:``
    and only the exact keys ``id`` and ``name`` are used; structural lines
    (``item {``, ``}``), blank lines and other fields such as
    ``display_name`` are ignored.  An entry is stored as soon as both an
    id and a name have been seen, in either order.

    Args:
        label_map_path: Path of the label map text file.

    Returns:
        A dict mapping each item's id, shifted down by one, to its name
        with surrounding quotes removed.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an ``id`` value is not an integer.
    """
    items = {}
    item_id = None
    item_name = None

    with open(label_map_path, "r") as file:
        for raw_line in file:
            key, sep, value = raw_line.partition(":")
            if not sep:
                # No "key: value" pair on this line (e.g. "item {" or "}").
                continue

            # Compare the whole key rather than searching for a substring:
            # a name such as 'video' contains "id" and must not be parsed
            # as an id, and "display_name" must not be taken for "name".
            key = key.strip()
            value = value.strip()
            if key == "id":
                # Ids in the file are shifted down by one for the result.
                item_id = int(value) - 1
            elif key == "name":
                # Accept both single- and double-quoted names.
                item_name = value.strip("'\"").strip()

            if item_id is not None and item_name is not None:
                items[item_id] = item_name
                item_id = None
                item_name = None

    return items
57+
3358ckpt = tf .compat .v2 .train .Checkpoint (model = detection_model )
3459ckpt .restore (os .path .join (CHECKPOINT_PATH , 'ckpt-6' )).expect_partial ()
3560category_index = label_map_util .create_category_index_from_labelmap (ANNOTATION_PATH + '/label_map.pbtxt' )
36- print ("checkpoints reached" )
61+ label_map = read_label_map (ANNOTATION_PATH + '/label_map.pbtxt' )
62+
class myWords:
    """Accumulates recognised words together with a ``modelRunning`` flag.

    Each instance owns its own ``wordList``.  A list defined only at class
    level would be shared by every instance, so words added through one
    object would appear in all the others.
    """

    # Class-level defaults kept so that attribute access on the class itself
    # (``myWords.wordList`` / ``myWords.modelRunning``) keeps working.
    wordList = []
    modelRunning = 0

    def __init__(self):
        # Give every instance a private list instead of the shared default.
        self.wordList = []
        self.modelRunning = 0

    def addWord(self, word):
        """Append ``word`` to this instance's word list."""
        self.wordList.append(word)

    def resetList(self):
        """Replace this instance's word list with a new empty list."""
        self.wordList = []
3771
def getCamera():
    """Return the module-level ``cap`` object."""
    camera = cap
    return camera
@@ -45,12 +79,14 @@ def detect_fn(image):
4579 detections = detection_model .postprocess (prediction_dict , shapes )
4680 return detections
4781
48- def activateModel ():
82+ def activateModel (m : myWords ):
83+ oldWord = ''
84+
4985 while True :
5086 ret , frame = cap .read ()
51- frame = cv2 .flip (frame , 1 )
87+ frame2 = cv2 .flip (frame , 1 )
5288
53- image_np = np .array (frame )
89+ image_np = np .array (frame2 )
5490 input_tensor = tf .convert_to_tensor (np .expand_dims (image_np , 0 ), dtype = tf .float32 )
5591 detections = detect_fn (input_tensor )
5692 num_detections = int (detections .pop ('num_detections' ))
@@ -60,48 +96,59 @@ def activateModel():
6096 detections ['detection_classes' ] = detections ['detection_classes' ].astype (np .int64 )
6197 label_id_offset = 1
6298 image_np_with_detections = image_np .copy ()
63-
6499 res = [(i , j ) for i , j in zip (detections ['detection_classes' ], detections ['detection_scores' ])]
100+
65101 for i in res :
66102 if i [1 ] >= .50 :
67103 gesture_id = int (i [0 ])
68- if gesture_id == 0 :
69- playsound ("RealTimeObjectDetection/hello_en.mp3" )
70- if gesture_id == 1 :
71- playsound ("RealTimeObjectDetection/help_en.mp3" )
72- if gesture_id == 2 :
73- playsound ("RealTimeObjectDetection/yes_en.mp3" )
74- if gesture_id == 3 :
75- playsound ("RealTimeObjectDetection/no_en.mp3" )
76- if gesture_id == 4 :
77- playsound ("RealTimeObjectDetection/i_en.mp3" )
78- if gesture_id == 5 :
79- playsound ("RealTimeObjectDetection/i_love_you_en.mp3" )
80- if gesture_id == 6 :
81- playsound ("RealTimeObjectDetection/stand_en.mp3" )
82- if gesture_id == 7 :
83- playsound ("RealTimeObjectDetection/telephone_en.mp3" )
84- if gesture_id == 8 :
85- playsound ("RealTimeObjectDetection/mom_en.mp3" )
86- if gesture_id == 9 :
87- playsound ("RealTimeObjectDetection/thank_you_en.mp3" )
104+
105+ newWord = label_map .get (gesture_id )
106+ if oldWord != newWord :
107+ m .addWord (newWord )
108+ oldWord = newWord
109+
110+
111+ # if gesture_id == 0:
112+ # playsound("hello_en.mp3")
113+ # if gesture_id == 1:
114+ # playsound("help_en.mp3")
115+ # if gesture_id == 2:
116+ # playsound("yes_en.mp3")
117+ # if gesture_id == 3:
118+ # playsound("no_en.mp3")
119+ # if gesture_id == 4:
120+ # playsound("i_en.mp3")
121+ # if gesture_id == 5:
122+ # playsound("i_love_you_en.mp3")
123+ # if gesture_id == 6:
124+ # playsound("stand_en.mp3")
125+ # if gesture_id == 7:
126+ # playsound("telephone_en.mp3")
127+ # if gesture_id == 8:
128+ # playsound("mom_en.mp3")
129+ # if gesture_id == 9:
130+ # playsound("thank_you_en.mp3")
88131
89132 viz_utils .visualize_boxes_and_labels_on_image_array (
90- image_np_with_detections ,
91- detections ['detection_boxes' ],
92- detections ['detection_classes' ]+ label_id_offset ,
93- detections ['detection_scores' ],
94- category_index ,
95- use_normalized_coordinates = True ,
96- max_boxes_to_draw = 5 ,
97- min_score_thresh = .5 ,
98- agnostic_mode = False )
133+ image_np_with_detections ,
134+ detections ['detection_boxes' ],
135+ detections ['detection_classes' ] + label_id_offset ,
136+ detections ['detection_scores' ],
137+ category_index ,
138+ use_normalized_coordinates = True ,
139+ max_boxes_to_draw = 5 ,
140+ min_score_thresh = .5 ,
141+ agnostic_mode = False )
99142
100143 # cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))
101144 ret , buffer = cv2 .imencode ('.jpg' , image_np_with_detections )
102- frame = buffer .tobytes ()
103- yield (b'--frame\r \n ' b'Content-Type: image/jpeg\r \n \r \n ' + frame + b'\r \n ' )
145+ frame3 = buffer .tobytes ()
146+ yield (b'--frame\r \n ' b'Content-Type: image/jpeg\r \n \r \n ' + frame3 + b'\r \n ' )
104147
105- if cv2 .waitKey (1 ) & 0xFF == ord ('q' ):
106- cap .release ()
107- break
148+ try :
149+ if keyboard .is_pressed ('q' ):
150+ print ("exiting model" )
151+ cap .release ()
152+ return m
153+ except :
154+ pass
0 commit comments