From 74c39419b205d245102e66c7ab1b58de6307996b Mon Sep 17 00:00:00 2001
From: Mengyuan Liu
Date: Mon, 12 Aug 2024 22:12:48 -0700
Subject: [PATCH] feat(hand-recognition): raise hand via gesture detection

---
 Makefile                                      |   4 +
 config.js                                     |   4 +
 react/features/base/config/configType.ts      |   1 +
 react/features/base/config/functions.any.ts   |  10 ++
 .../face-landmarks/FaceLandmarksDetector.ts   |  27 ++++-
 .../face-landmarks/FaceLandmarksHelper.ts     | 112 ++++++++++++++----
 react/features/face-landmarks/constants.ts    |  17 ++-
 .../face-landmarks/faceLandmarksWorker.ts     |   5 +-
 react/features/face-landmarks/types.ts        |   1 +
 9 files changed, 148 insertions(+), 33 deletions(-)

diff --git a/Makefile b/Makefile
index 68a06923e6cf..c92b0f6f2ea8 100644
--- a/Makefile
+++ b/Makefile
@@ -108,6 +108,10 @@ deploy-face-landmarks:
 		$(FACE_MODELS_DIR)/blazeface-front.json \
 		$(FACE_MODELS_DIR)/emotion.bin \
 		$(FACE_MODELS_DIR)/emotion.json \
+		$(FACE_MODELS_DIR)/handlandmark-lite.bin \
+		$(FACE_MODELS_DIR)/handlandmark-lite.json \
+		$(FACE_MODELS_DIR)/handtrack.bin \
+		$(FACE_MODELS_DIR)/handtrack.json \
 		$(DEPLOY_DIR)
 
 deploy-css:
diff --git a/config.js b/config.js
index 741f047bb11a..4f80e2f442d4 100644
--- a/config.js
+++ b/config.js
@@ -248,6 +248,10 @@ var config = {
     //     // Specifies whether there is a notification when you are the next speaker in line.
     //     disableNextSpeakerNotification: false,
     //
+    //     // Specifies whether raised-hand recognition through the video stream is enabled.
+    //     // The faceLandmarks configuration needs to be enabled for this to work.
+    //     disableRaisedHandRecognition: false,
+    //
     //     // Specifies whether the raised hand will hide when someone becomes a dominant speaker or not.
     //     disableRemoveRaisedHandOnFocus: false,
     // },
diff --git a/react/features/base/config/configType.ts b/react/features/base/config/configType.ts
index 6a183e8ca9cc..8cd3772edeec 100644
--- a/react/features/base/config/configType.ts
+++ b/react/features/base/config/configType.ts
@@ -491,6 +491,7 @@ export interface IConfig {
         disableLowerHandByModerator?: boolean;
         disableLowerHandNotification?: boolean;
         disableNextSpeakerNotification?: boolean;
+        disableRaisedHandRecognition?: boolean;
         disableRemoveRaisedHandOnFocus?: boolean;
     };
     readOnlyName?: boolean;
diff --git a/react/features/base/config/functions.any.ts b/react/features/base/config/functions.any.ts
index 918b9eb7b970..edc0d0c21a86 100644
--- a/react/features/base/config/functions.any.ts
+++ b/react/features/base/config/functions.any.ts
@@ -122,6 +122,16 @@ export function getDisableNextSpeakerNotification(state: IReduxState) {
     return state['features/base/config']?.raisedHands?.disableNextSpeakerNotification || false;
 }
 
+/**
+ * Selector used to get the disableRaisedHandRecognition flag.
+ *
+ * @param {Object} state - The global state.
+ * @returns {boolean}
+ */
+export function getDisableRaisedHandRecognition(state: IReduxState) {
+    return Boolean(state['features/base/config']?.raisedHands?.disableRaisedHandRecognition);
+}
+
 /**
  * Selector used to get the endpoint used for fetching the recording.
 *
diff --git a/react/features/face-landmarks/FaceLandmarksDetector.ts b/react/features/face-landmarks/FaceLandmarksDetector.ts
index 3c53cdef812e..94d3678383c3 100644
--- a/react/features/face-landmarks/FaceLandmarksDetector.ts
+++ b/react/features/face-landmarks/FaceLandmarksDetector.ts
@@ -1,7 +1,10 @@
 import 'image-capture';
 import './createImageBitmap';
 
 import { IStore } from '../app/types';
+import { getDisableRaisedHandRecognition } from '../base/config/functions.any';
 import { isMobileBrowser } from '../base/environment/utils';
+import { raiseHand } from '../base/participants/actions';
+import { getLocalParticipant, hasRaisedHand } from '../base/participants/functions';
 import { getLocalVideoTrack } from '../base/tracks/functions';
 import { getBaseUrl } from '../base/util/helpers';
@@ -11,12 +14,13 @@ import {
     newFaceBox
 } from './actions';
 import {
+    DETECT,
     DETECTION_TYPES,
-    DETECT_FACE,
     FACE_LANDMARKS_DETECTION_ERROR_THRESHOLD,
     INIT_WORKER,
     NO_DETECTION,
     NO_FACE_DETECTION_THRESHOLD,
+    RAISED_HAND_DURATION,
     WEBHOOK_SEND_TIME_INTERVAL
 } from './constants';
 import {
@@ -35,6 +39,7 @@ class FaceLandmarksDetector {
     private worker: Worker | null = null;
     private lastFaceExpression: string | null = null;
     private lastFaceExpressionTimestamp: number | null = null;
+    private lastRaisedHandTimestamp: number | null = null;
     private webhookSendInterval: number | null = null;
     private detectionInterval: number | null = null;
     private recognitionActive = false;
@@ -107,8 +112,21 @@ class FaceLandmarksDetector {
         workerUrl = window.URL.createObjectURL(workerBlob);
         this.worker = new Worker(workerUrl, { name: 'Face Landmarks Worker' });
         this.worker.onmessage = ({ data }: MessageEvent) => {
-            const { faceExpression, faceBox, faceCount } = data;
+            const { faceExpression, faceBox, faceCount, raisedHand } = data;
             const messageTimestamp = Date.now();
+            const localParticipant = getLocalParticipant(getState());
+
+            if (raisedHand && !hasRaisedHand(localParticipant)) {
+                if (!this.lastRaisedHandTimestamp) {
+                    this.lastRaisedHandTimestamp = messageTimestamp;
+                }
+                if (messageTimestamp - this.lastRaisedHandTimestamp >= RAISED_HAND_DURATION) {
+                    dispatch(raiseHand(true));
+                    this.lastRaisedHandTimestamp = null;
+                }
+            } else {
+                this.lastRaisedHandTimestamp = null;
+            }
 
             // if the number of faces detected is different from 1 we do not take into consideration that detection
             if (faceCount !== 1) {
@@ -155,7 +173,8 @@ class FaceLandmarksDetector {
         const { faceLandmarks } = state['features/base/config'];
         const detectionTypes = [
             faceLandmarks?.enableFaceCentering && DETECTION_TYPES.FACE_BOX,
-            faceLandmarks?.enableFaceExpressionsDetection && DETECTION_TYPES.FACE_EXPRESSIONS
+            faceLandmarks?.enableFaceExpressionsDetection && DETECTION_TYPES.FACE_EXPRESSIONS,
+            !getDisableRaisedHandRecognition(getState()) && DETECTION_TYPES.RAISED_HAND
         ].filter(Boolean);
 
         this.worker.postMessage({
@@ -341,7 +360,7 @@ class FaceLandmarksDetector {
         }
 
         this.worker.postMessage({
-            type: DETECT_FACE,
+            type: DETECT,
             image,
             threshold: faceCenteringThreshold
         });
diff --git a/react/features/face-landmarks/FaceLandmarksHelper.ts b/react/features/face-landmarks/FaceLandmarksHelper.ts
index 015f0850265f..0a0cc9c48123 100644
--- a/react/features/face-landmarks/FaceLandmarksHelper.ts
+++ b/react/features/face-landmarks/FaceLandmarksHelper.ts
@@ -1,13 +1,19 @@
 import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm';
-import { Config, FaceResult, Human } from '@vladmandic/human';
-
-import { DETECTION_TYPES, FACE_DETECTION_SCORE_THRESHOLD,
-    FACE_EXPRESSIONS_NAMING_MAPPING } from './constants';
+import { Config, FaceResult, HandResult, Human } from '@vladmandic/human';
+
+import {
+    DETECTION_TYPES,
+    FACE_DETECTION_SCORE_THRESHOLD,
+    FACE_EXPRESSIONS_NAMING_MAPPING,
+    HAND_DETECTION_SCORE_THRESHOLD
+} from './constants';
 import { DetectInput, DetectOutput, FaceBox, FaceExpression, InitInput } from './types';
 
 export interface IFaceLandmarksHelper {
     detect: ({ image, threshold }: DetectInput) => Promise<DetectOutput>;
     getDetectionInProgress: () => boolean;
-    getDetections: (image: ImageBitmap | ImageData) => Promise<Array<FaceResult>>;
+    getDetections: (image: ImageBitmap | ImageData) =>
+        Promise<{ faceDetections: FaceResult[]; handDetections: HandResult[]; }>;
     getFaceBox: (detections: Array<FaceResult>, threshold: number) => FaceBox | undefined;
     getFaceCount: (detections: Array<FaceResult>) => number;
     getFaceExpression: (detections: Array<FaceResult>) => FaceExpression | undefined;
@@ -19,7 +25,7 @@ export interface IFaceLandmarksHelper {
  */
 export class HumanHelper implements IFaceLandmarksHelper {
     protected human: Human | undefined;
-    protected faceDetectionTypes: string[];
+    protected detectionTypes: string[];
     protected baseUrl: string;
     private detectionInProgress = false;
     private lastValidFaceBox: FaceBox | undefined;
@@ -52,7 +58,17 @@ export class HumanHelper implements IFaceLandmarksHelper {
             },
             description: { enabled: false }
         },
-        hand: { enabled: false },
+        hand: {
+            enabled: false,
+            rotation: false,
+            maxDetected: 1,
+            detector: {
+                modelPath: 'handtrack.json'
+            },
+            skeleton: {
+                modelPath: 'handlandmark-lite.json'
+            }
+        },
         gesture: { enabled: false },
         body: { enabled: false },
         segmentation: { enabled: false }
@@ -65,7 +81,7 @@ export class HumanHelper implements IFaceLandmarksHelper {
      * @returns {void}
      */
     constructor({ baseUrl, detectionTypes }: InitInput) {
-        this.faceDetectionTypes = detectionTypes;
+        this.detectionTypes = detectionTypes;
         this.baseUrl = baseUrl;
         this.init();
     }
@@ -85,18 +101,24 @@ export class HumanHelper implements IFaceLandmarksHelper {
             setWasmPaths(this.baseUrl);
         }
 
-        if (this.faceDetectionTypes.length > 0 && this.config.face) {
+        if ((this.detectionTypes.includes(DETECTION_TYPES.FACE_BOX)
+            || this.detectionTypes.includes(DETECTION_TYPES.FACE_EXPRESSIONS))
+            && this.config.face) {
             this.config.face.enabled = true;
         }
 
-        if (this.faceDetectionTypes.includes(DETECTION_TYPES.FACE_BOX) && this.config.face?.detector) {
+        if (this.detectionTypes.includes(DETECTION_TYPES.FACE_BOX) && this.config.face?.detector) {
             this.config.face.detector.enabled = true;
         }
 
-        if (this.faceDetectionTypes.includes(DETECTION_TYPES.FACE_EXPRESSIONS) && this.config.face?.emotion) {
+        if (this.detectionTypes.includes(DETECTION_TYPES.FACE_EXPRESSIONS) && this.config.face?.emotion) {
             this.config.face.emotion.enabled = true;
         }
 
+        if (this.detectionTypes.includes(DETECTION_TYPES.RAISED_HAND) && this.config.hand) {
+            this.config.hand.enabled = true;
+        }
+
         const initialHuman = new Human(this.config);
 
         try {
@@ -159,6 +181,34 @@ export class HumanHelper implements IFaceLandmarksHelper {
         }
     }
 
+    /**
+     * Checks whether the hand is raised based on the hand detection results.
+     *
+     * @param {Array} handDetections - The array with the hand detections.
+     * @returns {boolean}
+     */
+    isRaisedHand(handDetections: Array<HandResult>): boolean {
+        // Only consider the fingers of the hand with the highest confidence score.
+        const [ { landmarks: fingers = undefined, label: handLabel = undefined } = {} ] = handDetections;
+
+        if (handLabel !== 'hand') {
+            return false;
+        }
+
+        const validDirections = [ 'verticalUp', 'diagonalUpRight', 'diagonalUpLeft' ];
+        let counter = 0;
+
+        if (fingers) {
+            Object.values(fingers).forEach(value => {
+                if (value.curl === 'none' && validDirections.includes(value.direction)) {
+                    counter += 1;
+                }
+            });
+        }
+
+        return counter > 3;
+    }
+
     /**
      * Gets the face count from the detections, which is the number of detections.
      *
@@ -178,21 +228,29 @@ export class HumanHelper implements IFaceLandmarksHelper {
      *
      * @param {ImageBitmap | ImageData} image - The image captured from the track,
      * if OffscreenCanvas available it will be ImageBitmap, otherwise it will be ImageData.
-     * @returns {Promise<Array<FaceResult>>}
+     * @returns {Promise<{ faceDetections: Array<FaceResult>, handDetections: Array<HandResult> }>}
      */
-    async getDetections(image: ImageBitmap | ImageData): Promise<Array<FaceResult>> {
-        if (!this.human || !this.faceDetectionTypes.length) {
-            return [];
+    async getDetections(image: ImageBitmap | ImageData):
+        Promise<{ faceDetections: Array<FaceResult>; handDetections: Array<HandResult>; }> {
+        if (!this.human || !this.detectionTypes.length) {
+            return { faceDetections: [],
+                handDetections: [] };
         }
 
         this.human.tf.engine().startScope();
 
         const imageTensor = this.human.tf.browser.fromPixels(image);
-        const { face: detections } = await this.human.detect(imageTensor, this.config);
+        const { face: faceDetections, hand: handDetections } = await this.human.detect(imageTensor, this.config);
 
         this.human.tf.engine().endScope();
 
-        return detections.filter(detection => detection.score > FACE_DETECTION_SCORE_THRESHOLD);
+        const filteredFaceDetections = faceDetections.filter(detection => detection.score > FACE_DETECTION_SCORE_THRESHOLD);
+        const filteredHandDetections = handDetections.filter(detection => detection.score > HAND_DETECTION_SCORE_THRESHOLD);
+
+        return {
+            faceDetections: filteredFaceDetections,
+            handDetections: filteredHandDetections
+        };
     }
 
     /**
@@ -204,19 +262,20 @@ export class HumanHelper implements IFaceLandmarksHelper {
     public async detect({ image, threshold }: DetectInput): Promise<DetectOutput> {
         let faceExpression;
         let faceBox;
+        let raisedHand;
 
         this.detectionInProgress = true;
 
-        const detections = await this.getDetections(image);
+        const { faceDetections, handDetections } = await this.getDetections(image);
 
-        if (this.faceDetectionTypes.includes(DETECTION_TYPES.FACE_EXPRESSIONS)) {
-            faceExpression = this.getFaceExpression(detections);
+        if (this.detectionTypes.includes(DETECTION_TYPES.FACE_EXPRESSIONS)) {
+            faceExpression = this.getFaceExpression(faceDetections);
         }
 
-        if (this.faceDetectionTypes.includes(DETECTION_TYPES.FACE_BOX)) {
+        if (this.detectionTypes.includes(DETECTION_TYPES.FACE_BOX)) {
             // if more than one face is detected the face centering will be disabled.
-            if (this.getFaceCount(detections) > 1) {
-                this.faceDetectionTypes.splice(this.faceDetectionTypes.indexOf(DETECTION_TYPES.FACE_BOX), 1);
+            if (this.getFaceCount(faceDetections) > 1) {
+                this.detectionTypes.splice(this.detectionTypes.indexOf(DETECTION_TYPES.FACE_BOX), 1);
 
                 // face-box for re-centering
                 faceBox = {
                     left: 100,
                     top: 50,
                     width: 100
                 };
             } else {
-                faceBox = this.getFaceBox(detections, threshold);
+                faceBox = this.getFaceBox(faceDetections, threshold);
             }
         }
 
+        if (this.detectionTypes.includes(DETECTION_TYPES.RAISED_HAND)) {
+            raisedHand = this.isRaisedHand(handDetections);
+        }
+
         this.detectionInProgress = false;
 
         return {
             faceExpression,
             faceBox,
-            faceCount: this.getFaceCount(detections)
+            faceCount: this.getFaceCount(faceDetections),
+            raisedHand
         };
     }
diff --git a/react/features/face-landmarks/constants.ts b/react/features/face-landmarks/constants.ts
index 81d4cff7f6dd..57fd5848acc1 100644
--- a/react/features/face-landmarks/constants.ts
+++ b/react/features/face-landmarks/constants.ts
@@ -26,6 +26,11 @@ export const FACE_EXPRESSIONS_NAMING_MAPPING = {
  */
 export const WEBHOOK_SEND_TIME_INTERVAL = 15000;
 
+/**
+ * Time in ms used for checking the raised hand duration.
+ */
+export const RAISED_HAND_DURATION = 2000;
+
 /**
  * Type of message sent from main thread to worker that contains init information:
  * such as models directory and window screen size.
@@ -49,16 +54,17 @@ export const SEND_IMAGE_INTERVAL_MS = 1000;
 
 /**
  * Type of message sent from main thread to worker that contain image data and
- * will trigger a response message from the worker containing the detected face(s) info.
+ * will trigger a response message from the worker containing the detected face and hand info.
  */
-export const DETECT_FACE = 'DETECT_FACE';
+export const DETECT = 'DETECT';
 
 /**
  * Available detection types.
  */
 export const DETECTION_TYPES = {
     FACE_BOX: 'face-box',
-    FACE_EXPRESSIONS: 'face-expressions'
+    FACE_EXPRESSIONS: 'face-expressions',
+    RAISED_HAND: 'raised-hand'
 };
 
 /**
@@ -66,6 +72,11 @@
  */
 export const FACE_DETECTION_SCORE_THRESHOLD = 0.75;
 
+/**
+ * Threshold for detection score of hand.
+ */
+export const HAND_DETECTION_SCORE_THRESHOLD = 0.8;
+
 /**
  * Threshold for stopping detection after a certain number of consecutive errors have occurred.
 */
diff --git a/react/features/face-landmarks/faceLandmarksWorker.ts b/react/features/face-landmarks/faceLandmarksWorker.ts
index a71c7f005288..9d4dfe496201 100644
--- a/react/features/face-landmarks/faceLandmarksWorker.ts
+++ b/react/features/face-landmarks/faceLandmarksWorker.ts
@@ -1,15 +1,16 @@
 import { HumanHelper, IFaceLandmarksHelper } from './FaceLandmarksHelper';
-import { DETECT_FACE, INIT_WORKER } from './constants';
+import { DETECT, INIT_WORKER } from './constants';
 
 let helper: IFaceLandmarksHelper;
 
 onmessage = async function({ data }: MessageEvent) {
     switch (data.type) {
-    case DETECT_FACE: {
+    case DETECT: {
         if (!helper || helper.getDetectionInProgress()) {
             return;
         }
 
+        // the detect result includes both face and hand detection info
         const detections = await helper.detect(data);
 
         if (detections) {
diff --git a/react/features/face-landmarks/types.ts b/react/features/face-landmarks/types.ts
index 876754eb2896..0b48f5bcaf5b 100644
--- a/react/features/face-landmarks/types.ts
+++ b/react/features/face-landmarks/types.ts
@@ -20,6 +20,7 @@ export type DetectOutput = {
     faceBox?: FaceBox;
     faceCount: number;
     faceExpression?: FaceExpression;
+    raisedHand?: boolean;
 };
 
 export type FaceExpression = {
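
Reviewer note (not part of the patch): the gesture heuristic added in FaceLandmarksHelper.isRaisedHand can be exercised in isolation. The sketch below restates that logic as a stand-alone TypeScript function; the sample input is hypothetical, and the per-finger landmark shape ({ curl, direction } keyed by finger name) is assumed to match what @vladmandic/human returns in HandResult.landmarks, so it should be verified against the pinned library version.

    // Stand-alone restatement of the raised-hand heuristic from this patch.
    type FingerState = { curl: string; direction: string };

    // Directions that count as "pointing up", as listed in isRaisedHand.
    const VALID_DIRECTIONS = [ 'verticalUp', 'diagonalUpRight', 'diagonalUpLeft' ];

    // Mirrors the patch: a hand counts as raised when more than three fingers
    // are uncurled and pointing upward.
    function isRaisedHandSketch(fingers: Record<string, FingerState>): boolean {
        const extended = Object.values(fingers)
            .filter(f => f.curl === 'none' && VALID_DIRECTIONS.includes(f.direction));

        return extended.length > 3;
    }

    // Hypothetical sample: four fingers extended upward, pinky half-curled,
    // so the gesture is recognized as a raised hand.
    console.log(isRaisedHandSketch({
        thumb: { curl: 'none', direction: 'diagonalUpLeft' },
        index: { curl: 'none', direction: 'verticalUp' },
        middle: { curl: 'none', direction: 'verticalUp' },
        ring: { curl: 'none', direction: 'verticalUp' },
        pinky: { curl: 'half', direction: 'verticalUp' }
    })); // true

Because the worker only reports a per-frame boolean, FaceLandmarksDetector additionally debounces the signal: the gesture must be observed continuously for RAISED_HAND_DURATION (2000 ms) before raiseHand(true) is dispatched, which guards against transient false positives.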