Merge pull request #70 from SkalskiP/develop

1.5.0-alpha release
SkalskiP · Sep 30, 2019 · a91f750 · a91f750
2 parents 97e5742 + 4a309e3
commit a91f750
Show file tree

Hide file tree

Showing 82 changed files with 1,744 additions and 626 deletions.
diff --git a/README.md b/README.md
@@ -21,17 +21,33 @@ Andrew Ng
 ## Sneak Peek
 
 <p align="center"> 
-    <img width="1000" src=".//examples/alfa-demo.gif" alt="alfa-demo">
+    <img width="1000" src=".//examples/demo-base.gif" alt="alfa-demo">
 </p>
 
+**Figure 1.** Basic version of the application - without AI support
+
 ## Advanced AI functionalities
 
-[makesense.ai][1] strives to significantly reduce the time we have to spend on labeling photos. To achieve this, we are going to use many different AI models that will be able to give you recommendations as well as automate repetitive and tedious activities. The first step on this journey is to use a [SSD model][8] pretrained on the [COCO dataset][9], which will do some of the work for you in drawing bboxes on photos and - in future versions of the application - will also suggest a label. We also plan to add, among other things, models that classify photos, detect characteristic features of faces, whole faces, and also human pose. The engine that drives our AI functionalities is [TensorFlow.js][10] - JS version of the most popular framework for training neural networks. This choice allows us not only to speed up your work but also to care about the privacy of your data, because unlike with other commercial and open source tools, your photos do not have to be transferred to the server. This time AI comes to your device!
+[makesense.ai][1] strives to significantly reduce the time we have to spend on labeling photos. To achieve this, we are going to use many different AI models that will be able to give you recommendations as well as automate repetitive and tedious activities.
+
+* [SSD model][8] pretrained on the [COCO dataset][9], which will do some of the work for you by drawing bboxes on photos and (in some cases) also suggesting a label.
+* [PoseNet model][11] is a vision model that can be used to estimate the pose of a person in an image or video by estimating where key body joints are.
+
+In the future, we also plan to add, among other things, models that classify photos, detect characteristic features of faces as well as whole faces. The engine that drives our AI functionalities is [TensorFlow.js][10] - JS version of the most popular framework for training neural networks. This choice allows us not only to speed up your work but also to care about the privacy of your data, because unlike with other commercial and open source tools, your photos do not have to be transferred to the server. This time AI comes to your device!
 
 <p align="center"> 
-    <img width="1000" src=".//examples/ai-demo.gif" alt="ai-demo">
+    <img width="1000" src=".//examples/demo-ssd.gif" alt="ai-demo">
 </p>
 
+**Figure 2.** SSD model - allows you to detect multiple objects, speeding up the bbox labeling process
+
+
+<p align="center"> 
+    <img width="1000" src=".//examples/demo-posenet.gif" alt="ai-demo">
+</p>
+
+**Figure 3.** PoseNet model - allows you to detect people's poses in photos, automating point labeling in some use cases
+
 ## Set Up the Project Locally
 
 ```bash
@@ -119,3 +135,4 @@ Copyright (c) 2019-present, Piotr Skalski
 [8]: https://arxiv.org/abs/1512.02325
 [9]: http://cocodataset.org
 [10]: https://www.tensorflow.org/js
+[11]: https://www.tensorflow.org/lite/models/pose_estimation/overview
diff --git a/examples/demo-base.gif b/examples/demo-base.gif
diff --git a/examples/demo-base.mp4 b/examples/demo-base.mp4
diff --git a/examples/demo-posenet.gif b/examples/demo-posenet.gif
diff --git a/examples/demo-posenet.mp4 b/examples/demo-posenet.mp4
diff --git a/examples/demo-ssd.gif b/examples/demo-ssd.gif
diff --git a/examples/demo-ssd.mp4 b/examples/demo-ssd.mp4
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -5,6 +5,7 @@
   "dependencies": {
     "@material-ui/core": "^4.1.1",
     "@tensorflow-models/coco-ssd": "^2.0.0",
+    "@tensorflow-models/posenet": "^2.1.3",
     "@tensorflow/tfjs": "^1.2.9",
     "@types/jest": "24.0.14",
     "@types/node": "12.0.8",

diff --git a/public/ico/accept-all.png b/public/ico/accept-all.png
diff --git a/public/ico/checkbox-checked.png b/public/ico/checkbox-checked.png
diff --git a/public/ico/checkbox-unchecked.png b/public/ico/checkbox-unchecked.png
diff --git a/public/ico/left_.png b/public/ico/left_.png
diff --git a/public/ico/reject-all.png b/public/ico/reject-all.png
diff --git a/public/ico/right_.png b/public/ico/right_.png
diff --git a/src/App.scss b/src/App.scss
@@ -2,10 +2,12 @@
 
 .App {
 	--leading-color: #{$secondaryColor};
+	--button-text-color: #{white};
 	--hue-value: 172deg;
 }
 
 .App.AI {
 	--leading-color: #{$primaryColor};
+	--button-text-color: #{$darkThemeSecondColor};
 	--hue-value: 120deg;
 }
diff --git a/src/App.tsx b/src/App.tsx
@@ -16,10 +16,11 @@ import classNames from "classnames";
 interface IProps {
     projectType: ProjectType;
     windowSize: ISize;
-    AIMode: boolean;
+    ObjectDetectorLoaded: boolean;
+    PoseDetectionLoaded: boolean;
 }
 
-const App: React.FC<IProps> = ({projectType, windowSize, AIMode}) => {
+const App: React.FC<IProps> = ({projectType, windowSize, ObjectDetectorLoaded, PoseDetectionLoaded}) => {
     const selectRoute = () => {
         if (!!PlatformModel.mobileDeviceData.manufacturer && !!PlatformModel.mobileDeviceData.os)
             return <MobileMainView/>;
@@ -35,7 +36,7 @@ const App: React.FC<IProps> = ({projectType, windowSize, AIMode}) => {
     };
 
       return (
-        <div className={classNames("App", {"AI": AIMode})}
+        <div className={classNames("App", {"AI": ObjectDetectorLoaded || PoseDetectionLoaded})}
             draggable={false}
         >
             {selectRoute()}
@@ -47,7 +48,8 @@ const App: React.FC<IProps> = ({projectType, windowSize, AIMode}) => {
 const mapStateToProps = (state: AppState) => ({
     projectType: state.general.projectData.type,
     windowSize: state.general.windowSize,
-    AIMode: state.ai.isObjectDetectorLoaded
+    ObjectDetectorLoaded: state.ai.isObjectDetectorLoaded,
+    PoseDetectionLoaded: state.ai.isPoseDetectorLoaded
 });
 
 export default connect(

diff --git a/src/ai/ObjectDetector.ts b/src/ai/ObjectDetector.ts
@@ -1,9 +1,11 @@
 import * as cocoSsd from '@tensorflow-models/coco-ssd';
-import {ObjectDetection} from "@tensorflow-models/coco-ssd";
-import {DetectedObject} from "@tensorflow-models/coco-ssd";
+import {DetectedObject, ObjectDetection} from '@tensorflow-models/coco-ssd';
 import {store} from "../index";
 import {updateObjectDetectorStatus} from "../store/ai/actionCreators";
-import {AIActions} from "../logic/actions/AIActions";
+import {LabelType} from "../data/enums/LabelType";
+import {LabelsSelector} from "../store/selectors/LabelsSelector";
+import {AIObjectDetectionActions} from "../logic/actions/AIObjectDetectionActions";
+import {updateActiveLabelType} from "../store/labels/actionCreators";
 
 export class ObjectDetector {
     private static model: ObjectDetection;
@@ -14,7 +16,9 @@ export class ObjectDetector {
             .then((model: ObjectDetection) => {
                 ObjectDetector.model = model;
                 store.dispatch(updateObjectDetectorStatus(true));
-                AIActions.detectRectsForActiveImage();
+                store.dispatch(updateActiveLabelType(LabelType.RECTANGLE));
+                const activeLabelType: LabelType = LabelsSelector.getActiveLabelType();
+                activeLabelType === LabelType.RECTANGLE && AIObjectDetectionActions.detectRectsForActiveImage();
                 callback && callback();
             })
             .catch((error) => {

diff --git a/src/ai/PoseDetector.ts b/src/ai/PoseDetector.ts
@@ -0,0 +1,49 @@
+import * as posenet from '@tensorflow-models/posenet';
+import {PoseNet} from "@tensorflow-models/posenet";
+import {Pose} from "@tensorflow-models/posenet";
+import {store} from "../index";
+import {updatePoseDetectorStatus} from "../store/ai/actionCreators";
+import {AIPoseDetectionActions} from "../logic/actions/AIPoseDetectionActions";
+import {LabelType} from "../data/enums/LabelType";
+import {LabelsSelector} from "../store/selectors/LabelsSelector";
+import {updateActiveLabelType} from "../store/labels/actionCreators";
+
+export class PoseDetector {
+    private static model: PoseNet;
+
+    public static loadModel(callback?: () => any) {
+        posenet
+            .load({
+                architecture: 'ResNet50',
+                outputStride: 32,
+                inputResolution: 257,
+                quantBytes: 2
+            })
+            .then((model: PoseNet) => {
+                PoseDetector.model = model;
+                store.dispatch(updatePoseDetectorStatus(true));
+                store.dispatch(updateActiveLabelType(LabelType.POINT));
+                const activeLabelType: LabelType = LabelsSelector.getActiveLabelType();
+                activeLabelType === LabelType.POINT && AIPoseDetectionActions.detectPoseForActiveImage();
+                callback && callback();
+            })
+            .catch((error) => {
+                // TODO
+                throw new Error(error);
+            })
+    }
+
+    public static predict(image: HTMLImageElement, callback?: (predictions: Pose[]) => any) {
+        if (!PoseDetector.model) return;
+
+        PoseDetector.model
+            .estimateMultiplePoses(image)
+            .then((predictions: Pose[]) => {
+                callback && callback(predictions)
+            })
+            .catch((error) => {
+                // TODO
+                throw new Error(error);
+            })
+    }
+}
diff --git a/src/data/ImageButtonDropDownData.ts b/src/data/ImageButtonDropDownData.ts
diff --git a/src/data/enums/AIModel.ts b/src/data/enums/AIModel.ts
@@ -0,0 +1,4 @@
+export enum AIModel {
+    OBJECT_DETECTION = "OBJECT_DETECTION",
+    POSE_DETECTION = "POSE_DETECTION"
+}
diff --git a/src/data/enums/PopupWindowType.ts b/src/data/enums/PopupWindowType.ts
@@ -1,8 +1,11 @@
 export enum PopupWindowType {
     LOAD_LABEL_NAMES = "LOAD_LABEL_NAMES",
+    UPDATE_LABEL_NAMES = "UPDATE_LABEL_NAMES",
+    SUGGEST_LABEL_NAMES = "SUGGEST_LABEL_NAMES",
     LOAD_IMAGES = "LOAD_IMAGES",
     LOAD_AI_MODEL = "LOAD_AI_MODEL",
     EXPORT_LABELS = "EXPORT_LABELS",
     INSERT_LABEL_NAMES = 'INSERT_LABEL_NAMES',
-    EXIT_PROJECT = 'EXIT_PROJECT'
+    EXIT_PROJECT = 'EXIT_PROJECT',
+    LOADER = 'LOADER'
 }
diff --git a/src/logic/actions/AIActions.ts b/src/logic/actions/AIActions.ts
@@ -1,53 +1,54 @@
-import {DetectedObject} from "@tensorflow-models/coco-ssd";
-import {ImageData, LabelRect} from "../../store/labels/types";
+import {LabelType} from "../../data/enums/LabelType";
 import {LabelsSelector} from "../../store/selectors/LabelsSelector";
-import uuidv1 from 'uuid/v1';
-import {store} from "../../index";
-import {updateImageDataById} from "../../store/labels/actionCreators";
-import {ObjectDetector} from "../../ai/ObjectDetector";
-import {ImageRepository} from "../imageRepository/ImageRepository";
-import {LabelStatus} from "../../data/enums/LabelStatus";
+import {AIObjectDetectionActions} from "./AIObjectDetectionActions";
+import {AIPoseDetectionActions} from "./AIPoseDetectionActions";
+import {ImageData} from "../../store/labels/types";
 
 export class AIActions {
-    public static detectRectsForActiveImage(): void {
-        const activeImageData: ImageData = LabelsSelector.getActiveImageData();
-        AIActions.detectRects(activeImageData.id, ImageRepository.getById(activeImageData.id))
+    public static excludeRejectedLabelNames(suggestedLabels: string[], rejectedLabels: string[]): string[] {
+        return suggestedLabels.reduce((acc: string[], label: string) => {
+            if (!rejectedLabels.includes(label)) {
+                acc.push(label)
+            }
+            return acc;
+        }, [])
     }
 
-    public static detectRects(imageId: string, image: HTMLImageElement): void {
-        if (LabelsSelector.getImageDataById(imageId).isVisitedByObjectDetector)
-            return;
+    public static detect(imageId: string, image: HTMLImageElement): void {
+        const activeLabelType: LabelType = LabelsSelector.getActiveLabelType();
 
-        ObjectDetector.predict(image, (predictions: DetectedObject[]) => {
-            AIActions.savePredictions(imageId, predictions);
-        })
+        switch (activeLabelType) {
+            case LabelType.RECTANGLE:
+                AIObjectDetectionActions.detectRects(imageId, image);
+                break;
+            case LabelType.POINT:
+                AIPoseDetectionActions.detectPoses(imageId, image);
+                break;
+        }
     }
 
-    public static savePredictions(imageId: string, predictions: DetectedObject[]) {
-        const imageData: ImageData = LabelsSelector.getImageDataById(imageId);
-        const predictedLabels: LabelRect[] = AIActions.mapPredictionsToRectLabels(predictions);
-        const nextImageData: ImageData = {
-            ...imageData,
-            labelRects: imageData.labelRects.concat(predictedLabels),
-            isVisitedByObjectDetector: true
-        };
-        store.dispatch(updateImageDataById(imageData.id, nextImageData));
+    public static rejectAllSuggestedLabels(imageData: ImageData) {
+        const activeLabelType: LabelType = LabelsSelector.getActiveLabelType();
+
+        switch (activeLabelType) {
+            case LabelType.RECTANGLE:
+                AIObjectDetectionActions.rejectAllSuggestedRectLabels(imageData);
+                break;
+            case LabelType.POINT:
+                AIPoseDetectionActions.rejectAllSuggestedPointLabels(imageData);
+                break;
+        }
     }
 
-    public static mapPredictionsToRectLabels(predictions: DetectedObject[]): LabelRect[] {
-        return predictions.map((prediction: DetectedObject) => {
-            return {
-                id: uuidv1(),
-                labelIndex: null,
-                rect: {
-                    x: prediction.bbox[0],
-                    y: prediction.bbox[1],
-                    width: prediction.bbox[2],
-                    height: prediction.bbox[3],
-                },
-                isCreatedByAI: true,
-                status: LabelStatus.UNDECIDED
-            }
-        })
+    public static acceptAllSuggestedLabels(imageData: ImageData) {
+        const activeLabelType: LabelType = LabelsSelector.getActiveLabelType();
+        switch (activeLabelType) {
+            case LabelType.RECTANGLE:
+                AIObjectDetectionActions.acceptAllSuggestedRectLabels(imageData);
+                break;
+            case LabelType.POINT:
+                AIPoseDetectionActions.acceptAllSuggestedPointLabels(imageData);
+                break;
+        }
     }
 }