#include "main_code.h" #include #include #include #include #include "include/Engine.h" #include "main_code.h" //#define USE_PREDICTION #define SMOOTH_LANDMARKS cv::Point flipVertical(cv::Point point) { return cv::Point(point.x, 720 - point.y); } FaceStruct::FaceStruct() { modelPoints.push_back(cv::Point3d(6.825897, 6.760612, 4.402142)); // left brow left corner modelPoints.push_back(cv::Point3d(1.330353, 7.122144, 6.903745)); // left brow right corner modelPoints.push_back(cv::Point3d(-1.330353, 7.122144, 6.903745)); // right brow left corner modelPoints.push_back(cv::Point3d(-6.825897, 6.760612, 4.402142)); // right brow right corner modelPoints.push_back(cv::Point3d(5.311432, 5.485328, 3.987654)); // left eye left corner modelPoints.push_back(cv::Point3d(1.789930, 5.393625, 4.413414)); // left eye right corner modelPoints.push_back(cv::Point3d(-1.789930, 5.393625, 4.413414)); // right eye left corner modelPoints.push_back(cv::Point3d(-5.311432, 5.485328, 3.987654)); // right eye right corner modelPoints.push_back(cv::Point3d(2.005628, 1.409845, 6.165652)); // nose left corner modelPoints.push_back(cv::Point3d(-2.005628, 1.409845, 6.165652)); // nose right corner modelPoints.push_back(cv::Point3d(2.774015, -2.080775, 5.048531)); // mouth left corner modelPoints.push_back(cv::Point3d(-2.774015, -2.080775, 5.048531)); // mouth right corner modelPoints.push_back(cv::Point3d(0.000000, -3.116408, 6.097667)); // mouth central bottom corner modelPoints.push_back(cv::Point3d(0.000000, -7.415691, 4.070434)); // chin corner poseMatrix = cv::Mat(3, 4, CV_64FC1); eulerAngles = cv::Mat(3, 1, CV_64FC1); boxModelSrc.push_back(cv::Point3d(10.0, 10.0, 10.0)); boxModelSrc.push_back(cv::Point3d(10.0, 10.0, -10.0)); boxModelSrc.push_back(cv::Point3d(10.0, -10.0, -10.0)); boxModelSrc.push_back(cv::Point3d(10.0, -10.0, 10.0)); boxModelSrc.push_back(cv::Point3d(-10.0, 10.0, 10.0)); boxModelSrc.push_back(cv::Point3d(-10.0, 10.0, -10.0)); boxModelSrc.push_back(cv::Point3d(-10.0, -10.0, -10.0)); boxModelSrc.push_back(cv::Point3d(-10.0, -10.0, 10.0)); } FaceStruct::FaceStruct(const std::array& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { FaceStruct(); ApplyPreds(fromPreds, 0.0, cameraMatrix, distortionCoefficients); } void FaceStruct::CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { #ifdef SMOOTH_LANDMARKS for(size_t i = 0; i < LANDMARK_POINT_COUNT; i++) { if(similarity < 0.01) { preds[i] = Vector2f( landmarkSmoothers[i * 2].responsiveAnalogReadSimple(preds[i](0)), landmarkSmoothers[i * 2 + 1].responsiveAnalogReadSimple(preds[i](1)) ); } else { landmarkSmoothers[i * 2].reset(); landmarkSmoothers[i * 2 + 1].reset(); } } #endif float minX = preds[0](0); float maxX = preds[0](0); float minY = preds[0](1); float maxY = preds[0](1); float sumX = preds[0](0); float sumY = preds[0](1); for (size_t i = 1; i < LANDMARK_POINT_COUNT; i++) { if (minX > preds[i](0)) { minX = preds[i](0); } if (minY > preds[i](1)) { minY = preds[i](1); } if (maxX < preds[i](0)) { maxX = preds[i](0); } if (maxY < preds[i](1)) { maxY = preds[i](1); } sumX += preds[i](0); sumY += preds[i](1); } float avgX = sumX / static_cast(LANDMARK_POINT_COUNT); float avgY = sumY / static_cast(LANDMARK_POINT_COUNT); std::vector imagePoints; imagePoints.push_back(cv::Point2f(preds[17](0), preds[17](1))); // #17 left brow left corner imagePoints.push_back(cv::Point2f(preds[21](0), preds[21](1))); // #21 left brow right corner imagePoints.push_back(cv::Point2f(preds[22](0), preds[22](1))); // #22 right brow left corner imagePoints.push_back(cv::Point2f(preds[26](0), preds[26](1))); // #26 right brow right corner imagePoints.push_back(cv::Point2f(preds[36](0), preds[36](1))); // #36 left eye left corner imagePoints.push_back(cv::Point2f(preds[39](0), preds[39](1))); // #39 left eye right corner imagePoints.push_back(cv::Point2f(preds[42](0), preds[42](1))); // #42 right eye left corner imagePoints.push_back(cv::Point2f(preds[45](0), preds[45](1))); // #45 right eye right corner imagePoints.push_back(cv::Point2f(preds[31](0), preds[31](1))); // #31 nose left corner imagePoints.push_back(cv::Point2f(preds[35](0), preds[35](1))); // #35 nose right corner imagePoints.push_back(cv::Point2f(preds[48](0), preds[48](1))); // #48 mouth left corner imagePoints.push_back(cv::Point2f(preds[54](0), preds[54](1))); // #54 mouth right corner imagePoints.push_back(cv::Point2f(preds[57](0), preds[57](1))); // #57 mouth central bottom corner imagePoints.push_back(cv::Point2f(preds[8](0), preds[8](1))); // #8 chin corner cv::solvePnP(modelPoints, imagePoints, cameraMatrix, distortionCoefficients, rotationVector, translationVector); //if(similarity > 0.01) { // for (size_t i = 0; i < 3; i++) { // rotationVectorSmoothers[i].reset(); // translationVectorSmoothers[i].reset(); // } //} /*for(size_t i = 0; i < 3; i++) { rotationVector.at(i) = rotationVectorSmoothers[i].responsiveAnalogReadSimple(rotationVector.at(i)); translationVector.at(i) = translationVectorSmoothers[i].responsiveAnalogReadSimple(translationVector.at(i)); }*/ cv::projectPoints(boxModelSrc, rotationVector, translationVector, cameraMatrix, distortionCoefficients, boxModelDst); // calc Euler angles cv::Rodrigues(rotationVector, rotationMatrix); cv::hconcat(rotationMatrix, translationVector, poseMatrix); cv::Mat outIntrinsics = cv::Mat(3, 3, CV_64FC1); cv::Mat outRotation = cv::Mat(3, 3, CV_64FC1); cv::Mat outTranslation = cv::Mat(3, 1, CV_64FC1); cv::decomposeProjectionMatrix(poseMatrix, outIntrinsics, outRotation, outTranslation, cv::noArray(), cv::noArray(), cv::noArray(), eulerAngles); center = {avgX, avgY}; size = {maxX- minX, maxY - minY}; } void FaceStruct::ApplyPreds(const std::array& fromPreds, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { InnerApplyPreds(fromPreds); CalcFromPreds(similarity, cameraMatrix, distortionCoefficients); } void FaceStruct::InnerApplyPreds(const std::array& fromPreds) { for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) { preds[i] = { fromPreds[i].x, 720 - fromPreds[i].y}; } } cv::VideoCapture cap; FaceLandmarkDetector faceLandmarkDetector; GLuint lastFrameTexture; std::array faceLandmarkArr; cv::Mat frame; std::vector frameArr; int currentIndex = -1; int lastProcessedIndex = 0; int lineShift = 0; int prevPassedIndex = 0; int indexStep = 0; std::array faceStruct; const std::string CONST_LOADING_BACKGROUND_BLACK = "loading_background_black"; TMyApplication* Application; void TMyApplication::InnerInit() { Application = this; #ifdef TARGET_WIN32 #ifdef NDEBUG //ST::PathToResources = "resources/"; ST::PathToResources = "../../../assets/"; #else ST::PathToResources = "../../../assets/"; #endif #endif #ifdef TARGET_IOS ST::PathToResources = "assets/"; #endif if (Console != NULL) { *Console<<"APP INIT\n"; } srand (static_cast(time(NULL))); ResourceManager->ShaderManager.AddShader("DefaultShader", "shader1vertex.txt", "shader1fragment.txt"); ResourceManager->ShaderManager.AddShader("FrameShader", "frameshader_vertex.txt", "frameshader_fragment.txt"); Renderer->PushShader("DefaultShader"); ResourceManager->TexList.AddTexture(CONST_LOADING_BACKGROUND_BLACK + ".png", CONST_LOADING_BACKGROUND_BLACK); auto texid = ResourceManager->TexList.AddTexture("emoji/Asset 3@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 4@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 5@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 6@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 7@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 8@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 9@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 10@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 11@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 13@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 14@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 15@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 16@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 17@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 18@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 19@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 20@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 21@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 22@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 23@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 24@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 25@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 26@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 27@20x.png"); ResourceManager->TexList.AddTexture("emoji/Asset 28@20x.png"); ResourceManager->FontManager.AddFont("droid_sans14", "droid_sans14_font_bitmap.png", "droid_sans14_font_charmap.txt"); ResourceManager->FontManager.PushFont("droid_sans14"); lastFrameTexture = ResourceManager->TexList.AddEmptyTexture("lastFrameTexture", 1280, 720); Renderer->SetOrthoProjection(); Renderer->SetFullScreenViewport(); for (size_t i = 0; i < MAX_FACE_COUNT; i++) { faceRenderPair[i].first.SamplerMap[CONST_STRING_TEXTURE_UNIFORM] = "Asset "+boost::lexical_cast(i+3)+"@20x.png"; faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(0, 0, 0)); faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(0, 512, 0)); faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(512, 512, 0)); faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(512, 512, 0)); faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(512, 0, 0)); faceRenderPair[i].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB].push_back(Vector3f(0, 0, 0)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(0, 0)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(0, 1)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(1, 1)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(1, 1)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(1, 0)); faceRenderPair[i].second.Data.Vec2CoordArr[CONST_STRING_TEXCOORD_ATTRIB].push_back(Vector2f(0, 0)); faceRenderPair[i].second.RefreshBuffer(); } cap = cv::VideoCapture(0); //cap = cv::VideoCapture("video.mp4"); //cap = cv::VideoCapture("bp.mp4"); // Check if camera opened successfully if (!cap.isOpened()) { std::cout << "Error opening video stream or file" << std::endl; } cap.set(CV_CAP_PROP_FRAME_WIDTH, 1280); cap.set(CV_CAP_PROP_FRAME_HEIGHT, 720); faceLandmarkDetector.StartTrackProcess(); Inited = true; } void TMyApplication::InnerDeinit() { faceLandmarkDetector.StopTrackProcess(); cap.release(); Inited = false; if (Console != NULL) { *Console<<"APP DEINIT\n"; } } void TMyApplication::InnerOnTapDown(Vector2f p) { } void TMyApplication::InnerOnTapUp(Vector2f p) { } void TMyApplication::InnerOnTapUpAfterMove(Vector2f p) { } void TMyApplication::InnerOnMove(Vector2f p, Vector2f shift) { } void TMyApplication::OnFling(Vector2f v) { } void TMyApplication::InnerDraw() { glDisable(GL_DEPTH_TEST); //glBindTexture(GL_TEXTURE_2D, ResourceManager->TexList[CONST_LOADING_BACKGROUND_BLACK]); glBindTexture(GL_TEXTURE_2D, lastFrameTexture); Renderer->DrawRect(Vector2f(0.f, 0.f), Vector2f(Renderer->GetMatrixWidth(), Renderer->GetMatrixHeight())); /* for (size_t i = 0; i < MAX_FACE_COUNT; i++) { TRenderParamsSetter params(faceRenderPair[i].first); Renderer->DrawTriangleList(faceRenderPair[i].second); }*/ } void TMyApplication::InnerUpdate(size_t dt) { cap >> frame; // get a new frame from camera/video or read image if (frame.empty()) { return; } cv::Mat image = frame.clone(); currentIndex++; faceLandmarkArr = faceLandmarkDetector.GetFaceLandmarks(frame, currentIndex); #ifdef USE_PREDICTION frameArr.push_back(image); while (frameArr.size() > 40) { lineShift++; frameArr.erase(frameArr.begin()); } currentIndex++; faceLandmarkArr = faceLandmarkDetector.GetFaceLandmarks(frame, currentIndex); bool newProcessedIndexFound = false; int newProcessedIndex; for (size_t i = 0; i < faceLandmarkArr.size(); i++) { if (faceLandmarkArr[i].valid) { if (!newProcessedIndexFound) { newProcessedIndexFound = true; newProcessedIndex = faceLandmarkArr[i].frameIndex; } else { if (newProcessedIndex < faceLandmarkArr[i].frameIndex) { newProcessedIndex = faceLandmarkArr[i].frameIndex; } } } } if (newProcessedIndexFound && newProcessedIndex > lastProcessedIndex) { indexStep = 0; lastProcessedIndex = newProcessedIndex; prevPassedIndex = currentIndex; } else { if (prevPassedIndex > lastProcessedIndex + indexStep) { indexStep++; } } //std::cout << currentIndex << " " << lastProcessedIndex << " stepped:" << lastProcessedIndex + indexStep << " indexStep: " << indexStep << std::endl; cv::Mat renderImage; if (lastProcessedIndex + indexStep - lineShift < 0) { renderImage = frameArr[0]; } else if (lastProcessedIndex + indexStep - lineShift >= frameArr.size()) { renderImage = frameArr[frameArr.size() - 1]; } else { renderImage = frameArr[lastProcessedIndex + indexStep - lineShift]; } renderImage = renderImage.clone(); #else cv::Mat renderImage = image; #endif cv::Mat cameraMatrix = (cv::Mat_(3, 3) << 9.5434798118121421e+02, 0.0, 640.0, 0.0, 9.5434798118121421e+02, 360.0, 0.0, 0.0, 1.0); cv::Mat distortionCoefficients = (cv::Mat_(5, 1) << 3.5475494652047600e-02, - 5.9818881494543774e-01, 0.0, 0.0, 1.4642154802454528e+00); for (size_t i = 0; i < faceLandmarkArr.size(); i++) { if (faceLandmarkArr[i].valid) { cv::rectangle(renderImage, faceLandmarkArr[i].faceRect, cv::Scalar(0, 0, 255), 1, 4, 0); for (size_t j = 0; j < faceLandmarkArr[i].landmarkArr.size(); j++) { //cv::circle(renderImage, faceLandmarkArr[i].landmarkArr[j], 0.1, cv::Scalar(0, 255, 255), 4, 8, 0); } } } for (size_t index = 0; index < faceLandmarkArr.size(); index++) { if (faceLandmarkArr[index].valid) { faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, faceLandmarkArr[index].similarity, cameraMatrix, distortionCoefficients); std::ostringstream similarityText; similarityText << std::setprecision(3) << faceLandmarkArr[index].similarity; cv::putText(renderImage, similarityText.str(), cv::Point(50, index * 60 + 60), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(255, 255, 255)); for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) { cv::ellipse(renderImage, cv::Point2f(faceStruct[index].preds[i](0), 720 - faceStruct[index].preds[i](1)), cv::Size(2, 2), 0, 0, 360, cv::Scalar(255, 128, 128), 4, 8, 0); } cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[0]), flipVertical(faceStruct[index].boxModelDst[1]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[1]), flipVertical(faceStruct[index].boxModelDst[2]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[2]), flipVertical(faceStruct[index].boxModelDst[3]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[3]), flipVertical(faceStruct[index].boxModelDst[0]), cv::Scalar(0, 255, 0)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[4]), flipVertical(faceStruct[index].boxModelDst[5]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[5]), flipVertical(faceStruct[index].boxModelDst[6]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[6]), flipVertical(faceStruct[index].boxModelDst[7]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[7]), flipVertical(faceStruct[index].boxModelDst[4]), cv::Scalar(0, 255, 0)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[0]), flipVertical(faceStruct[index].boxModelDst[4]), cv::Scalar(0, 255, 0)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[1]), flipVertical(faceStruct[index].boxModelDst[5]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[2]), flipVertical(faceStruct[index].boxModelDst[6]), cv::Scalar(0, 0, 255)); cv::line(renderImage, flipVertical(faceStruct[index].boxModelDst[3]), flipVertical(faceStruct[index].boxModelDst[7]), cv::Scalar(0, 255, 0)); Vector2f posFrom = faceStruct[index].center - faceStruct[index].size*0.5f* 1.35f; Vector2f posTo = faceStruct[index].center + faceStruct[index].size*0.5f* 1.35f; ApplyVertexCoordVec(faceRenderPair[index].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB], posFrom, posTo, 0); faceRenderPair[index].second.RefreshBuffer(); } else { ApplyVertexCoordVec(faceRenderPair[index].second.Data.Vec3CoordArr[CONST_STRING_POSITION_ATTRIB], { 0,0 }, { 0,0 }, 0); faceRenderPair[index].second.RefreshBuffer(); } } glBindTexture(GL_TEXTURE_2D, lastFrameTexture); static std::array buffer; int step = renderImage.step; int height = renderImage.rows; int width = renderImage.cols; int channels = 3; char * data = (char *)renderImage.data; for (int i = 0; i < height; i++) { int ci = height - i - 1; memcpy(&buffer[i*width*channels], &(data[ci*step]), width*channels); } glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, 1280, 720, 0, GL_BGR_EXT, GL_UNSIGNED_BYTE, &buffer[0]); } bool TMyApplication::IsInited() { return Inited; }