From 15febc465183ce25e54d7a9569d22063943fdc51 Mon Sep 17 00:00:00 2001 From: Alexander Biryukov Date: Fri, 1 Jun 2018 22:31:55 +0500 Subject: [PATCH] Similarity --- game/main_code.cpp | 89 +++++-------------- game/main_code.h | 11 +-- .../DoubleHitBalls-win.vcxproj | 2 +- .../FaceLandmarkDetector.cpp | 39 +++----- .../DoubleHitBalls-win/FaceLandmarkDetector.h | 12 +-- 5 files changed, 39 insertions(+), 114 deletions(-) diff --git a/game/main_code.cpp b/game/main_code.cpp index c9ae714..f3b4623 100755 --- a/game/main_code.cpp +++ b/game/main_code.cpp @@ -9,8 +9,6 @@ #include "main_code.h" - - //#define USE_PREDICTION cv::Point flipVertical(cv::Point point) { @@ -50,10 +48,10 @@ FaceStruct::FaceStruct() FaceStruct::FaceStruct(const std::array& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { FaceStruct(); - ApplyPreds(fromPreds, cameraMatrix, distortionCoefficients); + ApplyPreds(fromPreds, 0.0, cameraMatrix, distortionCoefficients); } -void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) +void FaceStruct::CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { float minX = preds[0](0); float maxX = preds[0](0); @@ -109,13 +107,17 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic cv::solvePnP(modelPoints, imagePoints, cameraMatrix, distortionCoefficients, rotationVector, translationVector); - rotationVector.at(0) = rotationVectorSmoothers[0].responsiveAnalogReadSimple(rotationVector.at(0)); - rotationVector.at(1) = rotationVectorSmoothers[1].responsiveAnalogReadSimple(rotationVector.at(1)); - rotationVector.at(2) = rotationVectorSmoothers[2].responsiveAnalogReadSimple(rotationVector.at(2)); + if(similarity > 0.01) { + for (size_t i = 0; i < 3; i++) { + rotationVectorSmoothers[i].reset(); + translationVectorSmoothers[i].reset(); + } + } - translationVector.at(0) = translationVectorSmoothers[0].responsiveAnalogReadSimple(translationVector.at(0)); - translationVector.at(1) = translationVectorSmoothers[1].responsiveAnalogReadSimple(translationVector.at(1)); - translationVector.at(2) = translationVectorSmoothers[2].responsiveAnalogReadSimple(translationVector.at(2)); + for(size_t i = 0; i < 3; i++) { + rotationVector.at(i) = rotationVectorSmoothers[i].responsiveAnalogReadSimple(rotationVector.at(i)); + // translationVector.at(i) = translationVectorSmoothers[i].responsiveAnalogReadSimple(translationVector.at(i)); + } cv::projectPoints(boxModelSrc, rotationVector, translationVector, cameraMatrix, distortionCoefficients, boxModelDst); @@ -128,50 +130,14 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic cv::Mat outTranslation = cv::Mat(3, 1, CV_64FC1); cv::decomposeProjectionMatrix(poseMatrix, outIntrinsics, outRotation, outTranslation, cv::noArray(), cv::noArray(), cv::noArray(), eulerAngles); - // std::cout << "(" << std::setprecision(3) << eulerAngles.at(0) << ", " << std::setprecision(3) << eulerAngles.at(1) << std::setprecision(3) << eulerAngles.at(2) << ")" << std::endl; - - center = { avgX, avgY }; - size = { maxX- minX,maxY - minY }; - - //center = { centerXSmoother.responsiveAnalogReadSimple(avgX), centerYSmoother.responsiveAnalogReadSimple(avgY) }; - //size = { sizeWidthSmoother.responsiveAnalogReadSimple(maxX- minX), sizeHeightSmoother.responsiveAnalogReadSimple(maxY - minY) }; + center = {avgX, avgY}; + size = {maxX- minX, maxY - minY}; } -void FaceStruct::ApplyPreds(const std::array& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) +void FaceStruct::ApplyPreds(const std::array& fromPreds, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) { InnerApplyPreds(fromPreds); - CalcFromPreds(cameraMatrix, distortionCoefficients); - - /* - for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) - { - preds[i] = preds[i] - center; - - } - - - historicalPreds.push_back(preds); - if (historicalPreds.size() > 4) - { - historicalPreds.erase(historicalPreds.begin()); - } - - - for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) - { - preds[i] = { 0.f, 0.f }; - for (size_t x = 0; x < historicalPreds.size(); x++) - { - preds[i] += historicalPreds[x][i]; - } - - preds[i] = preds[i] / static_cast(historicalPreds.size()); - } - - for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) - { - preds[i] = preds[i] + center; - }*/ + CalcFromPreds(similarity, cameraMatrix, distortionCoefficients); } void FaceStruct::InnerApplyPreds(const std::array& fromPreds) @@ -183,16 +149,12 @@ void FaceStruct::InnerApplyPreds(const std::array faceLandmarkArr; cv::Mat frame; @@ -399,9 +361,6 @@ void TMyApplication::InnerDraw() void TMyApplication::InnerUpdate(size_t dt) { - - - cap >> frame; // get a new frame from camera/video or read image if (frame.empty()) @@ -490,15 +449,11 @@ void TMyApplication::InnerUpdate(size_t dt) renderImage = renderImage.clone(); #else - - cv::Mat renderImage = image; + cv::Mat renderImage = image; #endif - //cv::Rect frameRect(cv::Point(), renderImage.size()); - float focalLength = renderImage.cols; - cv::Point2f center = cv::Point2f(renderImage.cols / 2, renderImage.rows / 2); - cv::Mat cameraMatrix = (cv::Mat_(3, 3) << focalLength, 0, center.x, 0, focalLength, center.y, 0, 0, 1); - cv::Mat distortionCoefficients = cv::Mat::zeros(4, 1, cv::DataType::type); + cv::Mat cameraMatrix = (cv::Mat_(3, 3) << 9.5434798118121421e+02, 0.0, 640.0, 0.0, 9.5434798118121421e+02, 360.0, 0.0, 0.0, 1.0); + cv::Mat distortionCoefficients = (cv::Mat_(5, 1) << 3.5475494652047600e-02, - 5.9818881494543774e-01, 0.0, 0.0, 1.4642154802454528e+00); for (size_t i = 0; i < faceLandmarkArr.size(); i++) { @@ -520,9 +475,11 @@ void TMyApplication::InnerUpdate(size_t dt) { if (faceLandmarkArr[index].valid) { - faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, cameraMatrix, distortionCoefficients); + faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, faceLandmarkArr[index].similarity, cameraMatrix, distortionCoefficients); + std::ostringstream similarityText; + similarityText << std::setprecision(3) << faceLandmarkArr[index].similarity; + cv::putText(renderImage, similarityText.str(), cv::Point(50, index * 60 + 60), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0)); - for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) { cv::ellipse(renderImage, cv::Point2f(faceStruct[index].preds[i](0), 720 - faceStruct[index].preds[i](1)), cv::Size(2, 2), 0, 0, 360, cv::Scalar(255, 128, 128), 4, 8, 0); diff --git a/game/main_code.h b/game/main_code.h index 76c1961..49b6222 100755 --- a/game/main_code.h +++ b/game/main_code.h @@ -33,15 +33,8 @@ struct FaceStruct SE::Vector2f center; SE::Vector2f size; - ValueSmoother centerXSmoother; - ValueSmoother centerYSmoother; - ValueSmoother sizeWidthSmoother; - ValueSmoother sizeHeightSmoother; - std::array preds; - //std::vector> historicalPreds; - std::vector modelPoints; cv::Mat rotationVector; cv::Mat translationVector; @@ -62,9 +55,9 @@ struct FaceStruct FaceStruct(const std::array& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients); - void CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients); + void CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients); - void ApplyPreds(const std::array& fromPred, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficientss); + void ApplyPreds(const std::array& fromPred, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficientss); void InnerApplyPreds(const std::array& fromPreds); }; diff --git a/windows/DoubleHitBalls-win/DoubleHitBalls-win/DoubleHitBalls-win.vcxproj b/windows/DoubleHitBalls-win/DoubleHitBalls-win/DoubleHitBalls-win.vcxproj index 482c70d..0cce0df 100755 --- a/windows/DoubleHitBalls-win/DoubleHitBalls-win/DoubleHitBalls-win.vcxproj +++ b/windows/DoubleHitBalls-win/DoubleHitBalls-win/DoubleHitBalls-win.vcxproj @@ -148,7 +148,7 @@ Level3 - MaxSpeed + Disabled true true WIN32;NDEBUG;_WINDOWS;TARGET_WIN32;_WIN32_WINNT=0x0501;WIN32_LEAN_AND_MEAN;EIGEN_DONT_ALIGN_STATICALLY;%(PreprocessorDefinitions) diff --git a/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.cpp b/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.cpp index e4d6d46..6c96e1b 100755 --- a/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.cpp +++ b/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.cpp @@ -1,18 +1,13 @@ #include "FaceLandmarkDetector.h" - - static const size_t faceTrackerWidth = 300; static const size_t faceTrackerHeight = 300; static const double faceTrackerScaleFactor = 1.0; static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0); - float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2) { - - cv::Point2f oldCenter(face1.x + face1.width*0.5f, face1.y + face1.height*0.5f); cv::Point2f newCenter(face2.x + face2.width*0.5f, face2.y + face2.height*0.5f); @@ -24,8 +19,14 @@ float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& fac return squareDistance; } - - +double calcSimilarity(const cv::Mat& first, const cv::Mat& second) +{ + // assumes that first and second have the same size + double errorL2 = cv::norm(first, second, CV_L2); + // Convert to a reasonable scale, since L2 error is summed across all pixels of the image. + double similarity = errorL2 / static_cast(first.rows * first.cols); + return similarity; +} FaceLandmarkDetector::FaceLandmarkDetector() { @@ -45,18 +46,13 @@ FaceLandmarkDetector::FaceLandmarkDetector() #endif - - faceNet = cv::dnn::readNetFromCaffe(faceTrackerConfiguration, faceTrackerBinary); - for (size_t i = 0; i < MAX_FACE_COUNT; i++) { landmarkNetArr[i] = cv::dnn::readNetFromCaffe(landmarkTrackerConfiguration, landmarkTrackerBinary); } - } - FaceLandmarkDetector::~FaceLandmarkDetector() { StopTrackProcess(); @@ -161,7 +157,6 @@ void FaceLandmarkDetector::FindFaceThreadProc() if (!localFrame.empty()) { std::vector faces = InnerFindFaces(localFrame); - std::vector faceFrames; //Get rid of faces that are too big @@ -186,14 +181,13 @@ void FaceLandmarkDetector::FindFaceThreadProc() InnerSeparateFacesToBuckets(faces, faceFrames, localFaceRectArr); - for (size_t i = 0; i lock(syncMutex); // OUT to main thread >>>> syncFaceLandmarkArr[threadIndex] = localOutputLandmarks; - - - - - bool doLoop = true; while (doLoop) @@ -315,7 +300,6 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex) } - std::vector FaceLandmarkDetector::InnerFindFaces(cv::Mat frame) { if (frame.channels() == 4) @@ -356,8 +340,6 @@ std::vector FaceLandmarkDetector::InnerFindFaces(cv::Mat frame) object.width = object.width*1.2f; object.height = object.height*1.2f; - - faces.push_back(object); } } @@ -468,6 +450,7 @@ void FaceLandmarkDetector::InnerSeparateFacesToBuckets(std::vector& fa } } + inOutBucketArr[j].similarity = calcSimilarity(inOutBucketArr[j].faceFrame, facesBelongsToSpace[j][lastIndex].second); inOutBucketArr[j].faceRect = facesBelongsToSpace[j][lastIndex].first; inOutBucketArr[j].faceFrame = facesBelongsToSpace[j][lastIndex].second; diff --git a/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.h b/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.h index 528688b..f4d1766 100755 --- a/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.h +++ b/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.h @@ -16,23 +16,17 @@ #include #include - - #include #include #include - #define USE_3D_LANDMARKS - //#include "boost/thread.hpp" constexpr size_t LANDMARK_POINT_COUNT = 68; - constexpr size_t MAX_FACE_COUNT = 5; - struct FaceIntermediateStruct { bool valid = false; @@ -41,7 +35,7 @@ struct FaceIntermediateStruct cv::Mat faceFrame; int frameIndex; - + double similarity = 0.0; }; @@ -53,6 +47,7 @@ struct FaceLandmarkStruct std::array landmarkArr; int frameIndex; + double similarity = 0.0; }; @@ -105,7 +100,4 @@ protected: cv::Mat InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect); void InnerSeparateFacesToBuckets(std::vector& faceArr, std::vector& faceFrameArr, std::array& inOutBucketArr); - - }; -