Similarity

This commit is contained in:
Alexander Biryukov 2018-06-01 22:31:55 +05:00
parent 601730e0ce
commit 15febc4651
5 changed files with 39 additions and 114 deletions

View File

@ -9,8 +9,6 @@
#include "main_code.h" #include "main_code.h"
//#define USE_PREDICTION //#define USE_PREDICTION
cv::Point flipVertical(cv::Point point) { cv::Point flipVertical(cv::Point point) {
@ -50,10 +48,10 @@ FaceStruct::FaceStruct()
// Constructs a FaceStruct from an initial set of landmark predictions by calling
// ApplyPreds on them. In the new revision (right-hand column) the similarity
// argument passed to ApplyPreds is the constant 0.0.
//
// NOTE(review): the statement `FaceStruct();` in the body creates an unnamed temporary
// object and destroys it immediately; it does not run the default constructor on the
// object being constructed. If the default constructor's initialisation is meant to
// apply here, a delegating constructor (`: FaceStruct()` in the initializer list) is
// presumably what was intended -- confirm against the default constructor's body,
// which is not visible in this view.
FaceStruct::FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) FaceStruct::FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{ {
FaceStruct(); FaceStruct();
ApplyPreds(fromPreds, cameraMatrix, distortionCoefficients); ApplyPreds(fromPreds, 0.0, cameraMatrix, distortionCoefficients);
} }
void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) void FaceStruct::CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{ {
float minX = preds[0](0); float minX = preds[0](0);
float maxX = preds[0](0); float maxX = preds[0](0);
@ -109,13 +107,17 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic
cv::solvePnP(modelPoints, imagePoints, cameraMatrix, distortionCoefficients, rotationVector, translationVector); cv::solvePnP(modelPoints, imagePoints, cameraMatrix, distortionCoefficients, rotationVector, translationVector);
rotationVector.at<double>(0) = rotationVectorSmoothers[0].responsiveAnalogReadSimple(rotationVector.at<double>(0)); if(similarity > 0.01) {
rotationVector.at<double>(1) = rotationVectorSmoothers[1].responsiveAnalogReadSimple(rotationVector.at<double>(1)); for (size_t i = 0; i < 3; i++) {
rotationVector.at<double>(2) = rotationVectorSmoothers[2].responsiveAnalogReadSimple(rotationVector.at<double>(2)); rotationVectorSmoothers[i].reset();
translationVectorSmoothers[i].reset();
}
}
translationVector.at<double>(0) = translationVectorSmoothers[0].responsiveAnalogReadSimple(translationVector.at<double>(0)); for(size_t i = 0; i < 3; i++) {
translationVector.at<double>(1) = translationVectorSmoothers[1].responsiveAnalogReadSimple(translationVector.at<double>(1)); rotationVector.at<double>(i) = rotationVectorSmoothers[i].responsiveAnalogReadSimple(rotationVector.at<double>(i));
translationVector.at<double>(2) = translationVectorSmoothers[2].responsiveAnalogReadSimple(translationVector.at<double>(2)); // translationVector.at<double>(i) = translationVectorSmoothers[i].responsiveAnalogReadSimple(translationVector.at<double>(i));
}
cv::projectPoints(boxModelSrc, rotationVector, translationVector, cameraMatrix, distortionCoefficients, boxModelDst); cv::projectPoints(boxModelSrc, rotationVector, translationVector, cameraMatrix, distortionCoefficients, boxModelDst);
@ -128,50 +130,14 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic
cv::Mat outTranslation = cv::Mat(3, 1, CV_64FC1); cv::Mat outTranslation = cv::Mat(3, 1, CV_64FC1);
cv::decomposeProjectionMatrix(poseMatrix, outIntrinsics, outRotation, outTranslation, cv::noArray(), cv::noArray(), cv::noArray(), eulerAngles); cv::decomposeProjectionMatrix(poseMatrix, outIntrinsics, outRotation, outTranslation, cv::noArray(), cv::noArray(), cv::noArray(), eulerAngles);
// std::cout << "(" << std::setprecision(3) << eulerAngles.at<double>(0) << ", " << std::setprecision(3) << eulerAngles.at<double>(1) << std::setprecision(3) << eulerAngles.at<double>(2) << ")" << std::endl; center = {avgX, avgY};
size = {maxX- minX, maxY - minY};
center = { avgX, avgY };
size = { maxX- minX,maxY - minY };
//center = { centerXSmoother.responsiveAnalogReadSimple(avgX), centerYSmoother.responsiveAnalogReadSimple(avgY) };
//size = { sizeWidthSmoother.responsiveAnalogReadSimple(maxX- minX), sizeHeightSmoother.responsiveAnalogReadSimple(maxY - minY) };
} }
// Applies a new set of landmark predictions to this face in two steps:
//   1. InnerApplyPreds(fromPreds) is called with the incoming landmark array.
//   2. CalcFromPreds(...) is called with the camera matrix and distortion coefficients.
// In the new revision (right-hand column) an extra `similarity` parameter is accepted
// and forwarded unchanged to CalcFromPreds.
// The block comment inside the body is disabled code and has no effect at runtime.
void FaceStruct::ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients) void FaceStruct::ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{ {
InnerApplyPreds(fromPreds); InnerApplyPreds(fromPreds);
CalcFromPreds(cameraMatrix, distortionCoefficients); CalcFromPreds(similarity, cameraMatrix, distortionCoefficients);
/*
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = preds[i] - center;
}
historicalPreds.push_back(preds);
if (historicalPreds.size() > 4)
{
historicalPreds.erase(historicalPreds.begin());
}
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = { 0.f, 0.f };
for (size_t x = 0; x < historicalPreds.size(); x++)
{
preds[i] += historicalPreds[x][i];
}
preds[i] = preds[i] / static_cast<float>(historicalPreds.size());
}
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = preds[i] + center;
}*/
} }
void FaceStruct::InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds) void FaceStruct::InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds)
@ -183,16 +149,12 @@ void FaceStruct::InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_CO
} }
} }
cv::VideoCapture cap; cv::VideoCapture cap;
FaceLandmarkDetector faceLandmarkDetector; FaceLandmarkDetector faceLandmarkDetector;
GLuint lastFrameTexture; GLuint lastFrameTexture;
std::array<FaceLandmarkStruct, MAX_FACE_COUNT> faceLandmarkArr; std::array<FaceLandmarkStruct, MAX_FACE_COUNT> faceLandmarkArr;
cv::Mat frame; cv::Mat frame;
@ -399,9 +361,6 @@ void TMyApplication::InnerDraw()
void TMyApplication::InnerUpdate(size_t dt) void TMyApplication::InnerUpdate(size_t dt)
{ {
cap >> frame; // get a new frame from camera/video or read image cap >> frame; // get a new frame from camera/video or read image
if (frame.empty()) if (frame.empty())
@ -490,15 +449,11 @@ void TMyApplication::InnerUpdate(size_t dt)
renderImage = renderImage.clone(); renderImage = renderImage.clone();
#else #else
cv::Mat renderImage = image;
cv::Mat renderImage = image;
#endif #endif
//cv::Rect frameRect(cv::Point(), renderImage.size());
float focalLength = renderImage.cols; cv::Mat cameraMatrix = (cv::Mat_<float>(3, 3) << 9.5434798118121421e+02, 0.0, 640.0, 0.0, 9.5434798118121421e+02, 360.0, 0.0, 0.0, 1.0);
cv::Point2f center = cv::Point2f(renderImage.cols / 2, renderImage.rows / 2); cv::Mat distortionCoefficients = (cv::Mat_<float>(5, 1) << 3.5475494652047600e-02, - 5.9818881494543774e-01, 0.0, 0.0, 1.4642154802454528e+00);
cv::Mat cameraMatrix = (cv::Mat_<float>(3, 3) << focalLength, 0, center.x, 0, focalLength, center.y, 0, 0, 1);
cv::Mat distortionCoefficients = cv::Mat::zeros(4, 1, cv::DataType<float>::type);
for (size_t i = 0; i < faceLandmarkArr.size(); i++) for (size_t i = 0; i < faceLandmarkArr.size(); i++)
{ {
@ -520,9 +475,11 @@ void TMyApplication::InnerUpdate(size_t dt)
{ {
if (faceLandmarkArr[index].valid) if (faceLandmarkArr[index].valid)
{ {
faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, cameraMatrix, distortionCoefficients); faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, faceLandmarkArr[index].similarity, cameraMatrix, distortionCoefficients);
std::ostringstream similarityText;
similarityText << std::setprecision(3) << faceLandmarkArr[index].similarity;
cv::putText(renderImage, similarityText.str(), cv::Point(50, index * 60 + 60), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0));
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++) for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{ {
cv::ellipse(renderImage, cv::Point2f(faceStruct[index].preds[i](0), 720 - faceStruct[index].preds[i](1)), cv::Size(2, 2), 0, 0, 360, cv::Scalar(255, 128, 128), 4, 8, 0); cv::ellipse(renderImage, cv::Point2f(faceStruct[index].preds[i](0), 720 - faceStruct[index].preds[i](1)), cv::Size(2, 2), 0, 0, 360, cv::Scalar(255, 128, 128), 4, 8, 0);

View File

@ -33,15 +33,8 @@ struct FaceStruct
SE::Vector2f center; SE::Vector2f center;
SE::Vector2f size; SE::Vector2f size;
ValueSmoother centerXSmoother;
ValueSmoother centerYSmoother;
ValueSmoother sizeWidthSmoother;
ValueSmoother sizeHeightSmoother;
std::array<SE::Vector2f, LANDMARK_POINT_COUNT> preds; std::array<SE::Vector2f, LANDMARK_POINT_COUNT> preds;
//std::vector<std::array<SE::Vector2f, LANDMARK_POINT_COUNT>> historicalPreds;
std::vector<cv::Point3f> modelPoints; std::vector<cv::Point3f> modelPoints;
cv::Mat rotationVector; cv::Mat rotationVector;
cv::Mat translationVector; cv::Mat translationVector;
@ -62,9 +55,9 @@ struct FaceStruct
FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients); FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients); void CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPred, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficientss); void ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPred, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficientss);
void InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds); void InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds);
}; };

View File

@ -148,7 +148,7 @@
<WarningLevel>Level3</WarningLevel> <WarningLevel>Level3</WarningLevel>
<PrecompiledHeader> <PrecompiledHeader>
</PrecompiledHeader> </PrecompiledHeader>
<Optimization>MaxSpeed</Optimization> <Optimization>Disabled</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;TARGET_WIN32;_WIN32_WINNT=0x0501;WIN32_LEAN_AND_MEAN;EIGEN_DONT_ALIGN_STATICALLY;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;TARGET_WIN32;_WIN32_WINNT=0x0501;WIN32_LEAN_AND_MEAN;EIGEN_DONT_ALIGN_STATICALLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>

View File

@ -1,18 +1,13 @@
#include "FaceLandmarkDetector.h" #include "FaceLandmarkDetector.h"
static const size_t faceTrackerWidth = 300; static const size_t faceTrackerWidth = 300;
static const size_t faceTrackerHeight = 300; static const size_t faceTrackerHeight = 300;
static const double faceTrackerScaleFactor = 1.0; static const double faceTrackerScaleFactor = 1.0;
static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0); static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0);
float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2) float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2)
{ {
cv::Point2f oldCenter(face1.x + face1.width*0.5f, face1.y + face1.height*0.5f); cv::Point2f oldCenter(face1.x + face1.width*0.5f, face1.y + face1.height*0.5f);
cv::Point2f newCenter(face2.x + face2.width*0.5f, face2.y + face2.height*0.5f); cv::Point2f newCenter(face2.x + face2.width*0.5f, face2.y + face2.height*0.5f);
@ -24,8 +19,14 @@ float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& fac
return squareDistance; return squareDistance;
} }
// Computes a size-normalised L2 difference between two images.
//
// Despite the name, the result is a *dissimilarity*: 0.0 means the two images are
// identical and larger values mean they differ more.
//
// @param first   First image.
// @param second  Second image; must have the same size and type as `first` to be comparable.
// @return        cv::norm(first, second, NORM_L2) divided by the pixel count of `first`
//                (rows * cols). When the images cannot be compared (either one is empty,
//                or their sizes/types differ) a large finite sentinel is returned so that
//                callers treat the pair as maximally dissimilar.
double calcSimilarity(const cv::Mat& first, const cv::Mat& second)
{
    // Sentinel for "cannot be compared"; finite so it can still be printed or logged.
    constexpr double notComparable = 1.0e9;

    // cv::norm(src1, src2, ...) asserts that both inputs share size and type, and the
    // normalisation below would divide by zero for empty inputs, so guard explicitly.
    if (first.empty() || second.empty() || first.size() != second.size() || first.type() != second.type())
    {
        return notComparable;
    }

    const double errorL2 = cv::norm(first, second, cv::NORM_L2);

    // Scale by the number of pixels so the value does not depend on image resolution.
    // Multiply in double to avoid int overflow for very large images.
    const double pixelCount = static_cast<double>(first.rows) * static_cast<double>(first.cols);
    return errorL2 / pixelCount;
}
FaceLandmarkDetector::FaceLandmarkDetector() FaceLandmarkDetector::FaceLandmarkDetector()
{ {
@ -45,18 +46,13 @@ FaceLandmarkDetector::FaceLandmarkDetector()
#endif #endif
faceNet = cv::dnn::readNetFromCaffe(faceTrackerConfiguration, faceTrackerBinary); faceNet = cv::dnn::readNetFromCaffe(faceTrackerConfiguration, faceTrackerBinary);
for (size_t i = 0; i < MAX_FACE_COUNT; i++) for (size_t i = 0; i < MAX_FACE_COUNT; i++)
{ {
landmarkNetArr[i] = cv::dnn::readNetFromCaffe(landmarkTrackerConfiguration, landmarkTrackerBinary); landmarkNetArr[i] = cv::dnn::readNetFromCaffe(landmarkTrackerConfiguration, landmarkTrackerBinary);
} }
} }
FaceLandmarkDetector::~FaceLandmarkDetector() FaceLandmarkDetector::~FaceLandmarkDetector()
{ {
StopTrackProcess(); StopTrackProcess();
@ -161,7 +157,6 @@ void FaceLandmarkDetector::FindFaceThreadProc()
if (!localFrame.empty()) if (!localFrame.empty())
{ {
std::vector<cv::Rect> faces = InnerFindFaces(localFrame); std::vector<cv::Rect> faces = InnerFindFaces(localFrame);
std::vector<cv::Mat> faceFrames; std::vector<cv::Mat> faceFrames;
//Get rid of faces that are too big //Get rid of faces that are too big
@ -186,14 +181,13 @@ void FaceLandmarkDetector::FindFaceThreadProc()
InnerSeparateFacesToBuckets(faces, faceFrames, localFaceRectArr); InnerSeparateFacesToBuckets(faces, faceFrames, localFaceRectArr);
for (size_t i = 0; i <localFaceRectArr.size(); i++) for (size_t i = 0; i < localFaceRectArr.size(); i++)
{ {
if (localFaceRectArr[i].valid) if (localFaceRectArr[i].valid)
{ {
localFaceRectArr[i].frameIndex = localFrameIndex; localFaceRectArr[i].frameIndex = localFrameIndex;
} }
} }
} }
@ -214,19 +208,15 @@ void FaceLandmarkDetector::FindFaceThreadProc()
localFrameIndex = syncLastFrameIndex; localFrameIndex = syncLastFrameIndex;
localStop = syncStop; localStop = syncStop;
} }
} }
void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex) void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
{ {
FaceIntermediateStruct localInputFace; FaceIntermediateStruct localInputFace;
FaceLandmarkStruct localOutputLandmarks; FaceLandmarkStruct localOutputLandmarks;
bool localStop = false; bool localStop = false;
while (!localStop) while (!localStop)
{ {
if (localInputFace.valid) if (localInputFace.valid)
@ -258,6 +248,7 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
localOutputLandmarks.faceRect = localInputFace.faceRect; localOutputLandmarks.faceRect = localInputFace.faceRect;
localOutputLandmarks.frameIndex = localInputFace.frameIndex; localOutputLandmarks.frameIndex = localInputFace.frameIndex;
localOutputLandmarks.similarity = localInputFace.similarity;
localOutputLandmarks.valid = true; localOutputLandmarks.valid = true;
} }
else else
@ -268,17 +259,11 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
//Sync area //Sync area
std::unique_lock<std::mutex> lock(syncMutex); std::unique_lock<std::mutex> lock(syncMutex);
// OUT to main thread >>>> // OUT to main thread >>>>
syncFaceLandmarkArr[threadIndex] = localOutputLandmarks; syncFaceLandmarkArr[threadIndex] = localOutputLandmarks;
bool doLoop = true; bool doLoop = true;
while (doLoop) while (doLoop)
@ -315,7 +300,6 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
} }
std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame) std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
{ {
if (frame.channels() == 4) if (frame.channels() == 4)
@ -356,8 +340,6 @@ std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
object.width = object.width*1.2f; object.width = object.width*1.2f;
object.height = object.height*1.2f; object.height = object.height*1.2f;
faces.push_back(object); faces.push_back(object);
} }
} }
@ -468,6 +450,7 @@ void FaceLandmarkDetector::InnerSeparateFacesToBuckets(std::vector<cv::Rect>& fa
} }
} }
inOutBucketArr[j].similarity = calcSimilarity(inOutBucketArr[j].faceFrame, facesBelongsToSpace[j][lastIndex].second);
inOutBucketArr[j].faceRect = facesBelongsToSpace[j][lastIndex].first; inOutBucketArr[j].faceRect = facesBelongsToSpace[j][lastIndex].first;
inOutBucketArr[j].faceFrame = facesBelongsToSpace[j][lastIndex].second; inOutBucketArr[j].faceFrame = facesBelongsToSpace[j][lastIndex].second;

View File

@ -16,23 +16,17 @@
#include <opencv2/dnn.hpp> #include <opencv2/dnn.hpp>
#include <thread> #include <thread>
#include <queue> #include <queue>
#include <mutex> #include <mutex>
#include <condition_variable> #include <condition_variable>
#define USE_3D_LANDMARKS #define USE_3D_LANDMARKS
//#include "boost/thread.hpp" //#include "boost/thread.hpp"
constexpr size_t LANDMARK_POINT_COUNT = 68; constexpr size_t LANDMARK_POINT_COUNT = 68;
constexpr size_t MAX_FACE_COUNT = 5; constexpr size_t MAX_FACE_COUNT = 5;
struct FaceIntermediateStruct struct FaceIntermediateStruct
{ {
bool valid = false; bool valid = false;
@ -41,7 +35,7 @@ struct FaceIntermediateStruct
cv::Mat faceFrame; cv::Mat faceFrame;
int frameIndex; int frameIndex;
double similarity = 0.0;
}; };
@ -53,6 +47,7 @@ struct FaceLandmarkStruct
std::array<cv::Point2f, LANDMARK_POINT_COUNT> landmarkArr; std::array<cv::Point2f, LANDMARK_POINT_COUNT> landmarkArr;
int frameIndex; int frameIndex;
double similarity = 0.0;
}; };
@ -105,7 +100,4 @@ protected:
cv::Mat InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect); cv::Mat InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect);
void InnerSeparateFacesToBuckets(std::vector<cv::Rect>& faceArr, std::vector<cv::Mat>& faceFrameArr, std::array<FaceIntermediateStruct, MAX_FACE_COUNT>& inOutBucketArr); void InnerSeparateFacesToBuckets(std::vector<cv::Rect>& faceArr, std::vector<cv::Mat>& faceFrameArr, std::array<FaceIntermediateStruct, MAX_FACE_COUNT>& inOutBucketArr);
}; };