Similarity

Alexander Biryukov 2018-06-01 22:31:55 +05:00
parent 601730e0ce
commit 15febc4651
5 changed files with 39 additions and 114 deletions

View File

@@ -9,8 +9,6 @@
#include "main_code.h"
//#define USE_PREDICTION
cv::Point flipVertical(cv::Point point) {
@@ -50,10 +48,10 @@ FaceStruct::FaceStruct()
FaceStruct::FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{
FaceStruct();
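// note: this statement only creates and destroys a temporary FaceStruct; it does
// not re-run the default constructor on *this (a delegating constructor would be
// written as ': FaceStruct()' in the initializer list instead)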
ApplyPreds(fromPreds, cameraMatrix, distortionCoefficients);
ApplyPreds(fromPreds, 0.0, cameraMatrix, distortionCoefficients);
}
void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
void FaceStruct::CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{
float minX = preds[0](0);
float maxX = preds[0](0);
@@ -109,13 +107,17 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic
cv::solvePnP(modelPoints, imagePoints, cameraMatrix, distortionCoefficients, rotationVector, translationVector);
rotationVector.at<double>(0) = rotationVectorSmoothers[0].responsiveAnalogReadSimple(rotationVector.at<double>(0));
rotationVector.at<double>(1) = rotationVectorSmoothers[1].responsiveAnalogReadSimple(rotationVector.at<double>(1));
rotationVector.at<double>(2) = rotationVectorSmoothers[2].responsiveAnalogReadSimple(rotationVector.at<double>(2));
if(similarity > 0.01) {
for (size_t i = 0; i < 3; i++) {
rotationVectorSmoothers[i].reset();
translationVectorSmoothers[i].reset();
}
}
translationVector.at<double>(0) = translationVectorSmoothers[0].responsiveAnalogReadSimple(translationVector.at<double>(0));
translationVector.at<double>(1) = translationVectorSmoothers[1].responsiveAnalogReadSimple(translationVector.at<double>(1));
translationVector.at<double>(2) = translationVectorSmoothers[2].responsiveAnalogReadSimple(translationVector.at<double>(2));
for(size_t i = 0; i < 3; i++) {
rotationVector.at<double>(i) = rotationVectorSmoothers[i].responsiveAnalogReadSimple(rotationVector.at<double>(i));
// translationVector.at<double>(i) = translationVectorSmoothers[i].responsiveAnalogReadSimple(translationVector.at<double>(i));
}
cv::projectPoints(boxModelSrc, rotationVector, translationVector, cameraMatrix, distortionCoefficients, boxModelDst);
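// --- Illustrative sketch, not part of this commit: the hunk above resets every
// rotation and translation ValueSmoother whenever the per-face similarity score
// exceeds 0.01, i.e. whenever the newly matched face crop differs noticeably from
// the previous one, and then smooths only the rotation components through
// responsiveAnalogReadSimple() (the translation smoothing call is left commented
// out). Resetting lets the filter snap to the new pose instead of slowly
// converging after a jump. ValueSmoother's internals are not shown in this diff;
// the exponential filter below is an assumed stand-in for illustration only.
struct ExponentialSmootherSketch
{
    double state = 0.0;
    bool primed = false;
    double alpha = 0.25;                  // assumed smoothing factor
    void reset() { primed = false; }      // called when similarity > 0.01
    double read(double sample)            // analogue of responsiveAnalogReadSimple()
    {
        if (!primed) { state = sample; primed = true; return sample; }
        state += alpha * (sample - state);
        return state;
    }
};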
@@ -128,50 +130,14 @@ void FaceStruct::CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoeffic
cv::Mat outTranslation = cv::Mat(3, 1, CV_64FC1);
cv::decomposeProjectionMatrix(poseMatrix, outIntrinsics, outRotation, outTranslation, cv::noArray(), cv::noArray(), cv::noArray(), eulerAngles);
// std::cout << "(" << std::setprecision(3) << eulerAngles.at<double>(0) << ", " << std::setprecision(3) << eulerAngles.at<double>(1) << std::setprecision(3) << eulerAngles.at<double>(2) << ")" << std::endl;
center = {avgX, avgY};
size = {maxX - minX, maxY - minY};
//center = { centerXSmoother.responsiveAnalogReadSimple(avgX), centerYSmoother.responsiveAnalogReadSimple(avgY) };
//size = { sizeWidthSmoother.responsiveAnalogReadSimple(maxX- minX), sizeHeightSmoother.responsiveAnalogReadSimple(maxY - minY) };
}
void FaceStruct::ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
void FaceStruct::ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients)
{
InnerApplyPreds(fromPreds);
CalcFromPreds(cameraMatrix, distortionCoefficients);
/*
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = preds[i] - center;
}
historicalPreds.push_back(preds);
if (historicalPreds.size() > 4)
{
historicalPreds.erase(historicalPreds.begin());
}
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = { 0.f, 0.f };
for (size_t x = 0; x < historicalPreds.size(); x++)
{
preds[i] += historicalPreds[x][i];
}
preds[i] = preds[i] / static_cast<float>(historicalPreds.size());
}
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{
preds[i] = preds[i] + center;
}*/
CalcFromPreds(similarity, cameraMatrix, distortionCoefficients);
}
void FaceStruct::InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds)
@@ -183,16 +149,12 @@ void FaceStruct::InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_CO
}
}
cv::VideoCapture cap;
FaceLandmarkDetector faceLandmarkDetector;
GLuint lastFrameTexture;
std::array<FaceLandmarkStruct, MAX_FACE_COUNT> faceLandmarkArr;
cv::Mat frame;
@@ -399,9 +361,6 @@ void TMyApplication::InnerDraw()
void TMyApplication::InnerUpdate(size_t dt)
{
cap >> frame; // get a new frame from camera/video or read image
if (frame.empty())
@@ -490,15 +449,11 @@ void TMyApplication::InnerUpdate(size_t dt)
renderImage = renderImage.clone();
#else
cv::Mat renderImage = image;
#endif
//cv::Rect frameRect(cv::Point(), renderImage.size());
float focalLength = renderImage.cols;
cv::Point2f center = cv::Point2f(renderImage.cols / 2, renderImage.rows / 2);
cv::Mat cameraMatrix = (cv::Mat_<float>(3, 3) << focalLength, 0, center.x, 0, focalLength, center.y, 0, 0, 1);
cv::Mat distortionCoefficients = cv::Mat::zeros(4, 1, cv::DataType<float>::type);
cv::Mat cameraMatrix = (cv::Mat_<float>(3, 3) << 9.5434798118121421e+02, 0.0, 640.0, 0.0, 9.5434798118121421e+02, 360.0, 0.0, 0.0, 1.0);
cv::Mat distortionCoefficients = (cv::Mat_<float>(5, 1) << 3.5475494652047600e-02, - 5.9818881494543774e-01, 0.0, 0.0, 1.4642154802454528e+00);
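// --- Note, illustrative only: the two lines above replace the earlier width-based
// pinhole guess (focal length = frame width, principal point at the image centre,
// zero distortion) with fixed values that look like an offline calibration of a
// 1280x720 camera (presumably a cv::calibrateCamera run): fx = fy ~= 954.35 px,
// principal point (640, 360), and five distortion terms in OpenCV's
// (k1, k2, p1, p2, k3) order. General form, assuming calibrated fx, fy, cx, cy and
// distortion terms are available:
//   cv::Mat K = (cv::Mat_<float>(3, 3) << fx, 0.0f, cx,
//                                         0.0f, fy, cy,
//                                         0.0f, 0.0f, 1.0f);
//   cv::Mat dist = (cv::Mat_<float>(5, 1) << k1, k2, p1, p2, k3);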
for (size_t i = 0; i < faceLandmarkArr.size(); i++)
{
@@ -520,8 +475,10 @@ void TMyApplication::InnerUpdate(size_t dt)
{
if (faceLandmarkArr[index].valid)
{
faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, cameraMatrix, distortionCoefficients);
faceStruct[index].ApplyPreds(faceLandmarkArr[index].landmarkArr, faceLandmarkArr[index].similarity, cameraMatrix, distortionCoefficients);
std::ostringstream similarityText;
similarityText << std::setprecision(3) << faceLandmarkArr[index].similarity;
cv::putText(renderImage, similarityText.str(), cv::Point(50, index * 60 + 60), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0, 0, 0));
for (size_t i = 0; i < LANDMARK_POINT_COUNT; i++)
{

View File

@@ -33,15 +33,8 @@ struct FaceStruct
SE::Vector2f center;
SE::Vector2f size;
ValueSmoother centerXSmoother;
ValueSmoother centerYSmoother;
ValueSmoother sizeWidthSmoother;
ValueSmoother sizeHeightSmoother;
std::array<SE::Vector2f, LANDMARK_POINT_COUNT> preds;
//std::vector<std::array<SE::Vector2f, LANDMARK_POINT_COUNT>> historicalPreds;
std::vector<cv::Point3f> modelPoints;
cv::Mat rotationVector;
cv::Mat translationVector;
@@ -62,9 +55,9 @@ struct FaceStruct
FaceStruct(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void CalcFromPreds(cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void CalcFromPreds(double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void ApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds, double similarity, cv::Mat& cameraMatrix, cv::Mat& distortionCoefficients);
void InnerApplyPreds(const std::array<cv::Point2f, LANDMARK_POINT_COUNT>& fromPreds);
};

View File

@@ -148,7 +148,7 @@
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<Optimization>Disabled</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;TARGET_WIN32;_WIN32_WINNT=0x0501;WIN32_LEAN_AND_MEAN;EIGEN_DONT_ALIGN_STATICALLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>

View File

@@ -1,18 +1,13 @@
#include "FaceLandmarkDetector.h"
static const size_t faceTrackerWidth = 300;
static const size_t faceTrackerHeight = 300;
static const double faceTrackerScaleFactor = 1.0;
static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0);
float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2)
{
cv::Point2f oldCenter(face1.x + face1.width*0.5f, face1.y + face1.height*0.5f);
cv::Point2f newCenter(face2.x + face2.width*0.5f, face2.y + face2.height*0.5f);
@@ -24,8 +19,14 @@ float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& fac
return squareDistance;
}
double calcSimilarity(const cv::Mat& first, const cv::Mat& second)
{
// assumes that first and second have the same size
double errorL2 = cv::norm(first, second, CV_L2);
// Convert to a reasonable scale, since L2 error is summed across all pixels of the image.
double similarity = errorL2 / static_cast<double>(first.rows * first.cols);
return similarity;
}
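// --- Usage note, illustrative only: calcSimilarity() scores how much a face crop
// changed between frames as the L2 norm of the difference image divided by the
// pixel count, so identical crops score 0.0 and larger values mean the crop moved
// or a different face landed in the bucket. FaceStruct::CalcFromPreds treats
// anything above 0.01 as a jump and resets its pose smoothers. Given two non-empty
// crops of the same size and type:
//   double sim = calcSimilarity(previousCrop, currentCrop);
//   bool faceJumped = sim > 0.01;       // threshold used in CalcFromPreds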
FaceLandmarkDetector::FaceLandmarkDetector()
{
@@ -45,18 +46,13 @@ FaceLandmarkDetector::FaceLandmarkDetector()
#endif
faceNet = cv::dnn::readNetFromCaffe(faceTrackerConfiguration, faceTrackerBinary);
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
{
landmarkNetArr[i] = cv::dnn::readNetFromCaffe(landmarkTrackerConfiguration, landmarkTrackerBinary);
}
}
FaceLandmarkDetector::~FaceLandmarkDetector()
{
StopTrackProcess();
@@ -161,7 +157,6 @@ void FaceLandmarkDetector::FindFaceThreadProc()
if (!localFrame.empty())
{
std::vector<cv::Rect> faces = InnerFindFaces(localFrame);
std::vector<cv::Mat> faceFrames;
//Get rid of faces that are too big
@@ -193,7 +188,6 @@ void FaceLandmarkDetector::FindFaceThreadProc()
localFaceRectArr[i].frameIndex = localFrameIndex;
}
}
}
@@ -214,19 +208,15 @@ void FaceLandmarkDetector::FindFaceThreadProc()
localFrameIndex = syncLastFrameIndex;
localStop = syncStop;
}
}
void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
{
FaceIntermediateStruct localInputFace;
FaceLandmarkStruct localOutputLandmarks;
bool localStop = false;
while (!localStop)
{
if (localInputFace.valid)
@@ -258,6 +248,7 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
localOutputLandmarks.faceRect = localInputFace.faceRect;
localOutputLandmarks.frameIndex = localInputFace.frameIndex;
localOutputLandmarks.similarity = localInputFace.similarity;
localOutputLandmarks.valid = true;
}
else
@@ -268,17 +259,11 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
//Sync area
std::unique_lock<std::mutex> lock(syncMutex);
// OUT to main thread >>>>
syncFaceLandmarkArr[threadIndex] = localOutputLandmarks;
bool doLoop = true;
while (doLoop)
@@ -315,7 +300,6 @@ void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
}
std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
{
if (frame.channels() == 4)
@@ -356,8 +340,6 @@ std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
object.width = object.width*1.2f;
object.height = object.height*1.2f;
faces.push_back(object);
}
}
@@ -468,6 +450,7 @@ void FaceLandmarkDetector::InnerSeparateFacesToBuckets(std::vector<cv::Rect>& fa
}
}
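// note: the similarity below compares the crop still stored in
// inOutBucketArr[j].faceFrame with the newly matched crop, before faceFrame is
// overwritten two lines further down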
inOutBucketArr[j].similarity = calcSimilarity(inOutBucketArr[j].faceFrame, facesBelongsToSpace[j][lastIndex].second);
inOutBucketArr[j].faceRect = facesBelongsToSpace[j][lastIndex].first;
inOutBucketArr[j].faceFrame = facesBelongsToSpace[j][lastIndex].second;

View File

@@ -16,23 +16,17 @@
#include <opencv2/dnn.hpp>
#include <thread>
#include <queue>
#include <mutex>
#include <condition_variable>
#define USE_3D_LANDMARKS
//#include "boost/thread.hpp"
constexpr size_t LANDMARK_POINT_COUNT = 68;
constexpr size_t MAX_FACE_COUNT = 5;
struct FaceIntermediateStruct
{
bool valid = false;
@@ -41,7 +35,7 @@ struct FaceIntermediateStruct
cv::Mat faceFrame;
int frameIndex;
double similarity = 0.0;
};
@@ -53,6 +47,7 @@ struct FaceLandmarkStruct
std::array<cv::Point2f, LANDMARK_POINT_COUNT> landmarkArr;
int frameIndex;
double similarity = 0.0;
};
@@ -105,7 +100,4 @@ protected:
cv::Mat InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect);
void InnerSeparateFacesToBuckets(std::vector<cv::Rect>& faceArr, std::vector<cv::Mat>& faceFrameArr, std::array<FaceIntermediateStruct, MAX_FACE_COUNT>& inOutBucketArr);
};
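
Taken together, the similarity value added in this commit flows through the whole pipeline: InnerSeparateFacesToBuckets scores the newly matched face crop against the bucket's previous one, FindLandmarkThreadProc copies the score into the FaceLandmarkStruct handed to the main thread, and InnerUpdate passes it to FaceStruct::ApplyPreds, whose CalcFromPreds resets the pose smoothers when the score exceeds 0.01. A condensed sketch of that hand-off, with the structs reduced to the fields involved (everything beyond the names shown in the diff is assumed):

// Reduced stand-ins; only the similarity plumbing is kept.
struct IntermediateSketch { double similarity = 0.0; };   // ~ FaceIntermediateStruct
struct LandmarkSketch     { double similarity = 0.0; };   // ~ FaceLandmarkStruct

struct FaceSketch                                         // ~ FaceStruct
{
    void ApplyPreds(double similarity)
    {
        if (similarity > 0.01) { /* reset rotation/translation smoothers */ }
        // ... solvePnP + smoothing as in CalcFromPreds ...
    }
};

void handOffSketch(const IntermediateSketch& bucket, LandmarkSketch& out, FaceSketch& face)
{
    out.similarity = bucket.similarity;   // landmark thread -> main thread
    face.ApplyPreds(out.similarity);      // main thread -> smoother reset decision
}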