double-hit-balls/windows/DoubleHitBalls-win/DoubleHitBalls-win/FaceLandmarkDetector.cpp
2018-06-01 22:31:55 +05:00

489 lines
12 KiB
C++
Executable File

#include "FaceLandmarkDetector.h"
// Parameters handed to cv::dnn::blobFromImage when the face detection
// network input is prepared in InnerFindFaces.
// Target blob size in pixels.
static constexpr size_t faceTrackerWidth = 300;
static constexpr size_t faceTrackerHeight = 300;
// Scale factor argument of blobFromImage.
static constexpr double faceTrackerScaleFactor = 1.0;
// Mean argument of blobFromImage (one value per channel).
static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0);
// Returns the squared Euclidean distance between the center points of two
// face rectangles. No square root is taken, so the result is only suitable
// for comparing distances against each other.
float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2)
{
    const cv::Point2f firstCenter(face1.x + face1.width*0.5f, face1.y + face1.height*0.5f);
    const cv::Point2f secondCenter(face2.x + face2.width*0.5f, face2.y + face2.height*0.5f);

    const float dx = secondCenter.x - firstCenter.x;
    const float dy = secondCenter.y - firstCenter.y;

    return dx * dx + dy * dy;
}
// Computes the L2 norm of the difference between two images, divided by the
// pixel count (rows * cols) of the first one.
// Both images are expected to have the same size. Despite the name, the
// returned value is an error measure: identical images give 0 and the value
// grows as the images differ more.
double calcSimilarity(const cv::Mat& first, const cv::Mat& second)
{
    const double pixelCount = static_cast<double>(first.rows * first.cols);

    // The L2 norm is accumulated over every pixel, so it is scaled by the
    // image area to bring it into a reasonable range.
    return cv::norm(first, second, CV_L2) / pixelCount;
}
// Loads the Caffe face detection network and one landmark network per face
// slot (MAX_FACE_COUNT in total). Model files are referenced by file name
// only; the landmark model pair is selected at compile time through
// USE_3D_LANDMARKS.
FaceLandmarkDetector::FaceLandmarkDetector()
{
    // Face detector model files.
    static const cv::String faceTrackerConfiguration = "deploy.prototxt";
    static const cv::String faceTrackerBinary = "res10_300x300_ssd_iter_140000.caffemodel";

    // Landmark model files.
#ifdef USE_3D_LANDMARKS
    static const cv::String landmarkTrackerConfiguration = "fa_deploy.prototxt";
    static const cv::String landmarkTrackerBinary = "fa__iter_1400000.caffemodel";
#else
    static const cv::String landmarkTrackerConfiguration = "landmark_deploy.prototxt";
    static const cv::String landmarkTrackerBinary = "VanFace.caffemodel";
#endif

    faceNet = cv::dnn::readNetFromCaffe(faceTrackerConfiguration, faceTrackerBinary);

    // Every landmark thread works on its own network instance.
    for (size_t slot = 0; slot != MAX_FACE_COUNT; ++slot)
    {
        landmarkNetArr[slot] = cv::dnn::readNetFromCaffe(landmarkTrackerConfiguration, landmarkTrackerBinary);
    }
}
// Makes sure the worker threads are stopped and joined before the object
// goes away (StopTrackProcess is a no-op when tracking is not running).
FaceLandmarkDetector::~FaceLandmarkDetector()
{
    this->StopTrackProcess();
}
void FaceLandmarkDetector::StartTrackProcess()
{
if (started)
{
return;
}
//Some init setup
syncStop = false;
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
{
syncFaceRectArr[i].valid = false;
syncFaceLandmarkArr[i].valid = false;
}
syncLastFrame = cv::Mat();
//Start threads
findFaceThread = std::thread([this]() { this->FindFaceThreadProc(); });
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
{
landmarkThreadArr[i] = std::thread([this, i]() { this->FindLandmarkThreadProc(i); });
}
started = true;
}
// Signals all worker threads to stop, wakes the ones waiting on the
// condition variable and joins them. Does nothing if tracking is not running.
void FaceLandmarkDetector::StopTrackProcess()
{
    if (started)
    {
        // Keep this inner scope: the mutex has to be released before join(),
        // otherwise the workers could never enter their sync areas to see the flag.
        {
            std::lock_guard<std::mutex> guard(syncMutex);
            syncStop = true;
            syncFaceRectCond.notify_all();
        }

        findFaceThread.join();
        for (size_t slot = 0; slot != MAX_FACE_COUNT; ++slot)
        {
            landmarkThreadArr[slot].join();
        }

        started = false;
    }
}
// Hands the newest frame to the face detection thread and returns a copy of
// the landmark results most recently published by the landmark threads.
// The call does not wait for the passed frame to be processed; the returned
// entries carry their own frameIndex.
//   frame      - latest frame to be tracked
//   frameIndex - caller-defined index stored alongside the frame
// NOTE(review): cv::Mat assignment presumably shares the pixel buffer instead
// of copying it, so the caller should not modify the frame afterwards - confirm.
std::array<FaceLandmarkStruct, MAX_FACE_COUNT> FaceLandmarkDetector::GetFaceLandmarks(cv::Mat frame, int frameIndex)
{
    // Everything below touches state shared with the worker threads.
    std::lock_guard<std::mutex> guard(syncMutex);

    // OUT to face rect thread >>>>
    syncLastFrame = frame;
    syncLastFrameIndex = frameIndex;

    // IN from landmark threads <<<< (copied while the lock is still held)
    return syncFaceLandmarkArr;
}
void FaceLandmarkDetector::FindFaceThreadProc()
{
cv::Mat localFrame;
std::array<FaceIntermediateStruct, MAX_FACE_COUNT> localFaceRectArr;
bool localStop = false;
int localFrameIndex = 0;
while (!syncStop)
{
if (!localFrame.empty())
{
std::vector<cv::Rect> faces = InnerFindFaces(localFrame);
std::vector<cv::Mat> faceFrames;
//Get rid of faces that are too big
for (size_t i = 0; i < faces.size(); )
{
if (faces[i].x < 0 || faces[i].y < 0 || (faces[i].x + faces[i].width >= localFrame.cols) || (faces[i].y + faces[i].height >= localFrame.rows))
{
faces.erase(faces.begin() + i);
}
else
{
i++;
}
}
//Cut faces from frame
for (size_t i = 0; i <faces.size(); i++)
{
faceFrames.push_back(InnerCutFaceFrame(localFrame, faces[i]));
}
InnerSeparateFacesToBuckets(faces, faceFrames, localFaceRectArr);
for (size_t i = 0; i < localFaceRectArr.size(); i++)
{
if (localFaceRectArr[i].valid)
{
localFaceRectArr[i].frameIndex = localFrameIndex;
}
}
}
//Sync area
std::unique_lock<std::mutex> lock(syncMutex);
// OUT to landmark Threads >>>>
syncFaceRectArr = localFaceRectArr;
syncFaceRectCond.notify_all();
// IN from main thread <<<<
localFrame = syncLastFrame;
localFrameIndex = syncLastFrameIndex;
localStop = syncStop;
}
}
void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
{
FaceIntermediateStruct localInputFace;
FaceLandmarkStruct localOutputLandmarks;
bool localStop = false;
while (!localStop)
{
if (localInputFace.valid)
{
#ifdef USE_3D_LANDMARKS
cv::Mat landmarkInputBlob = cv::dnn::blobFromImage(localInputFace.faceFrame, 1.0, cv::Size(40, 40), cv::Scalar(), false, false);
#else
cv::Mat landmarkInputBlob = cv::dnn::blobFromImage(localInputFace.faceFrame, 1.0, cv::Size(60, 60), cv::Scalar(), false, false);
#endif
landmarkInputBlob.reshape(0, std::vector<int>{ 1, 1, localInputFace.faceFrame.rows, localInputFace.faceFrame.cols });
landmarkNetArr[threadIndex].setInput(landmarkInputBlob);
#ifdef USE_3D_LANDMARKS
cv::Mat featureBlob = landmarkNetArr[threadIndex].forward("Dense2");
#else
cv::Mat featureBlob = landmarkNetArr[threadIndex].forward("Dense3");
#endif
for (int i = 0; i < featureBlob.cols / 2; i++)
{
cv::Point landmarkPoint;
landmarkPoint.x = int(featureBlob.at<float>(0, 2 * i) * (localInputFace.faceRect.width) + localInputFace.faceRect.x);
landmarkPoint.y = int(featureBlob.at<float>(0, 2 * i + 1) * (localInputFace.faceRect.height) + localInputFace.faceRect.y);
localOutputLandmarks.landmarkArr[i] = landmarkPoint;
}
localOutputLandmarks.faceRect = localInputFace.faceRect;
localOutputLandmarks.frameIndex = localInputFace.frameIndex;
localOutputLandmarks.similarity = localInputFace.similarity;
localOutputLandmarks.valid = true;
}
else
{
localOutputLandmarks.valid = false;
}
//Sync area
std::unique_lock<std::mutex> lock(syncMutex);
// OUT to main thread >>>>
syncFaceLandmarkArr[threadIndex] = localOutputLandmarks;
bool doLoop = true;
while (doLoop)
{
//IN from face rect thread <<<<
localInputFace = syncFaceRectArr[threadIndex];
localStop = syncStop;
//Time to quit
if (localStop)
{
doLoop = false;
}
if (!localInputFace.valid)
{
syncFaceLandmarkArr[threadIndex].valid = false;
syncFaceRectCond.wait(lock);
}
else
{
doLoop = false;
}
}
}
}
// Runs the face detection network on a frame and returns the rectangles of
// all detections whose confidence is above 0.5.
// Each rectangle is enlarged by 10% of its size on every side, so the result
// may extend beyond the frame; no clipping is done here.
std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
{
    // The blob is built from a frame without alpha channel.
    if (frame.channels() == 4)
    {
        cv::cvtColor(frame, frame, cv::COLOR_BGRA2BGR);
    }

    cv::Mat inputBlob = cv::dnn::blobFromImage(frame, faceTrackerScaleFactor, cv::Size(faceTrackerWidth, faceTrackerHeight), faceTrackerMeanVal, false, false);
    faceNet.setInput(inputBlob, "data");
    cv::Mat detection = faceNet.forward("detection_out");

    // View the network output as a 2D float matrix with one detection per row.
    cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    const float minConfidence = 0.5f;
    std::vector<cv::Rect> faces;

    for (int row = 0; row < detectionMat.rows; ++row)
    {
        // Column 2 holds the confidence of the detection.
        const float confidence = detectionMat.at<float>(row, 2);
        if (!(confidence > minConfidence))
        {
            continue;
        }

        // Columns 3..6 hold the two box corners; they are scaled by the
        // frame width and height to obtain pixel coordinates.
        const int x1 = static_cast<int>(detectionMat.at<float>(row, 3) * frame.cols);
        const int y1 = static_cast<int>(detectionMat.at<float>(row, 4) * frame.rows);
        const int x2 = static_cast<int>(detectionMat.at<float>(row, 5) * frame.cols);
        const int y2 = static_cast<int>(detectionMat.at<float>(row, 6) * frame.rows);

        cv::Rect face(x1, y1, x2 - x1, y2 - y1);

        // Grow the box: move the origin out by 10% of the original size, then
        // scale width and height to 120%.
        face.x = static_cast<int>(face.x - face.width*0.1f);
        face.y = static_cast<int>(face.y - face.height*0.1f);
        face.width = static_cast<int>(face.width*1.2f);
        face.height = static_cast<int>(face.height*1.2f);

        faces.push_back(face);
    }

    return faces;
}
// Cuts faceRect out of frame and turns it into the input patch for the
// landmark network: grayscale, 32-bit float, resized to the network input
// size (40x40 with USE_3D_LANDMARKS, 60x60 otherwise) and normalized to zero
// mean and unit standard deviation.
// faceRect must describe a region of the frame that can be used as a ROI.
// NOTE(review): the conversion uses CV_RGB2GRAY while InnerFindFaces treats
// frames as BGR(A) - confirm which channel order the landmark model expects.
cv::Mat FaceLandmarkDetector::InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect)
{
#ifdef USE_3D_LANDMARKS
    const cv::Size patchSize(40, 40);
#else
    const cv::Size patchSize(60, 60);
#endif

    // Grayscale float version of the face region.
    const cv::Mat faceRegion(frame, faceRect);
    cv::Mat gray;
    cv::cvtColor(faceRegion, gray, CV_RGB2GRAY);
    gray.convertTo(gray, CV_32FC1);

    // Bring it to the size the network expects.
    cv::Mat patch;
    cv::resize(gray, patch, patchSize, 0, 0, cv::INTER_CUBIC);

    // Normalize; the small constant guards against division by zero for a
    // completely uniform patch.
    cv::Mat meanMat, stdDevMat;
    cv::meanStdDev(patch, meanMat, stdDevMat);
    const double mean = meanMat.at<double>(0, 0);
    const double stdDev = stdDevMat.at<double>(0, 0);
    patch = (patch - mean) / (0.000001 + stdDev);

    return patch;
}
// Assigns the faces detected in the current frame to the persistent face
// slots ("buckets"), so that a face keeps its slot from frame to frame.
//   faceArr        - rectangles of the detected faces; modified in place. On
//                    return it holds only the faces that did not get a slot.
//   faceFrameArr   - face patches, parallel to faceArr (same index = same
//                    face); modified in place together with faceArr.
//   inOutBucketArr - slots from the previous call on input, updated slots on
//                    output (valid, faceRect, faceFrame; similarity only for
//                    slots that kept tracking a face).
// The outcome depends on the order in which faces are erased from and pushed
// back to the pool, so statement order matters here.
void FaceLandmarkDetector::InnerSeparateFacesToBuckets(std::vector<cv::Rect>& faceArr, std::vector<cv::Mat>& faceFrameArr, std::array<FaceIntermediateStruct, MAX_FACE_COUNT>& inOutBucketArr)
{
// Scratch storage: for every slot, the candidate faces nearest to it.
// NOTE(review): being a function-local static, this storage is shared by all
// calls (and all detector instances) - concurrent calls would race on it.
static std::array<std::vector<std::pair<cv::Rect, cv::Mat>>, MAX_FACE_COUNT> facesBelongsToSpace;
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
{
facesBelongsToSpace[i].clear();
}
// Step 1: move every current face to the candidate list of the nearest
// occupied slot. Faces stay in the pool (faceArr) when no slot is occupied or
// when their squared distance to every occupied slot is not below 1000000.
for (size_t i = 0; i < faceArr.size(); )
{
float lastDistance = 1000000.f;
int lastIndex = -1;
for (size_t j = 0; j < MAX_FACE_COUNT; j++)
{
if (inOutBucketArr[j].valid)
{
float newDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, faceArr[i]);
if (newDistance < lastDistance)
{
lastDistance = newDistance;
lastIndex = j;
}
}
}
if (lastIndex != -1)
{
facesBelongsToSpace[lastIndex].push_back(std::make_pair(faceArr[i], faceFrameArr[i]));
// Erasing shifts the next face into position i, so i is not advanced.
faceArr.erase(faceArr.begin() + i);
faceFrameArr.erase(faceFrameArr.begin() + i);
}
else
{
i++;
}
}
// Step 2: slots that attracted no face at all lost their face - free them.
for (size_t j = 0; j < MAX_FACE_COUNT; j++)
{
if (inOutBucketArr[j].valid)
{
if (facesBelongsToSpace[j].size() == 0)
{
inOutBucketArr[j].valid = false;
}
}
}
// Step 3: every slot that is still occupied keeps the nearest of its
// candidates; the other candidates are returned to the pool.
for (size_t j = 0; j < MAX_FACE_COUNT; j++)
{
if (inOutBucketArr[j].valid)
{
// A slot that is still valid has at least one candidate (see step 2),
// so index 0 exists.
float lastDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, facesBelongsToSpace[j][0].first);
int lastIndex = 0;
for (size_t i = 1; i < facesBelongsToSpace[j].size(); i++)
{
float newDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, facesBelongsToSpace[j][i].first);
if (newDistance < lastDistance)
{
lastDistance = newDistance;
lastIndex = i;
}
}
// Compare the previous patch of the slot with the new one before the
// patch is overwritten.
inOutBucketArr[j].similarity = calcSimilarity(inOutBucketArr[j].faceFrame, facesBelongsToSpace[j][lastIndex].second);
inOutBucketArr[j].faceRect = facesBelongsToSpace[j][lastIndex].first;
inOutBucketArr[j].faceFrame = facesBelongsToSpace[j][lastIndex].second;
facesBelongsToSpace[j].erase(facesBelongsToSpace[j].begin() + lastIndex);
for (size_t k = 0; k < facesBelongsToSpace[j].size(); k++)
{
faceArr.push_back(facesBelongsToSpace[j][k].first);
faceFrameArr.push_back(facesBelongsToSpace[j][k].second);
}
}
}
// Step 4: hand the faces left in the pool to the free slots, in pool order.
// Faces that remain when all slots are taken stay in faceArr/faceFrameArr.
// NOTE(review): similarity is not written for a newly filled slot, so it
// keeps whatever value the slot held before - confirm this is intended.
for (size_t j = 0; j < MAX_FACE_COUNT; j++)
{
if (!inOutBucketArr[j].valid)
{
if (faceArr.size() > 0)
{
inOutBucketArr[j].valid = true;
inOutBucketArr[j].faceRect = faceArr[0];
inOutBucketArr[j].faceFrame = faceFrameArr[0];
faceArr.erase(faceArr.begin());
faceFrameArr.erase(faceFrameArr.begin());
}
}
}
}