// Listing metadata: 489 lines, 12 KiB, C++, executable file.
#include "FaceLandmarkDetector.h"
|
|
|
|
static const size_t faceTrackerWidth = 300;
|
|
static const size_t faceTrackerHeight = 300;
|
|
|
|
static const double faceTrackerScaleFactor = 1.0;
|
|
static const cv::Scalar faceTrackerMeanVal(104.0, 177.0, 123.0);
|
|
|
|
// Returns the squared Euclidean distance between the centers of two face
// rectangles. The square root is not taken; callers only compare distances.
float calcSquaredDistanceBetweenFaces(const cv::Rect& face1, const cv::Rect& face2)
{
    // Rectangle centers, computed in single precision.
    const float firstCenterX = face1.x + face1.width * 0.5f;
    const float firstCenterY = face1.y + face1.height * 0.5f;
    const float secondCenterX = face2.x + face2.width * 0.5f;
    const float secondCenterY = face2.y + face2.height * 0.5f;

    const float dx = secondCenterX - firstCenterX;
    const float dy = secondCenterY - firstCenterY;

    return dx * dx + dy * dy;
}
|
|
|
|
// Compares two face crops and returns the L2 norm of their difference divided
// by the pixel count of `first`.
//
// Despite the name, the returned value is an error measure: 0 means the two
// images are identical and larger values mean they differ more.
//
// Precondition: `first` and `second` have the same size (and type); cv::norm
// is called on both directly and no check is made here.
double calcSimilarity(const cv::Mat& first, const cv::Mat& second)
{
    // cv::NORM_L2 is the C++ enumerator for the value the legacy CV_L2 macro
    // carried; it matches the cv::COLOR_* enumerators used elsewhere in this file.
    const double errorL2 = cv::norm(first, second, cv::NORM_L2);

    // Convert to a reasonable scale, since the L2 error is accumulated across
    // all pixels of the image.
    const double pixelCount = static_cast<double>(first.rows * first.cols);

    return errorL2 / pixelCount;
}
|
|
|
|
// Loads the Caffe face-detection network and one landmark network per face
// slot (MAX_FACE_COUNT of them). The file names below are passed unchanged to
// cv::dnn::readNetFromCaffe.
FaceLandmarkDetector::FaceLandmarkDetector()
{
    // Face detector model.
    const cv::String faceProto = "deploy.prototxt";
    const cv::String faceWeights = "res10_300x300_ssd_iter_140000.caffemodel";

    // Landmark model; which one is used depends on USE_3D_LANDMARKS.
#ifdef USE_3D_LANDMARKS
    const cv::String landmarkProto = "fa_deploy.prototxt";
    const cv::String landmarkWeights = "fa__iter_1400000.caffemodel";
#else
    const cv::String landmarkProto = "landmark_deploy.prototxt";
    const cv::String landmarkWeights = "VanFace.caffemodel";
#endif

    faceNet = cv::dnn::readNetFromCaffe(faceProto, faceWeights);

    // Each landmark slot gets its own network instance, read from disk separately.
    size_t slot = 0;
    while (slot < MAX_FACE_COUNT)
    {
        landmarkNetArr[slot] = cv::dnn::readNetFromCaffe(landmarkProto, landmarkWeights);
        ++slot;
    }
}
|
|
|
|
// Stops and joins the worker threads before the members are destroyed.
// StopTrackProcess returns immediately when tracking was not started.
FaceLandmarkDetector::~FaceLandmarkDetector()
{
    this->StopTrackProcess();
}
|
|
|
|
|
|
void FaceLandmarkDetector::StartTrackProcess()
|
|
{
|
|
if (started)
|
|
{
|
|
return;
|
|
}
|
|
|
|
|
|
//Some init setup
|
|
|
|
syncStop = false;
|
|
|
|
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
|
|
{
|
|
syncFaceRectArr[i].valid = false;
|
|
syncFaceLandmarkArr[i].valid = false;
|
|
}
|
|
|
|
syncLastFrame = cv::Mat();
|
|
|
|
|
|
//Start threads
|
|
|
|
findFaceThread = std::thread([this]() { this->FindFaceThreadProc(); });
|
|
|
|
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
|
|
{
|
|
landmarkThreadArr[i] = std::thread([this, i]() { this->FindLandmarkThreadProc(i); });
|
|
}
|
|
|
|
started = true;
|
|
|
|
}
|
|
|
|
void FaceLandmarkDetector::StopTrackProcess()
|
|
{
|
|
if (!started)
|
|
{
|
|
return;
|
|
}
|
|
|
|
//Do not remove braces
|
|
{
|
|
std::unique_lock<std::mutex> lock(syncMutex);
|
|
|
|
syncStop = true;
|
|
|
|
syncFaceRectCond.notify_all();
|
|
}
|
|
|
|
|
|
findFaceThread.join();
|
|
|
|
for (size_t i = 0; i < MAX_FACE_COUNT; i++)
|
|
{
|
|
landmarkThreadArr[i].join();
|
|
}
|
|
|
|
started = false;
|
|
|
|
}
|
|
|
|
// Hands the newest frame (and its index) to the face-detection thread and
// returns a copy of the most recent landmark results published by the
// landmark threads. Both steps happen under syncMutex.
//
// The returned landmarks are whatever the workers last published; each entry
// carries its own frameIndex.
std::array<FaceLandmarkStruct, MAX_FACE_COUNT> FaceLandmarkDetector::GetFaceLandmarks(cv::Mat frame, int frameIndex)
{
    std::lock_guard<std::mutex> guard(syncMutex);

    // OUT to the face-rect thread.
    syncLastFrame = frame;
    syncLastFrameIndex = frameIndex;

    // IN from the landmark threads: the copy for the return value is made
    // before the guard is released.
    return syncFaceLandmarkArr;
}
|
|
|
|
|
|
void FaceLandmarkDetector::FindFaceThreadProc()
|
|
{
|
|
cv::Mat localFrame;
|
|
std::array<FaceIntermediateStruct, MAX_FACE_COUNT> localFaceRectArr;
|
|
|
|
bool localStop = false;
|
|
|
|
int localFrameIndex = 0;
|
|
|
|
|
|
while (!syncStop)
|
|
{
|
|
|
|
if (!localFrame.empty())
|
|
{
|
|
std::vector<cv::Rect> faces = InnerFindFaces(localFrame);
|
|
std::vector<cv::Mat> faceFrames;
|
|
|
|
//Get rid of faces that are too big
|
|
for (size_t i = 0; i < faces.size(); )
|
|
{
|
|
if (faces[i].x < 0 || faces[i].y < 0 || (faces[i].x + faces[i].width >= localFrame.cols) || (faces[i].y + faces[i].height >= localFrame.rows))
|
|
{
|
|
faces.erase(faces.begin() + i);
|
|
}
|
|
else
|
|
{
|
|
i++;
|
|
}
|
|
}
|
|
|
|
//Cut faces from frame
|
|
for (size_t i = 0; i <faces.size(); i++)
|
|
{
|
|
faceFrames.push_back(InnerCutFaceFrame(localFrame, faces[i]));
|
|
}
|
|
|
|
InnerSeparateFacesToBuckets(faces, faceFrames, localFaceRectArr);
|
|
|
|
|
|
for (size_t i = 0; i < localFaceRectArr.size(); i++)
|
|
{
|
|
if (localFaceRectArr[i].valid)
|
|
{
|
|
localFaceRectArr[i].frameIndex = localFrameIndex;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//Sync area
|
|
|
|
std::unique_lock<std::mutex> lock(syncMutex);
|
|
|
|
// OUT to landmark Threads >>>>
|
|
syncFaceRectArr = localFaceRectArr;
|
|
|
|
syncFaceRectCond.notify_all();
|
|
|
|
|
|
// IN from main thread <<<<
|
|
localFrame = syncLastFrame;
|
|
|
|
localFrameIndex = syncLastFrameIndex;
|
|
|
|
localStop = syncStop;
|
|
}
|
|
}
|
|
|
|
void FaceLandmarkDetector::FindLandmarkThreadProc(size_t threadIndex)
|
|
{
|
|
FaceIntermediateStruct localInputFace;
|
|
FaceLandmarkStruct localOutputLandmarks;
|
|
|
|
bool localStop = false;
|
|
while (!localStop)
|
|
{
|
|
if (localInputFace.valid)
|
|
{
|
|
#ifdef USE_3D_LANDMARKS
|
|
cv::Mat landmarkInputBlob = cv::dnn::blobFromImage(localInputFace.faceFrame, 1.0, cv::Size(40, 40), cv::Scalar(), false, false);
|
|
#else
|
|
cv::Mat landmarkInputBlob = cv::dnn::blobFromImage(localInputFace.faceFrame, 1.0, cv::Size(60, 60), cv::Scalar(), false, false);
|
|
#endif
|
|
|
|
landmarkInputBlob.reshape(0, std::vector<int>{ 1, 1, localInputFace.faceFrame.rows, localInputFace.faceFrame.cols });
|
|
|
|
landmarkNetArr[threadIndex].setInput(landmarkInputBlob);
|
|
#ifdef USE_3D_LANDMARKS
|
|
cv::Mat featureBlob = landmarkNetArr[threadIndex].forward("Dense2");
|
|
#else
|
|
cv::Mat featureBlob = landmarkNetArr[threadIndex].forward("Dense3");
|
|
#endif
|
|
|
|
for (int i = 0; i < featureBlob.cols / 2; i++)
|
|
{
|
|
cv::Point landmarkPoint;
|
|
|
|
landmarkPoint.x = int(featureBlob.at<float>(0, 2 * i) * (localInputFace.faceRect.width) + localInputFace.faceRect.x);
|
|
landmarkPoint.y = int(featureBlob.at<float>(0, 2 * i + 1) * (localInputFace.faceRect.height) + localInputFace.faceRect.y);
|
|
localOutputLandmarks.landmarkArr[i] = landmarkPoint;
|
|
|
|
}
|
|
|
|
localOutputLandmarks.faceRect = localInputFace.faceRect;
|
|
localOutputLandmarks.frameIndex = localInputFace.frameIndex;
|
|
localOutputLandmarks.similarity = localInputFace.similarity;
|
|
localOutputLandmarks.valid = true;
|
|
}
|
|
else
|
|
{
|
|
localOutputLandmarks.valid = false;
|
|
}
|
|
|
|
|
|
|
|
//Sync area
|
|
std::unique_lock<std::mutex> lock(syncMutex);
|
|
|
|
// OUT to main thread >>>>
|
|
syncFaceLandmarkArr[threadIndex] = localOutputLandmarks;
|
|
|
|
bool doLoop = true;
|
|
|
|
while (doLoop)
|
|
{
|
|
|
|
//IN from face rect thread <<<<
|
|
|
|
localInputFace = syncFaceRectArr[threadIndex];
|
|
|
|
|
|
localStop = syncStop;
|
|
|
|
//Time to quit
|
|
if (localStop)
|
|
{
|
|
doLoop = false;
|
|
}
|
|
|
|
if (!localInputFace.valid)
|
|
{
|
|
|
|
syncFaceLandmarkArr[threadIndex].valid = false;
|
|
|
|
syncFaceRectCond.wait(lock);
|
|
|
|
}
|
|
else
|
|
{
|
|
doLoop = false;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
|
|
// Runs the face-detection network on `frame` and returns one rectangle per
// detection whose confidence exceeds 0.5. Every rectangle is grown by 10% of
// its size on each side (20% overall per dimension) and is NOT clipped to the
// frame, so callers must reject rectangles that leave the image.
std::vector<cv::Rect> FaceLandmarkDetector::InnerFindFaces(cv::Mat frame)
{
    // A 4-channel frame is converted to 3 channels before it is fed to the network.
    if (frame.channels() == 4)
    {
        cv::cvtColor(frame, frame, cv::COLOR_BGRA2BGR);
    }

    cv::Mat inputBlob = cv::dnn::blobFromImage(frame, faceTrackerScaleFactor, cv::Size(faceTrackerWidth, faceTrackerHeight), faceTrackerMeanVal, false, false);
    faceNet.setInput(inputBlob, "data");

    cv::Mat detection = faceNet.forward("detection_out");

    // 2-D view over the last two dimensions of the detection blob: one row per
    // detection. Column 2 is read as the confidence and columns 3..6 as the
    // box corners, which are multiplied by the frame size below.
    cv::Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    const float confidenceThreshold = 0.5f;

    std::vector<cv::Rect> faces;

    for (int row = 0; row < detectionMat.rows; ++row)
    {
        if (!(detectionMat.at<float>(row, 2) > confidenceThreshold))
        {
            continue;
        }

        const int left = static_cast<int>(detectionMat.at<float>(row, 3) * frame.cols);
        const int top = static_cast<int>(detectionMat.at<float>(row, 4) * frame.rows);
        const int right = static_cast<int>(detectionMat.at<float>(row, 5) * frame.cols);
        const int bottom = static_cast<int>(detectionMat.at<float>(row, 6) * frame.rows);

        const int rawWidth = right - left;
        const int rawHeight = bottom - top;

        // Grow the box around its center; results are truncated to int.
        cv::Rect expanded;
        expanded.x = static_cast<int>(left - rawWidth * 0.1f);
        expanded.y = static_cast<int>(top - rawHeight * 0.1f);
        expanded.width = static_cast<int>(rawWidth * 1.2f);
        expanded.height = static_cast<int>(rawHeight * 1.2f);

        faces.push_back(expanded);
    }

    return faces;
}
|
|
|
|
// Extracts `faceRect` from `frame` and prepares it as landmark-network input:
// converts to grayscale, converts to 32-bit float, resizes to the network
// input size (40x40 with USE_3D_LANDMARKS, otherwise 60x60) and normalizes to
// zero mean and unit standard deviation.
//
// `faceRect` must lie inside `frame`; the ROI constructor is used without a
// check here.
cv::Mat FaceLandmarkDetector::InnerCutFaceFrame(cv::Mat frame, cv::Rect faceRect)
{
    // ROI view into the frame; no pixel data is copied at this point.
    cv::Mat srcROI(frame, faceRect);

    // cv::COLOR_RGB2GRAY is the C++ enumerator for the legacy CV_RGB2GRAY
    // macro and matches the cv::COLOR_* usage in InnerFindFaces.
    cv::Mat gray;
    cv::cvtColor(srcROI, gray, cv::COLOR_RGB2GRAY);

    gray.convertTo(gray, CV_32FC1);

#ifdef USE_3D_LANDMARKS
    const cv::Size dsize(40, 40);
#else
    const cv::Size dsize(60, 60);
#endif

    cv::Mat resized;
    cv::resize(gray, resized, dsize, 0, 0, cv::INTER_CUBIC);

    cv::Mat meanMat, stdDevMat;
    cv::meanStdDev(resized, meanMat, stdDevMat);
    const double mean = meanMat.at<double>(0, 0);
    const double stdDev = stdDevMat.at<double>(0, 0);

    // The small epsilon avoids division by zero on a perfectly flat crop.
    resized = (resized - mean) / (0.000001 + stdDev);

    return resized;
}
|
|
|
|
// Matches the faces detected in the current frame against the tracked face
// slots ("buckets") so that a face keeps its slot from frame to frame.
//
// Parameters:
//   faceArr        - detected face rectangles; consumed as faces are assigned.
//                    On return it holds the faces that found no slot.
//   faceFrameArr   - normalized crops, parallel to faceArr and modified in
//                    lockstep with it.
//   inOutBucketArr - tracked slots from the previous call; updated in place.
//
// Steps:
//   1. every face is attached to the nearest currently valid slot (by squared
//      center distance, which must be below 1000000),
//   2. valid slots that attracted no face are released,
//   3. every remaining valid slot keeps its nearest candidate, records the
//      similarity between its old and new crop, and returns the other
//      candidates to the pool,
//   4. faces left in the pool fill the free slots in order.
//
// Slots filled in step 4 do not get `similarity` written; it keeps whatever
// value the slot held before.
void FaceLandmarkDetector::InnerSeparateFacesToBuckets(std::vector<cv::Rect>& faceArr, std::vector<cv::Mat>& faceFrameArr, std::array<FaceIntermediateStruct, MAX_FACE_COUNT>& inOutBucketArr)
{
    // Per-call scratch space: candidates collected for each slot. This is a
    // plain local (not a function-level static), so separate detector
    // instances running their own face threads never share it.
    std::array<std::vector<std::pair<cv::Rect, cv::Mat>>, MAX_FACE_COUNT> facesBelongsToSpace;

    // Step 1: distribute current faces to the nearest old positions.
    for (size_t i = 0; i < faceArr.size(); )
    {
        float lastDistance = 1000000.f;
        int lastIndex = -1;

        for (size_t j = 0; j < MAX_FACE_COUNT; j++)
        {
            if (!inOutBucketArr[j].valid)
            {
                continue;
            }

            const float newDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, faceArr[i]);
            if (newDistance < lastDistance)
            {
                lastDistance = newDistance;
                lastIndex = static_cast<int>(j);
            }
        }

        if (lastIndex != -1)
        {
            // Move the face out of the pool; do not advance i because the next
            // element has shifted into this position.
            facesBelongsToSpace[lastIndex].push_back(std::make_pair(faceArr[i], faceFrameArr[i]));
            faceArr.erase(faceArr.begin() + i);
            faceFrameArr.erase(faceFrameArr.begin() + i);
        }
        else
        {
            i++;
        }
    }

    // Step 2: mark as free the slots that no face belongs to.
    for (size_t j = 0; j < MAX_FACE_COUNT; j++)
    {
        if (inOutBucketArr[j].valid && facesBelongsToSpace[j].empty())
        {
            inOutBucketArr[j].valid = false;
        }
    }

    // Step 3: keep the nearest face in each slot; the others return to the pool.
    // Every slot still valid here has at least one candidate (see step 2).
    for (size_t j = 0; j < MAX_FACE_COUNT; j++)
    {
        if (!inOutBucketArr[j].valid)
        {
            continue;
        }

        std::vector<std::pair<cv::Rect, cv::Mat>>& candidates = facesBelongsToSpace[j];

        float lastDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, candidates[0].first);
        size_t lastIndex = 0;

        for (size_t i = 1; i < candidates.size(); i++)
        {
            const float newDistance = calcSquaredDistanceBetweenFaces(inOutBucketArr[j].faceRect, candidates[i].first);
            if (newDistance < lastDistance)
            {
                lastDistance = newDistance;
                lastIndex = i;
            }
        }

        // Similarity is measured against the slot's previous crop, so it has
        // to be computed before faceFrame is overwritten.
        inOutBucketArr[j].similarity = calcSimilarity(inOutBucketArr[j].faceFrame, candidates[lastIndex].second);
        inOutBucketArr[j].faceRect = candidates[lastIndex].first;
        inOutBucketArr[j].faceFrame = candidates[lastIndex].second;

        candidates.erase(candidates.begin() + lastIndex);

        for (size_t k = 0; k < candidates.size(); k++)
        {
            faceArr.push_back(candidates[k].first);
            faceFrameArr.push_back(candidates[k].second);
        }
    }

    // Step 4: distribute faces from the pool to the empty slots.
    for (size_t j = 0; j < MAX_FACE_COUNT; j++)
    {
        if (!inOutBucketArr[j].valid && faceArr.size() > 0)
        {
            inOutBucketArr[j].valid = true;

            inOutBucketArr[j].faceRect = faceArr[0];
            inOutBucketArr[j].faceFrame = faceFrameArr[0];

            faceArr.erase(faceArr.begin());
            faceFrameArr.erase(faceFrameArr.begin());
        }
    }
}