Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement the Chinese Whispers clustering algorithm in PHP code. #691

Merged
merged 1 commit into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
matrix:
php-versions: ['8.1']
databases: ['mysql']
server-versions: ['stable26']
server-versions: ['stable27']

name: php${{ matrix.php-versions }}-${{ matrix.databases }}-${{ matrix.server-versions }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/phpunit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
matrix:
php-versions: ['8.0', '8.1', '8.2']
databases: ['sqlite', 'mysql', 'pgsql']
server-versions: ['stable26']
server-versions: ['stable27']

name: php${{ matrix.php-versions }}-${{ matrix.databases }}-${{ matrix.server-versions }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/static-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
ocp-version: ['dev-stable26']
ocp-version: ['dev-stable27']
name: Nextcloud ${{ matrix.ocp-version }}
steps:
- name: Checkout
Expand Down
6 changes: 2 additions & 4 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
- **🚀 Build your own thing:** FaceRecognition app is just a basic building block. Through FaceRecognition API, you can build your advanced scenarios - automatically add tags to images, connect contacts and persons, share images from specific person… We want to hear your ideas!
]]>
</description>
<version>0.9.20</version>
<version>0.9.30</version>
<licence>agpl</licence>
<author>Matias De lellis</author>
<author>Branko Kokanovic</author>
Expand All @@ -34,9 +34,7 @@
<screenshot>https://matiasdelellis.github.io/img/facerecognition/facerecognition-assign-initial-name.jpeg</screenshot>
<dependencies>
<php min-version="8.0" max-version="8.2" />
<lib>pdlib</lib>
<lib>bz2</lib>
<nextcloud min-version="26" max-version="27"/>
<nextcloud min-version="27" max-version="27"/>
</dependencies>
<repair-steps>
<uninstall>
Expand Down
14 changes: 4 additions & 10 deletions lib/BackgroundJob/Tasks/CheckRequirementsTask.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ public function execute(FaceRecognitionContext $context) {
$phpMemory = MemoryLimits::getPhpMemory();
$this->logDebug("PHP Memory Limit: " . ($phpMemory > 0 ? $phpMemory : "Unknown"));

$this->logDebug("Clustering backend: " . (Requirements::pdlibLoaded() ? "pdlib" : "PHP (Not recommended."));

if ($this->imaginaryHelper->isEnabled()) {
$this->logDebug("Backend of images: Imaginary");
$this->logDebug("Image Backend: Imaginary");
$version = $this->imaginaryHelper->getVersion();
if ($version) {
$this->logDebug("Imaginary version: " . $version);
Expand All @@ -104,15 +106,7 @@ public function execute(FaceRecognitionContext $context) {
return false;
}
} else {
$this->logDebug("Backend of images: Imagick");
}

if (!Requirements::pdlibLoaded()) {
$error_message =
"The PDlib PHP extension is not loaded. Cannot continue without it." .
"Please read the documentation again about how to install the application: https://github.com/matiasdelellis/facerecognition/wiki/Installation";
$this->logInfo($error_message);
return false;
$this->logDebug("Image Backend: Imagick");
}

if (!Requirements::hasEnoughMemory()) {
Expand Down
32 changes: 25 additions & 7 deletions lib/BackgroundJob/Tasks/CreateClustersTask.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php
/**
* @copyright Copyright (c) 2017-2020 Matias De lellis <mati86dl@gmail.com>
* @copyright Copyright (c) 2017-2023 Matias De lellis <mati86dl@gmail.com>
* @copyright Copyright (c) 2018, Branko Kokanovic <branko@kokanovic.org>
*
* @author Branko Kokanovic <branko@kokanovic.org>
Expand Down Expand Up @@ -33,6 +33,9 @@
use OCA\FaceRecognition\Db\PersonMapper;

use OCA\FaceRecognition\Helper\Euclidean;
use OCA\FaceRecognition\Helper\Requirements;

use OCA\FaceRecognition\Clusterer\ChineseWhispers;

use OCA\FaceRecognition\Service\SettingsService;
/**
Expand Down Expand Up @@ -282,10 +285,9 @@ private function getNewClusters(array $faces): array {
// Clustering parameters
$sensitivity = $this->settingsService->getSensitivity();

// Create edges for chinese whispers
$edges = array();

if (version_compare(phpversion('pdlib'), '1.0.2', '>=')) {
if (Requirements::pdlibLoaded()) {
// Create edges (neighbors) for Chinese Whispers
$edges = array();
$faces_count = count($faces);
for ($i = 0; $i < $faces_count; $i++) {
$face1 = $faces[$i];
Expand All @@ -304,8 +306,14 @@ private function getNewClusters(array $faces): array {
}
}
}

// Given the edges get the list of labels (found clusters) for each face.
$newChineseClustersByIndex = dlib_chinese_whispers($edges);
} else {
// Create edges (neighbors) for Chinese Whispers
$edges = array();
$faces_count = count($faces);

for ($i = 0; $i < $faces_count; $i++) {
$face1 = $faces[$i];
if (!isset($face1->descriptor)) {
Expand All @@ -323,17 +331,27 @@ private function getNewClusters(array $faces): array {
}
}
}

// The clustering algorithm actually expects ordered lists.
$oedges = [];
ChineseWhispers::convert_unordered_to_ordered($edges, $oedges);
usort($oedges, function($a, $b) {
if ($a[0] === $b[0]) return $a[1] - $b[1];
return $a[0] - $b[0];
});

// Given the edges get the list of labels (found clusters) for each face.
$newChineseClustersByIndex = [];
ChineseWhispers::predict($oedges, $newChineseClustersByIndex);
}

$newChineseClustersByIndex = dlib_chinese_whispers($edges);
$newClusters = array();
for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
if (!isset($newClusters[$newChineseClustersByIndex[$i]])) {
$newClusters[$newChineseClustersByIndex[$i]] = array();
}
$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
}

return $newClusters;
}

Expand Down
159 changes: 159 additions & 0 deletions lib/Clusterer/ChineseWhispers.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023, Matias De lellis
*
* @author Matias De lellis <mati86dl@gmail.com>
*
* @license AGPL-3.0-or-later
*
* This code is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License, version 3,
* along with this program. If not, see <http://www.gnu.org/licenses/>
*
*/

namespace OCA\FaceRecognition\Clusterer;


/**
 * Pure-PHP implementation of the graph clustering algorithm described in
 * "Chinese Whispers - an Efficient Graph Clustering Algorithm and its
 * Application to Natural Language Processing Problems" by Chris Biemann.
 *
 * The structure follows the dlib implementation:
 * - https://github.com/davisking/dlib/blob/master/dlib/clustering/chinese_whispers.h
 *
 * Nodes are identified by non-negative integer indexes and edges are
 * two-element lists `[from, to]`.
 */
class ChineseWhispers {

    /**
     * Cluster the nodes of a graph by assigning a label to each of them.
     *
     * @param array $edges Ordered edges `[from, to]`, sorted by `from` so that
     *                     all edges of a node are contiguous (see
     *                     convert_unordered_to_ordered() and find_neighbor_ranges()).
     * @param array $labels Output. Replaced by a list where `$labels[$node]` is
     *                      the cluster label of that node; labels form the
     *                      contiguous range 0..(return value - 1).
     * @param int $num_iterations Number of label updates performed per node.
     *
     * @return int The number of clusters found (0 when there are no edges).
     */
    public static function predict(array &$edges, array &$labels, int $num_iterations = 100): int
    {
        $labels = [];
        if (count($edges) === 0) {
            return 0;
        }

        $neighbors = [];
        self::find_neighbor_ranges($edges, $neighbors);
        $num_nodes = count($neighbors);

        // Initialize the labels, each node gets a different label.
        for ($i = 0; $i < $num_nodes; ++$i) {
            $labels[$i] = $i;
        }

        // Nodes are visited in a pseudo-random order. A fixed seed keeps the
        // resulting clusters stable between runs over the same edges.
        mt_srand(2023);
        try {
            $total_updates = $num_nodes * $num_iterations;
            for ($iter = 0; $iter < $total_updates; ++$iter) {
                // Pick a random node.
                $idx = mt_rand() % $num_nodes;

                // Count how many times each label happens amongst our neighbors.
                $labels_to_counts = [];
                [$begin, $end] = $neighbors[$idx];
                for ($i = $begin; $i != $end; ++$i) {
                    $neighbor_label = $labels[$edges[$i][1]];
                    $labels_to_counts[$neighbor_label] = ($labels_to_counts[$neighbor_label] ?? 0) + 1;
                }

                // Find the most common label. On a tie the label that was
                // encountered first wins; a node without edges keeps its label.
                $best_score = PHP_INT_MIN;
                $best_label = $labels[$idx];
                foreach ($labels_to_counts as $label => $count) {
                    if ($count > $best_score) {
                        $best_score = $count;
                        $best_label = $label;
                    }
                }

                $labels[$idx] = $best_label;
            }
        } finally {
            // mt_srand() changes the process-wide generator. Reseed it randomly
            // so that unrelated mt_rand()/rand()/shuffle() calls made after the
            // clustering do not become predictable.
            mt_srand();
        }

        // Remap the labels into a contiguous range, numbering them in order
        // of first appearance.
        $label_remap = [];
        for ($i = 0; $i < $num_nodes; ++$i) {
            $label = $labels[$i];
            if (!isset($label_remap[$label])) {
                $next_id = count($label_remap);
                $label_remap[$label] = $next_id;
            }
            $labels[$i] = $label_remap[$label];
        }

        return count($label_remap);
    }

    /**
     * Compute, for every node, the range of positions in $edges holding its edges.
     *
     * @param array $edges Ordered edges `[from, to]` indexed 0..n-1 and sorted
     *                     by `from`; the ranges are only meaningful if the
     *                     edges of each node are contiguous.
     * @param array $neighbors Output. Replaced by a list with one entry per node
     *                         where `[$neighbors[$i][0], $neighbors[$i][1])` is
     *                         the half-open range within $edges that contains
     *                         all edges of node $i. Nodes without edges get `[0, 0]`.
     */
    public static function find_neighbor_ranges(&$edges, &$neighbors): void
    {
        $num_nodes = self::max_index_plus_one($edges);
        // Always start from a clean list so stale entries of the caller's
        // variable cannot leak into the result.
        $neighbors = array_fill(0, $num_nodes, [0, 0]);

        $num_edges = count($edges);
        $cur_node = 0;
        $start_idx = 0;
        for ($i = 0; $i < $num_edges; ++$i) {
            if ($edges[$i][0] != $cur_node) {
                // The source node changed: close the range of the previous one.
                $neighbors[$cur_node] = [$start_idx, $i];
                $start_idx = $i;
                $cur_node = $edges[$i][0];
            }
        }
        if ($num_nodes !== 0) {
            // Close the range of the last node seen.
            $neighbors[$cur_node] = [$start_idx, $num_edges];
        }
    }

    /**
     * Return the number of nodes implied by a list of edges.
     *
     * @param array $pairs Edges as `[index1, index2]` pairs.
     *
     * @return int The largest node index found plus one, or 0 for an empty list.
     */
    public static function max_index_plus_one($pairs): int
    {
        if (count($pairs) === 0) {
            return 0;
        }

        $max_idx = 0;
        foreach ($pairs as $pair) {
            $max_idx = max($max_idx, $pair[0], $pair[1]);
        }
        return $max_idx + 1;
    }

    /**
     * Expand undirected edges into directed ones.
     *
     * Every edge `[a, b]` is emitted as `[a, b]` followed by `[b, a]`;
     * self-loops (`a == b`) are emitted only once. The output keeps the input
     * order and is NOT sorted.
     *
     * @param array $edges Undirected edges as `[index1, index2]` pairs.
     * @param array $out_edges Output. Replaced by the list of directed edges.
     */
    public static function convert_unordered_to_ordered(&$edges, &$out_edges): void
    {
        $out_edges = [];
        foreach ($edges as $edge) {
            $out_edges[] = [$edge[0], $edge[1]];
            if ($edge[0] != $edge[1]) {
                $out_edges[] = [$edge[1], $edge[0]];
            }
        }
    }
}
4 changes: 0 additions & 4 deletions lib/Model/ExternalModel/ExternalModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ public function isInstalled(): bool {
}

public function meetDependencies(string &$error_message): bool {
if (!extension_loaded('pdlib')) {
$error_message = "The PDlib PHP extension is not loaded.";
return false;
}
if (is_null($this->settingsService->getExternalModelUrl())) {
$error_message = "You still need to configure the URL of the service running the model.";
return false;
Expand Down