diff --git a/.github/workflows/cluster-faces-test.yml b/.github/workflows/cluster-faces-test.yml index e07a43712..f54e67c6b 100644 --- a/.github/workflows/cluster-faces-test.yml +++ b/.github/workflows/cluster-faces-test.yml @@ -25,7 +25,7 @@ jobs: # do not stop on another job's failure fail-fast: false matrix: - php-versions: ['8.2'] + php-versions: ['8.3'] databases: ['sqlite'] server-versions: ['master'] pure-js-mode: ['false'] diff --git a/.github/workflows/files-scan-test.yml b/.github/workflows/files-scan-test.yml index cfae8dd55..6f3ce749b 100644 --- a/.github/workflows/files-scan-test.yml +++ b/.github/workflows/files-scan-test.yml @@ -23,7 +23,7 @@ jobs: # do not stop on another job's failure fail-fast: false matrix: - php-versions: ['8.2'] + php-versions: ['8.3'] databases: ['sqlite', 'mysql', 'pgsql'] server-versions: ['master'] diff --git a/.github/workflows/full-run-test.yml b/.github/workflows/full-run-test.yml index 6aca00ff0..7df6a2901 100644 --- a/.github/workflows/full-run-test.yml +++ b/.github/workflows/full-run-test.yml @@ -28,7 +28,7 @@ jobs: # do not stop on another job's failure fail-fast: false matrix: - php-versions: ['8.2'] + php-versions: ['8.3'] databases: ['sqlite'] server-versions: ['master'] pure-js-mode: ['false'] @@ -41,7 +41,7 @@ jobs: # test pure-js once - server-versions: master databases: sqlite - php-versions: 8.2 + php-versions: 8.3 pure-js-mode: true imagenet-enabled: true faces-enabled: true diff --git a/composer.json b/composer.json index a9d120a81..7cd269560 100644 --- a/composer.json +++ b/composer.json @@ -42,6 +42,7 @@ "php": "8.2.0" }, "allow-plugins": { + "bamarni/composer-bin-plugin": true, "composer/package-versions-deprecated": true }, "autoloader-suffix": "Recognize", diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index ff23f55e6..e4be8f757 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -11,6 +11,11 @@ use OCA\DAV\Events\SabrePluginAddEvent; use OCA\Recognize\Dav\Faces\PropFindPlugin; use OCA\Recognize\Hooks\FileListener; +use OCA\Recognize\TaskProcessing\AudioClassificationTaskType; +use OCA\Recognize\TaskProcessing\ImageClassificationTaskType; +use OCA\Recognize\TaskProcessing\ImageFaceRecognitionTaskType; +use OCA\Recognize\TaskProcessing\TaskResultListener; +use OCA\Recognize\TaskProcessing\VideoClassificationTaskType; use OCP\AppFramework\App; use OCP\AppFramework\Bootstrap\IBootContext; use OCP\AppFramework\Bootstrap\IBootstrap; @@ -23,6 +28,8 @@ use OCP\Files\Events\Node\NodeDeletedEvent; use OCP\Files\Events\Node\NodeRenamedEvent; use OCP\Files\Events\NodeRemovedFromCache; +use OCP\TaskProcessing\Events\TaskFailedEvent; +use OCP\TaskProcessing\Events\TaskSuccessfulEvent; final class Application extends App implements IBootstrap { public const APP_ID = 'recognize'; @@ -44,6 +51,9 @@ public function __construct() { $dispatcher->addServiceListener('OCP\Files\Config\Event\UserMountRemovedEvent', FileListener::class); // it is not fired as of now, Added and Removed events are fired instead in that order // $context->addServiceListener('OCP\Files\Config\Event\UserMountUpdatedEvent', FileListener::class); + + $dispatcher->addServiceListener(TaskSuccessfulEvent::class, TaskResultListener::class); + $dispatcher->addServiceListener(TaskFailedEvent::class, TaskResultListener::class); } public function register(IRegistrationContext $context): void { @@ -53,6 +63,11 @@ public function register(IRegistrationContext $context): void { /** Register $principalBackend for the DAV collection */ $context->registerServiceAlias('principalBackend', Principal::class); + + $context->registerTaskProcessingTaskType(ImageClassificationTaskType::class); + $context->registerTaskProcessingTaskType(VideoClassificationTaskType::class); + $context->registerTaskProcessingTaskType(AudioClassificationTaskType::class); + $context->registerTaskProcessingTaskType(ImageFaceRecognitionTaskType::class); } /** diff --git a/lib/BackgroundJobs/ClassifierJob.php b/lib/BackgroundJobs/ClassifierJob.php index 3551b9f12..94adfe540 100644 --- a/lib/BackgroundJobs/ClassifierJob.php +++ b/lib/BackgroundJobs/ClassifierJob.php @@ -29,7 +29,7 @@ public function __construct( private SettingsService $settingsService, ) { parent::__construct($time); - $this->setInterval(60 * 5); + $this->setInterval(60); $this->setTimeSensitivity(self::TIME_INSENSITIVE); $this->setAllowParallelRuns($settingsService->getSetting('concurrency.enabled') === 'true'); } @@ -38,10 +38,13 @@ public function __construct( * @param array{storageId: int, rootId: int} $argument */ protected function runClassifier(string $model, array $argument): void { - sleep(10); - if ($this->settingsService->getSetting('concurrency.enabled') !== 'true' && $this->anyOtherClassifierJobsRunning()) { - $this->logger->debug('Stalling job '.static::class.' with argument ' . var_export($argument, true) . ' because other classifiers are already reserved'); - return; + $taskProcessingMode = $this->settingsService->getSetting('taskprocessing.enabled') === 'true'; + if (!$taskProcessingMode) { + sleep(10); + if ($this->settingsService->getSetting('concurrency.enabled') !== 'true' && $this->anyOtherClassifierJobsRunning()) { + $this->logger->debug('Stalling job '.static::class.' with argument ' . var_export($argument, true) . ' because other classifiers are already reserved'); + return; + } } $storageId = $argument['storageId']; @@ -53,9 +56,10 @@ protected function runClassifier(string $model, array $argument): void { return; } $this->logger->debug('Classifying files of storage '.$storageId. ' using '.$model); + $batchSize = $taskProcessingMode ? 500 : $this->getBatchSize(); try { - $this->logger->debug('fetching '.$this->getBatchSize().' files from '.$model.' queue'); - $files = $this->queue->getFromQueue($model, $storageId, $rootId, $this->getBatchSize()); + $this->logger->debug('fetching '.$batchSize.' files from '.$model.' queue'); + $files = $this->queue->getFromQueue($model, $storageId, $rootId, $batchSize); } catch (Exception $e) { $this->settingsService->setSetting($model.'.status', 'false'); $this->logger->error('Cannot retrieve items from '.$model.' queue', ['exception' => $e]); diff --git a/lib/BackgroundJobs/ClassifyFacesJob.php b/lib/BackgroundJobs/ClassifyFacesJob.php index 7684df78f..bc7b168ce 100644 --- a/lib/BackgroundJobs/ClassifyFacesJob.php +++ b/lib/BackgroundJobs/ClassifyFacesJob.php @@ -8,6 +8,7 @@ namespace OCA\Recognize\BackgroundJobs; use OCA\Recognize\Classifiers\Images\ClusteringFaceClassifier; +use OCA\Recognize\Classifiers\TaskProcessing\ImageFaceRecognitionClassifier as TaskProcessingFaceClassifier; use OCA\Recognize\Service\Logger; use OCA\Recognize\Service\QueueService; use OCA\Recognize\Service\SettingsService; @@ -20,11 +21,13 @@ final class ClassifyFacesJob extends ClassifierJob { private SettingsService $settingsService; private ClusteringFaceClassifier $faces; + private TaskProcessingFaceClassifier $tpFaces; - public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, ClusteringFaceClassifier $faceClassifier, IUserMountCache $mountCache, IJobList $jobList) { + public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, ClusteringFaceClassifier $faceClassifier, TaskProcessingFaceClassifier $tpFaces, IUserMountCache $mountCache, IJobList $jobList) { parent::__construct($time, $logger, $queue, $mountCache, $jobList, $settingsService); $this->settingsService = $settingsService; $this->faces = $faceClassifier; + $this->tpFaces = $tpFaces; } /** @@ -39,6 +42,10 @@ protected function run($argument): void { * @return void */ protected function classify(array $files) : void { + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + $this->tpFaces->classify($files); + return; + } $this->faces->classify($files); } diff --git a/lib/BackgroundJobs/ClassifyImagenetJob.php b/lib/BackgroundJobs/ClassifyImagenetJob.php index a3d6b2914..a9b84ec41 100644 --- a/lib/BackgroundJobs/ClassifyImagenetJob.php +++ b/lib/BackgroundJobs/ClassifyImagenetJob.php @@ -8,6 +8,7 @@ namespace OCA\Recognize\BackgroundJobs; use OCA\Recognize\Classifiers\Images\ImagenetClassifier; +use OCA\Recognize\Classifiers\TaskProcessing\ImageClassifier as TaskProcessingImageClassifier; use OCA\Recognize\Service\Logger; use OCA\Recognize\Service\QueueService; use OCA\Recognize\Service\SettingsService; @@ -20,11 +21,13 @@ final class ClassifyImagenetJob extends ClassifierJob { private SettingsService $settingsService; private ImagenetClassifier $imagenet; + private TaskProcessingImageClassifier $tpImage; - public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, ImagenetClassifier $imagenet, IUserMountCache $mountCache, IJobList $jobList) { + public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, ImagenetClassifier $imagenet, TaskProcessingImageClassifier $tpImage, IUserMountCache $mountCache, IJobList $jobList) { parent::__construct($time, $logger, $queue, $mountCache, $jobList, $settingsService); $this->settingsService = $settingsService; $this->imagenet = $imagenet; + $this->tpImage = $tpImage; } /** @@ -39,6 +42,10 @@ protected function run($argument): void { * @return void */ protected function classify(array $files) : void { + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + $this->tpImage->classify($files); + return; + } $this->imagenet->classify($files); } diff --git a/lib/BackgroundJobs/ClassifyMovinetJob.php b/lib/BackgroundJobs/ClassifyMovinetJob.php index 5c60f6574..f9d0bfee5 100644 --- a/lib/BackgroundJobs/ClassifyMovinetJob.php +++ b/lib/BackgroundJobs/ClassifyMovinetJob.php @@ -7,6 +7,7 @@ declare(strict_types=1); namespace OCA\Recognize\BackgroundJobs; +use OCA\Recognize\Classifiers\TaskProcessing\VideoClassifier as TaskProcessingVideoClassifier; use OCA\Recognize\Classifiers\Video\MovinetClassifier; use OCA\Recognize\Service\Logger; use OCA\Recognize\Service\QueueService; @@ -20,11 +21,13 @@ final class ClassifyMovinetJob extends ClassifierJob { private SettingsService $settingsService; private MovinetClassifier $movinet; + private TaskProcessingVideoClassifier $tpVideo; - public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, MovinetClassifier $movinet, IUserMountCache $mountCache, IJobList $jobList) { + public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, MovinetClassifier $movinet, TaskProcessingVideoClassifier $tpVideo, IUserMountCache $mountCache, IJobList $jobList) { parent::__construct($time, $logger, $queue, $mountCache, $jobList, $settingsService); $this->settingsService = $settingsService; $this->movinet = $movinet; + $this->tpVideo = $tpVideo; } /** @@ -40,6 +43,10 @@ protected function run($argument): void { * @throws \OCA\Recognize\Exception\Exception */ protected function classify(array $files) : void { + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + $this->tpVideo->classify($files); + return; + } $this->movinet->classify($files); } diff --git a/lib/BackgroundJobs/ClassifyMusicnnJob.php b/lib/BackgroundJobs/ClassifyMusicnnJob.php index 73a00c4a4..2522d96e9 100644 --- a/lib/BackgroundJobs/ClassifyMusicnnJob.php +++ b/lib/BackgroundJobs/ClassifyMusicnnJob.php @@ -8,6 +8,7 @@ namespace OCA\Recognize\BackgroundJobs; use OCA\Recognize\Classifiers\Audio\MusicnnClassifier; +use OCA\Recognize\Classifiers\TaskProcessing\AudioClassifier as TaskProcessingAudioClassifier; use OCA\Recognize\Service\Logger; use OCA\Recognize\Service\QueueService; use OCA\Recognize\Service\SettingsService; @@ -20,11 +21,13 @@ final class ClassifyMusicnnJob extends ClassifierJob { private SettingsService $settingsService; private MusicnnClassifier $musicnn; + private TaskProcessingAudioClassifier $tpAudio; - public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, MusicnnClassifier $musicnn, IUserMountCache $mountCache, IJobList $jobList) { + public function __construct(ITimeFactory $time, Logger $logger, QueueService $queue, SettingsService $settingsService, MusicnnClassifier $musicnn, TaskProcessingAudioClassifier $tpAudio, IUserMountCache $mountCache, IJobList $jobList) { parent::__construct($time, $logger, $queue, $mountCache, $jobList, $settingsService); $this->settingsService = $settingsService; $this->musicnn = $musicnn; + $this->tpAudio = $tpAudio; } /** @@ -39,6 +42,10 @@ protected function run($argument): void { * @return void */ protected function classify(array $files) : void { + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + $this->tpAudio->classify($files); + return; + } $this->musicnn->classify($files); } diff --git a/lib/Classifiers/AbstractTaskProcessingClassifier.php b/lib/Classifiers/AbstractTaskProcessingClassifier.php new file mode 100644 index 000000000..c367c62d3 --- /dev/null +++ b/lib/Classifiers/AbstractTaskProcessingClassifier.php @@ -0,0 +1,114 @@ + $queueFiles + */ + public function classify(array $queueFiles): void { + if (count($queueFiles) === 0) { + return; + } + + $storageId = $queueFiles[0]->getStorageId(); + $rootId = $queueFiles[0]->getRootId(); + $userId = $this->findUserForStorage($storageId, $rootId); + if ($userId === null) { + $this->logger->warning('No user with access for storage ' . $storageId . '/' . $rootId . ' found; dropping ' . count($queueFiles) . ' files from ' . $this->getModelName() . ' queue'); + $this->dropFromQueue($queueFiles); + return; + } + + $fileIds = array_values(array_unique(array_map(static fn (QueueFile $qf): int => $qf->getFileId(), $queueFiles))); + + $task = new Task( + $this->getTaskTypeId(), + ['input' => $fileIds], + Application::APP_ID, + $userId, + $this->getModelName(), + ); + + try { + $this->taskProcessingManager->scheduleTask($task); + } catch (\Throwable $e) { + // Leave files in the queue so they can be retried on the next job run + $this->logger->error('Failed to schedule ' . $this->getTaskTypeId() . ' task', ['exception' => $e]); + throw new \RuntimeException('Could not schedule ' . $this->getTaskTypeId() . ' task', 0, $e); + } + + /** + * @psalm-suppress PossiblyNullOperand + * @psalm-suppress InvalidOperand + */ + $this->logger->debug('Scheduled ' . $this->getTaskTypeId() . ' task #' . $task->getId() . ' for ' . count($fileIds) . ' files'); + + // Once scheduled, files leave the queue. The TaskResultListener applies results when the task completes. + $this->dropFromQueue($queueFiles); + } + + private function findUserForStorage(int $storageId, int $rootId): ?string { + $mounts = array_values(array_filter( + $this->userMountCache->getMountsForStorageId($storageId), + static fn (ICachedMountInfo $m): bool => $m->getRootId() === $rootId, + )); + if (count($mounts) === 0) { + return null; + } + return $mounts[0]->getUser()->getUID(); + } + + /** + * @param list $queueFiles + */ + private function dropFromQueue(array $queueFiles): void { + foreach ($queueFiles as $qf) { + try { + $this->queue->removeFromQueue($this->getModelName(), $qf); + } catch (Exception $e) { + $this->logger->warning('Could not remove file ' . $qf->getFileId() . ' from ' . $this->getModelName() . ' queue', ['exception' => $e]); + } + } + } +} diff --git a/lib/Classifiers/TaskProcessing/AudioClassifier.php b/lib/Classifiers/TaskProcessing/AudioClassifier.php new file mode 100644 index 000000000..71c53a0c5 --- /dev/null +++ b/lib/Classifiers/TaskProcessing/AudioClassifier.php @@ -0,0 +1,24 @@ +setName('recognize:classify') - ->setDescription('Classify all files with the current settings in one go (will likely take a long time)') + ->setDescription('Classify all files with the current settings in one go on the terminal (will likely take a long time; doesn\'t work with TaskProcessing mode)') ->addOption('retry', null, InputOption::VALUE_NONE, "Only classify untagged images"); } diff --git a/lib/Command/Recrawl.php b/lib/Command/Recrawl.php index 01990d888..1790972d4 100644 --- a/lib/Command/Recrawl.php +++ b/lib/Command/Recrawl.php @@ -41,7 +41,7 @@ public function __construct(IJobList $jobList, LoggerInterface $logger, QueueSer */ protected function configure() { $this->setName('recognize:recrawl') - ->setDescription('Go through all files again'); + ->setDescription('Trigger a full classification run in the background'); } /** diff --git a/lib/Db/FaceDetectionMapper.php b/lib/Db/FaceDetectionMapper.php index 939205429..75e93b64d 100644 --- a/lib/Db/FaceDetectionMapper.php +++ b/lib/Db/FaceDetectionMapper.php @@ -7,7 +7,9 @@ declare(strict_types=1); namespace OCA\Recognize\Db; +use OCA\Recognize\Classifiers\TaskProcessing\ImageFaceRecognitionClassifier; use OCA\Recognize\Service\FaceClusterAnalyzer; +use OCA\Recognize\Service\SettingsService; use OCP\AppFramework\Db\DoesNotExistException; use OCP\AppFramework\Db\Entity; use OCP\AppFramework\Db\QBMapper; @@ -21,11 +23,20 @@ */ final class FaceDetectionMapper extends QBMapper { private IConfig $config; + private SettingsService $settingsService; - public function __construct(IDBConnection $db, IConfig $config) { + public function __construct(IDBConnection $db, IConfig $config, SettingsService $settingsService) { parent::__construct($db, 'recognize_face_detections', FaceDetection::class); $this->db = $db; $this->config = $config; + $this->settingsService = $settingsService; + } + + private function getMinDetectionSize(): float { + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + return ImageFaceRecognitionClassifier::MIN_DETECTION_SIZE; + } + return FaceClusterAnalyzer::MIN_DETECTION_SIZE; } /** @@ -329,12 +340,13 @@ public function findDetectionForPreviewImageByClusterId(int $clusterId) : FaceDe } public function countUnclustered(): int { + $minDetectionSize = $this->getMinDetectionSize(); $qb = $this->db->getQueryBuilder(); $qb->select($qb->func()->count('id')) ->from('recognize_face_detections') ->where($qb->expr()->isNull('cluster_id')) - ->andWhere($qb->expr()->gte('height', $qb->createPositionalParameter(FaceClusterAnalyzer::MIN_DETECTION_SIZE))) - ->andWhere($qb->expr()->gte('width', $qb->createPositionalParameter(FaceClusterAnalyzer::MIN_DETECTION_SIZE))); + ->andWhere($qb->expr()->gte('height', $qb->createPositionalParameter($minDetectionSize))) + ->andWhere($qb->expr()->gte('width', $qb->createPositionalParameter($minDetectionSize))); $result = $qb->executeQuery(); /** @var int|string $count */ $count = $result->fetch(\PDO::FETCH_COLUMN); @@ -347,12 +359,13 @@ public function countUnclustered(): int { * @throws \OCP\DB\Exception */ public function getUsersForUnclustered(): array { + $minDetectionSize = $this->getMinDetectionSize(); $qb = $this->db->getQueryBuilder(); $qb->selectDistinct('user_id') ->from('recognize_face_detections') ->where($qb->expr()->isNull('cluster_id')) - ->andWhere($qb->expr()->gte('height', $qb->createPositionalParameter(FaceClusterAnalyzer::MIN_DETECTION_SIZE))) - ->andWhere($qb->expr()->gte('width', $qb->createPositionalParameter(FaceClusterAnalyzer::MIN_DETECTION_SIZE))); + ->andWhere($qb->expr()->gte('height', $qb->createPositionalParameter($minDetectionSize))) + ->andWhere($qb->expr()->gte('width', $qb->createPositionalParameter($minDetectionSize))); $result = $qb->executeQuery(); /** @var array $users */ $users = $result->fetchAll(\PDO::FETCH_COLUMN); diff --git a/lib/Service/FaceClusterAnalyzer.php b/lib/Service/FaceClusterAnalyzer.php index 5c8e01052..9d1b90736 100644 --- a/lib/Service/FaceClusterAnalyzer.php +++ b/lib/Service/FaceClusterAnalyzer.php @@ -9,6 +9,7 @@ use \OCA\Recognize\Vendor\Rubix\ML\Datasets\Labeled; use \OCA\Recognize\Vendor\Rubix\ML\Kernels\Distance\Euclidean; +use OCA\Recognize\Classifiers\TaskProcessing\ImageFaceRecognitionClassifier; use OCA\Recognize\Clustering\HDBSCAN; use OCA\Recognize\Db\FaceCluster; use OCA\Recognize\Db\FaceClusterMapper; @@ -27,7 +28,12 @@ final class FaceClusterAnalyzer { private FaceDetectionMapper $faceDetections; private FaceClusterMapper $faceClusters; private Logger $logger; - private int $minDatasetSize = self::MIN_DATASET_SIZE; + private int $minDatasetSize; + private float $minDetectionSize; + private float $minClusterSeparation; + private float $maxClusterEdgeLength; + private float $maxOverlapNewCluster; + private float $minOverlapExistingCluster; private SettingsService $settingsService; public function __construct(FaceDetectionMapper $faceDetections, FaceClusterMapper $faceClusters, Logger $logger, SettingsService $settingsService) { @@ -35,6 +41,22 @@ public function __construct(FaceDetectionMapper $faceDetections, FaceClusterMapp $this->faceClusters = $faceClusters; $this->logger = $logger; $this->settingsService = $settingsService; + + if ($this->settingsService->getSetting('taskprocessing.enabled') === 'true') { + $this->minDatasetSize = ImageFaceRecognitionClassifier::MIN_DATASET_SIZE; + $this->minDetectionSize = ImageFaceRecognitionClassifier::MIN_DETECTION_SIZE; + $this->minClusterSeparation = ImageFaceRecognitionClassifier::MIN_CLUSTER_SEPARATION; + $this->maxClusterEdgeLength = ImageFaceRecognitionClassifier::MAX_CLUSTER_EDGE_LENGTH; + $this->maxOverlapNewCluster = ImageFaceRecognitionClassifier::MAX_OVERLAP_NEW_CLUSTER; + $this->minOverlapExistingCluster = ImageFaceRecognitionClassifier::MIN_OVERLAP_EXISTING_CLUSTER; + } else { + $this->minDatasetSize = self::MIN_DATASET_SIZE; + $this->minDetectionSize = self::MIN_DETECTION_SIZE; + $this->minClusterSeparation = self::MIN_CLUSTER_SEPARATION; + $this->maxClusterEdgeLength = self::MAX_CLUSTER_EDGE_LENGTH; + $this->maxOverlapNewCluster = self::MAX_OVERLAP_NEW_CLUSTER; + $this->minOverlapExistingCluster = self::MIN_OVERLAP_EXISTING_CLUSTER; + } } public function setMinDatasetSize(int $minSize) : void { @@ -64,12 +86,12 @@ public function calculateClusters(string $userId, int $batchSize = 0): void { } if ($batchSize > 0) { - $rejectedDetections = $this->faceDetections->sampleRejectedDetectionsByUserId($userId, $this->getRejectSampleSize($batchSize), self::MIN_DETECTION_SIZE, self::MIN_DETECTION_SIZE); + $rejectedDetections = $this->faceDetections->sampleRejectedDetectionsByUserId($userId, $this->getRejectSampleSize($batchSize), $this->minDetectionSize, $this->minDetectionSize); $requestedFreshDetectionCount = max($batchSize - count($rejectedDetections) - count($sampledDetections), 500); - $freshDetections = $this->faceDetections->findUnclusteredByUserId($userId, $requestedFreshDetectionCount, self::MIN_DETECTION_SIZE, self::MIN_DETECTION_SIZE); + $freshDetections = $this->faceDetections->findUnclusteredByUserId($userId, $requestedFreshDetectionCount, $this->minDetectionSize, $this->minDetectionSize); } else { - $freshDetections = $this->faceDetections->findUnclusteredByUserId($userId, 0, self::MIN_DETECTION_SIZE, self::MIN_DETECTION_SIZE); - $rejectedDetections = $this->faceDetections->sampleRejectedDetectionsByUserId($userId, $this->getRejectSampleSize(count($freshDetections)), self::MIN_DETECTION_SIZE, self::MIN_DETECTION_SIZE); + $freshDetections = $this->faceDetections->findUnclusteredByUserId($userId, 0, $this->minDetectionSize, $this->minDetectionSize); + $rejectedDetections = $this->faceDetections->sampleRejectedDetectionsByUserId($userId, $this->getRejectSampleSize(count($freshDetections)), $this->minDetectionSize, $this->minDetectionSize); } @@ -94,7 +116,7 @@ public function calculateClusters(string $userId, int $batchSize = 0): void { $hdbscan = new HDBSCAN($dataset, $this->getMinClusterSize($n), $this->getMinSampleSize($n)); $numberOfClusteredDetections = 0; - $clusters = $hdbscan->predict(self::MIN_CLUSTER_SEPARATION, self::MAX_CLUSTER_EDGE_LENGTH); + $clusters = $hdbscan->predict($this->minClusterSeparation, $this->maxClusterEdgeLength); foreach ($clusters as $flatCluster) { /** @var int[] $detectionKeys */ @@ -132,10 +154,10 @@ public function calculateClusters(string $userId, int $batchSize = 0): void { } // If more than X% of already clustered detections are for this, we keep it - if ($overlap > self::MIN_OVERLAP_EXISTING_CLUSTER) { + if ($overlap > $this->minOverlapExistingCluster) { $clusterId = $oldClusterId; $cluster = $this->faceClusters->find($clusterId); - } elseif ($overlap < self::MAX_OVERLAP_NEW_CLUSTER) { + } elseif ($overlap < $this->maxOverlapNewCluster) { // otherwise we create a new cluster $cluster = new FaceCluster(); @@ -187,17 +209,21 @@ public function calculateClusters(string $userId, int $batchSize = 0): void { * @return list */ public static function calculateCentroidOfDetections(array $detections): array { - // init 128 dimensional vector - /** @var list $sum */ - $sum = []; - for ($i = 0; $i < self::DIMENSIONS; $i++) { - $sum[] = 0.0; - } - if (count($detections) === 0) { - return $sum; + /** @var list $empty */ + $empty = []; + for ($i = 0; $i < self::DIMENSIONS; $i++) { + $empty[] = 0.0; + } + return $empty; } + // Size the accumulator from the first detection so both 128-dim (legacy) and + // 512-dim (buffalo_l/taskprocessing) embeddings work without a runtime switch. + $dimensions = count(reset($detections)->getVector()); + /** @var list $sum */ + $sum = array_fill(0, $dimensions, 0.0); + foreach ($detections as $detection) { $sum = array_map(static function (float $el, float $el2): float { return $el + $el2; diff --git a/lib/Service/SettingsService.php b/lib/Service/SettingsService.php index 93c6eac2f..8ea9d217b 100644 --- a/lib/Service/SettingsService.php +++ b/lib/Service/SettingsService.php @@ -18,8 +18,15 @@ use OCA\Recognize\Exception\Exception; use OCP\AppFramework\Services\IAppConfig; use OCP\BackgroundJob\IJobList; +use OCP\Server; final class SettingsService { + /** + * App id of the ExApp that provides TaskProcessing classifiers; when installed + * and enabled, taskprocessing mode is on by default. + */ + public const RECOGNIZE_BACKEND_APP_ID = 'recognize_backend'; + /** @var array */ private const DEFAULTS = [ 'tensorflow.cores' => '0', @@ -31,6 +38,7 @@ final class SettingsService { 'faces.enabled' => 'false', 'musicnn.enabled' => 'false', 'movinet.enabled' => 'false', + 'taskprocessing.enabled' => '', 'node_binary' => '', 'clusterFaces.status' => 'null', 'faces.status' => 'null', @@ -105,6 +113,10 @@ public function getSetting(string $key): string { if (strpos($key, 'batchSize') !== false) { return $this->config->getAppValueString($key, $this->getSetting('tensorflow.purejs') === 'false' ? self::DEFAULTS[$key] : self::PUREJS_DEFAULTS[$key]); } + if ($key === 'taskprocessing.enabled') { + $default = $this->isRecognizeBackendInstalled() ? 'true' : 'false'; + return $this->config->getAppValueString($key, $default); + } $lazy = false; if (in_array($key, self::LAZY_SETTINGS, true)) { $lazy = true; @@ -112,6 +124,28 @@ public function getSetting(string $key): string { return $this->config->getAppValueString($key, self::DEFAULTS[$key], lazy: $lazy); } + /** + * Whether the recognize_backend ExApp is installed and enabled. The lookup + * goes through app_api's PublicFunctions service so we don't impose a hard + * dependency on app_api: if it isn't installed, this returns false. + */ + public function isRecognizeBackendInstalled(): bool { + try { + /** + * @var \OCA\AppAPI\PublicFunctions $publicFunctions + */ + $publicFunctions = Server::get(\OCA\AppAPI\PublicFunctions::class); + } catch (\Throwable $e) { + return false; + } + try { + $exApp = $publicFunctions->getExApp(self::RECOGNIZE_BACKEND_APP_ID); + } catch (\Throwable $e) { + return false; + } + return $exApp !== null && (bool)($exApp['enabled'] ?? false); + } + /** * @param string $key * @param string $value diff --git a/lib/Settings/AdminSettings.php b/lib/Settings/AdminSettings.php index 83f8a7657..31f133706 100644 --- a/lib/Settings/AdminSettings.php +++ b/lib/Settings/AdminSettings.php @@ -35,6 +35,8 @@ public function getForm(): TemplateResponse { $tagsEnabled = $this->appManager->isEnabledForAnyone('systemtags'); $this->initialState->provideInitialState('tagsEnabled', $tagsEnabled); + $this->initialState->provideInitialState('recognizeBackendInstalled', $this->settingsService->isRecognizeBackendInstalled()); + return new TemplateResponse('recognize', 'admin'); } diff --git a/lib/TaskProcessing/AudioClassificationTaskType.php b/lib/TaskProcessing/AudioClassificationTaskType.php new file mode 100644 index 000000000..8d348c920 --- /dev/null +++ b/lib/TaskProcessing/AudioClassificationTaskType.php @@ -0,0 +1,72 @@ +l->t('Audio classification'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Classify audios into categories.'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Audios'), + $this->l->t('Provide audios to classify'), + EShapeType::ListOfAudios, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'output' => new ShapeDescriptor( + $this->l->t('Categories'), + $this->l->t('The classified categories. Each input audio is mapped to a text containing a comma separated list of categories.'), + EShapeType::ListOfTexts, + ), + ]; + } +} diff --git a/lib/TaskProcessing/ImageClassificationTaskType.php b/lib/TaskProcessing/ImageClassificationTaskType.php new file mode 100644 index 000000000..83bc938b0 --- /dev/null +++ b/lib/TaskProcessing/ImageClassificationTaskType.php @@ -0,0 +1,72 @@ +l->t('Image classification'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Classify images into categories.'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Images'), + $this->l->t('Provide images to classify'), + EShapeType::ListOfImages, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'output' => new ShapeDescriptor( + $this->l->t('Categories'), + $this->l->t('The classified categories. Each input image is mapped to a text containing a comma separated list of categories.'), + EShapeType::ListOfTexts, + ), + ]; + } +} diff --git a/lib/TaskProcessing/ImageFaceRecognitionTaskType.php b/lib/TaskProcessing/ImageFaceRecognitionTaskType.php new file mode 100644 index 000000000..fefb2f129 --- /dev/null +++ b/lib/TaskProcessing/ImageFaceRecognitionTaskType.php @@ -0,0 +1,72 @@ +l->t('Image face recognition'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Recognize faces in images and return embedding vectors for each face.'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Images'), + $this->l->t('Provide images to recognize faces in'), + EShapeType::ListOfImages, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'output' => new ShapeDescriptor( + $this->l->t('Faces'), + $this->l->t('The detected faces. Each input image is mapped to a text containing JSON-encoded face descriptions ({x,y,width,height,score,vector,angle} ) separated by line breaks.'), + EShapeType::ListOfTexts, + ), + ]; + } +} diff --git a/lib/TaskProcessing/TaskResultListener.php b/lib/TaskProcessing/TaskResultListener.php new file mode 100644 index 000000000..bfcad9f6f --- /dev/null +++ b/lib/TaskProcessing/TaskResultListener.php @@ -0,0 +1,287 @@ + + */ +final class TaskResultListener implements IEventListener { + public function __construct( + private LoggerInterface $logger, + private TagManager $tagManager, + private FaceDetectionMapper $faceDetections, + private IUserMountCache $userMountCache, + private IAppConfig $config, + private IJobList $jobList, + private QueueService $queue, + private IUserSession $userSession, + private IUserManager $userManager, + ) { + } + + public function handle(Event $event): void { + if ($event instanceof TaskFailedEvent) { + $this->handleFailure($event); + return; + } + if ($event instanceof TaskSuccessfulEvent) { + $this->handleSuccess($event); + } + } + + private function handleFailure(TaskFailedEvent $event): void { + $task = $event->getTask(); + if (!$this->isOwnTask($task)) { + return; + } + $model = $this->modelForTaskType($task->getTaskTypeId()); + /** + * @psalm-suppress PossiblyNullOperand + * @psalm-suppress InvalidOperand + */ + $this->logger->warning('TaskProcessing task ' . $task->getTaskTypeId() . ' (id=' . $task->getId() . ') failed: ' . $event->getErrorMessage()); + if ($model !== null) { + $this->config->setAppValueString($model . '.status', 'false'); + } + } + + private function handleSuccess(TaskSuccessfulEvent $event): void { + $task = $event->getTask(); + if (!$this->isOwnTask($task)) { + return; + } + + $input = $task->getInput()['input'] ?? null; + $output = ($task->getOutput() ?? [])['output'] ?? null; + if (!is_array($input) || !is_array($output)) { + /** + * @psalm-suppress PossiblyNullOperand + * @psalm-suppress InvalidOperand + */ + $this->logger->warning('TaskProcessing task ' . $task->getTaskTypeId() . ' (id=' . $task->getId() . ') has unexpected input/output shape'); + return; + } + + /** @psalm-suppress RedundantFunctionCallGivenDocblockType */ + $fileIds = array_map('intval', array_values($input)); + /** @psalm-suppress RedundantFunctionCallGivenDocblockType */ + $results = array_values($output); + + $userId = $task->getUserId(); + if ($userId === null) { + /** + * @psalm-suppress PossiblyNullOperand + * @psalm-suppress InvalidOperand + */ + $this->logger->warning('TaskProcessing task ' . $task->getTaskTypeId() . ' (id=' . $task->getId() . ') has no user set, skipping this task'); + return; + } + $this->userSession->setUser($this->userManager->get($userId)); + + switch ($task->getTaskTypeId()) { + case ImageClassificationTaskType::ID: + $this->applyTagResults($fileIds, $results, ImagenetClassifier::MODEL_NAME, false); + break; + case VideoClassificationTaskType::ID: + $this->applyTagResults($fileIds, $results, MovinetClassifier::MODEL_NAME, false); + break; + case AudioClassificationTaskType::ID: + $this->applyTagResults($fileIds, $results, MusicnnClassifier::MODEL_NAME, false); + break; + case ImageFaceRecognitionTaskType::ID: + $this->applyFaceResults($fileIds, $results); + break; + default: + return; + } + } + + private function isOwnTask(Task $task): bool { + return $task->getAppId() === Application::APP_ID + && in_array($task->getTaskTypeId(), [ + ImageClassificationTaskType::ID, + VideoClassificationTaskType::ID, + AudioClassificationTaskType::ID, + ImageFaceRecognitionTaskType::ID, + ], true); + } + + private function modelForTaskType(string $taskTypeId): ?string { + return match ($taskTypeId) { + ImageClassificationTaskType::ID => ImagenetClassifier::MODEL_NAME, + VideoClassificationTaskType::ID => MovinetClassifier::MODEL_NAME, + AudioClassificationTaskType::ID => MusicnnClassifier::MODEL_NAME, + ImageFaceRecognitionTaskType::ID => ClusteringFaceClassifier::MODEL_NAME, + default => null, + }; + } + + /** + * @param list $fileIds + * @param list $results + */ + private function applyTagResults(array $fileIds, array $results, string $model, bool $forwardToLandmarks): void { + foreach ($fileIds as $i => $fileId) { + if (!isset($results[$i])) { + continue; + } + $raw = (string)$results[$i]; + $tags = array_values(array_filter(array_map('trim', explode(',', $raw)), static fn (string $t): bool => $t !== '')); + if (count($tags) === 0) { + $this->logger->debug('No tags returned for file ' . $fileId . ' from ' . $model); + continue; + } + try { + $this->tagManager->assignTags($fileId, $tags); + } catch (\Throwable $e) { + $this->logger->warning('Could not assign ' . $model . ' tags for file ' . $fileId, ['exception' => $e]); + continue; + } + $this->config->setAppValueString($model . '.status', 'true', lazy: true); + $this->config->setAppValueString($model . '.lastFile', (string)time(), lazy: true); + + if ($forwardToLandmarks) { + $landmarkTags = array_values(array_filter($tags, static fn (string $tag): bool => in_array($tag, LandmarksClassifier::PRECONDITION_TAGS, true))); + if (count($landmarkTags) > 0) { + $this->enqueueForLandmarks($fileId); + } + } + } + } + + /** + * @param list $fileIds + * @param list $results + */ + private function applyFaceResults(array $fileIds, array $results): void { + $model = ClusteringFaceClassifier::MODEL_NAME; + $scheduledClusterJobsFor = []; + foreach ($fileIds as $i => $fileId) { + if (!isset($results[$i])) { + continue; + } + $raw = (string)$results[$i]; + $userIds = $this->getUsersWithFileAccess($fileId); + foreach (explode("\n", $raw) as $line) { + $line = trim($line); + if ($line === '') { + continue; + } + try { + $face = json_decode($line, true, 512, JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + $this->logger->warning('Invalid face JSON for file ' . $fileId, ['exception' => $e]); + continue; + } + if (!is_array($face)) { + continue; + } + if (isset($face['score']) && (float)$face['score'] < ImageFaceRecognitionClassifier::MIN_FACE_RECOGNITION_SCORE) { + continue; + } + // Accept either a full face object {x,y,width,height,score,vector,angle} + // or a bare embedding vector (list of numbers). + $isBareVector = array_is_list($face) && count($face) > 0 && is_numeric($face[0]); + $vector = $isBareVector ? $face : ($face['vector'] ?? null); + if (!is_array($vector)) { + $this->logger->warning('Face entry without embedding vector for file ' . $fileId); + continue; + } + foreach ($userIds as $userId) { + $detection = new FaceDetection(); + $detection->setFileId($fileId); + $detection->setUserId($userId); + $detection->setX((float)($face['x'] ?? 0)); + $detection->setY((float)($face['y'] ?? 0)); + $detection->setWidth((float)($face['width'] ?? 0)); + $detection->setHeight((float)($face['height'] ?? 0)); + $detection->setVector($vector); + try { + $this->faceDetections->insert($detection); + } catch (\Throwable $e) { + $this->logger->error('Could not store face detection for file ' . $fileId, ['exception' => $e]); + continue; + } + if (!isset($scheduledClusterJobsFor[$userId])) { + $this->jobList->add(ClusterFacesJob::class, ['userId' => $userId]); + $scheduledClusterJobsFor[$userId] = true; + } + } + } + $this->config->setAppValueString($model . '.status', 'true', lazy: true); + $this->config->setAppValueString($model . '.lastFile', (string)time(), lazy: true); + } + } + + private function enqueueForLandmarks(int $fileId): void { + $mounts = $this->userMountCache->getMountsForFileId($fileId); + if (count($mounts) === 0) { + return; + } + $mount = $mounts[0]; + $queueFile = new QueueFile(); + $queueFile->setFileId($fileId); + $queueFile->setStorageId($mount->getStorageId()); + $queueFile->setRootId($mount->getRootId()); + $queueFile->setUpdate(false); + try { + $this->queue->insertIntoQueue(LandmarksClassifier::MODEL_NAME, $queueFile); + } catch (\Throwable $e) { + $this->logger->warning('Could not enqueue file ' . $fileId . ' for landmark detection', ['exception' => $e]); + } + } + + /** + * @return list + */ + private function getUsersWithFileAccess(int $fileId): array { + try { + $mountInfos = $this->userMountCache->getMountsForFileId($fileId); + } catch (\Throwable $e) { + $this->logger->warning('Could not look up users with access for file ' . $fileId, ['exception' => $e]); + return []; + } + return array_values(array_unique(array_map( + static fn (ICachedMountInfo $m): string => $m->getUser()->getUID(), + $mountInfos, + ))); + } +} diff --git a/lib/TaskProcessing/VideoClassificationTaskType.php b/lib/TaskProcessing/VideoClassificationTaskType.php new file mode 100644 index 000000000..0f8203099 --- /dev/null +++ b/lib/TaskProcessing/VideoClassificationTaskType.php @@ -0,0 +1,72 @@ +l->t('Video classification'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Classify videos into categories.'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Videos'), + $this->l->t('Provide videos to classify'), + EShapeType::ListOfVideos, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'output' => new ShapeDescriptor( + $this->l->t('Categories'), + $this->l->t('The classified categories. Each input video is mapped to a text containing a comma separated list of categories.'), + EShapeType::ListOfTexts, + ), + ]; + } +} diff --git a/psalm.xml b/psalm.xml index a7f8113a9..72891004f 100644 --- a/psalm.xml +++ b/psalm.xml @@ -15,6 +15,7 @@ + diff --git a/src/components/ViewAdmin.vue b/src/components/ViewAdmin.vue index 55895fbab..bc465d77e 100644 --- a/src/components/ViewAdmin.vue +++ b/src/components/ViewAdmin.vue @@ -21,13 +21,13 @@ {{ t('recognize', 'The systemtags app is currently disabled. Some features of this app will not work.') }} - + {{ t('recognize', 'Could not execute the Node.js binary. You may need to set the path to a working binary manually.') }} {{ t('recognize', 'Background Jobs are not executed via cron. Recognize requires background jobs to be executed via cron.') }} -