diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php
index ad3a9146..ee48f764 100644
--- a/lib/AppInfo/Application.php
+++ b/lib/AppInfo/Application.php
@@ -31,6 +31,8 @@
 use OCA\Assistant\Reference\Text2StickerProvider;
 use OCA\Assistant\TaskProcessing\AudioToAudioChatProvider;
 use OCA\Assistant\TaskProcessing\ContextAgentAudioInteractionProvider;
+use OCA\Assistant\TaskProcessing\ImageToTextTranslateProvider;
+use OCA\Assistant\TaskProcessing\ImageToTextTranslateTaskType;
 use OCA\Assistant\TaskProcessing\TextToStickerProvider;
 use OCA\Assistant\TaskProcessing\TextToStickerTaskType;
 use OCA\Files\Event\LoadAdditionalScriptsEvent;
@@ -111,6 +113,9 @@ public function register(IRegistrationContext $context): void {
 		$context->registerTaskProcessingTaskType(TextToStickerTaskType::class);
 		$context->registerTaskProcessingProvider(TextToStickerProvider::class);
 		$context->registerReferenceProvider(Text2StickerProvider::class);
+
+		$context->registerTaskProcessingTaskType(ImageToTextTranslateTaskType::class);
+		$context->registerTaskProcessingProvider(ImageToTextTranslateProvider::class);
 	}
 
 	public function boot(IBootContext $context): void {
diff --git a/lib/Service/TaskProcessingService.php b/lib/Service/TaskProcessingService.php
index ccee7535..d7634db1 100644
--- a/lib/Service/TaskProcessingService.php
+++ b/lib/Service/TaskProcessingService.php
@@ -19,6 +19,7 @@
 use OCP\TaskProcessing\Exception\UnauthorizedException;
 use OCP\TaskProcessing\Exception\ValidationException;
 use OCP\TaskProcessing\IManager;
+use OCP\TaskProcessing\IProvider;
 use OCP\TaskProcessing\Task;
 use OCP\TaskProcessing\TaskTypes\AudioToText;
 use OCP\TaskProcessing\TaskTypes\TextToTextSummary;
@@ -35,6 +36,17 @@ public function __construct(
 	) {
 	}
 
+	/**
+	 * Delegate to the task processing manager's getPreferredProvider().
+	 * Nothing is caught here: whatever the manager throws reaches the caller.
+	 *
+	 * @param string $taskTypeId
+	 * @return IProvider
+	 */
+	public function getPreferredProvider(string $taskTypeId): IProvider {
+		return $this->taskProcessingManager->getPreferredProvider($taskTypeId);
+	}
+
 	/**
 	 * @param Task $task
 	 * @return array
diff --git a/lib/TaskProcessing/ImageToTextTranslateProvider.php b/lib/TaskProcessing/ImageToTextTranslateProvider.php
new file mode 100644
index 00000000..283c10e9
--- /dev/null
+++ b/lib/TaskProcessing/ImageToTextTranslateProvider.php
@@ -0,0 +1,185 @@
+l->t('Assistant');
+	}
+
+	public function getTaskTypeId(): string {
+		return ImageToTextTranslateTaskType::ID;
+	}
+
+	public function getExpectedRuntime(): int {
+		return 60;
+	}
+
+	/**
+	 * Mirror the language enum values declared by the preferred provider of
+	 * the text translation task type.
+	 *
+	 * @return array enum values for 'origin_language' and 'target_language'
+	 */
+	public function getInputShapeEnumValues(): array {
+		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
+		// Query the provider once and tolerate a provider that does not declare one of the enums
+		$enumValues = $translateProvider->getInputShapeEnumValues();
+
+		return [
+			'origin_language' => $enumValues['origin_language'] ?? [],
+			'target_language' => $enumValues['target_language'] ?? [],
+		];
+	}
+
+	/**
+	 * Mirror the origin language default declared by the preferred provider of the text translation task type.
+	 *
+	 * @return array the 'origin_language' default, or an empty array when that provider declares none
+	 */
+	public function getInputShapeDefaults(): array {
+		$translateProvider = $this->taskProcessingService->getPreferredProvider(TextToTextTranslate::ID);
+		$defaults = $translateProvider->getInputShapeDefaults();
+		// Do not advertise a null default when the translation provider has none
+		if (!isset($defaults['origin_language'])) {
+			return [];
+		}
+
+		return [
+			'origin_language' => $defaults['origin_language'],
+		];
+	}
+
+	public function getOptionalInputShape(): array {
+		return [];
+	}
+
+	public function getOptionalInputShapeEnumValues(): array {
+		return [];
+	}
+
+	public function getOptionalInputShapeDefaults(): array {
+		return [];
+	}
+
+	public function getOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	public function getOptionalOutputShape(): array {
+		return [];
+	}
+
+	public function getOptionalOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	/**
+	 * Run OCR on the input images, then translate each extracted text.
+	 *
+	 * Both steps are executed as sub tasks through the task processing service.
+	 *
+	 * @param string|null $userId user the sub tasks are created for
+	 * @param array $input 'input' (list of readable File objects), 'origin_language' and 'target_language' (strings)
+	 * @param callable $reportProgress not used by this provider
+	 * @return array the translated texts, under the 'output' key
+	 * @throws RuntimeException if the input is invalid or if a sub task fails
+	 */
+	public function process(?string $userId, array $input, callable $reportProgress): array {
+		if (!isset($input['input']) || !is_array($input['input'])) {
+			throw new RuntimeException('Invalid input');
+		}
+		foreach ($input['input'] as $inputImage) {
+			if (!($inputImage instanceof File) || !$inputImage->isReadable()) {
+				throw new RuntimeException('Invalid input images');
+			}
+		}
+
+		if (!isset($input['origin_language']) || !is_string($input['origin_language'])) {
+			throw new RuntimeException('Invalid origin_language input');
+		}
+		if (!isset($input['target_language']) || !is_string($input['target_language'])) {
+			throw new RuntimeException('Invalid target_language input');
+		}
+
+		// OCR: the sub task is given the file IDs, not the File objects
+		$ocrInputs = array_map(static function (File $file) {
+			return $file->getId();
+		}, $input['input']);
+		try {
+			$task = new Task(
+				ImageToTextOpticalCharacterRecognition::ID,
+				['input' => $ocrInputs],
+				Application::APP_ID . ':internal',
+				$userId,
+			);
+			$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
+		} catch (Exception $e) {
+			$this->logger->warning('OCR sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
+			// Keep the original exception attached so the root cause is not lost
+			throw new RuntimeException('OCR sub task failed with: ' . $e->getMessage(), 0, $e);
+		}
+		// Validated outside of the try block so this error is not logged and wrapped a second time
+		$ocrOutputs = $taskOutput['output'] ?? null;
+		if (!is_array($ocrOutputs)) {
+			throw new RuntimeException('OCR sub task failed with: invalid output');
+		}
+
+		// Translation
+		$translatedOutputs = [];
+		foreach ($ocrOutputs as $ocrOutput) {
+			try {
+				$task = new Task(
+					TextToTextTranslate::ID,
+					[
+						'input' => $ocrOutput,
+						'origin_language' => $input['origin_language'],
+						'target_language' => $input['target_language'],
+					],
+					Application::APP_ID . ':internal',
+					$userId,
+				);
+				$taskOutput = $this->taskProcessingService->runTaskProcessingTask($task);
+				$translatedOutputs[] = $taskOutput['output'];
+			} catch (Exception $e) {
+				$this->logger->warning('Translation sub task failed with: ' . $e->getMessage(), ['exception' => $e]);
+				throw new RuntimeException('Translation sub task failed with: ' . $e->getMessage(), 0, $e);
+			}
+		}
+
+		return [
+			'output' => $translatedOutputs,
+		];
+	}
+}
diff --git a/lib/TaskProcessing/ImageToTextTranslateTaskType.php b/lib/TaskProcessing/ImageToTextTranslateTaskType.php
new file mode 100644
index 00000000..cf49df27
--- /dev/null
+++ b/lib/TaskProcessing/ImageToTextTranslateTaskType.php
@@ -0,0 +1,82 @@
+l->t('Translate image');
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getDescription(): string {
+		return $this->l->t('Translate the text content of an image');
+	}
+
+	/**
+	 * @return string
+	 */
+	public function getId(): string {
+		return self::ID;
+	}
+
+	/**
+	 * @return ShapeDescriptor[]
+	 */
+	public function getInputShape(): array {
+		return [
+			'input' => new ShapeDescriptor(
+				$this->l->t('Input files'),
+				$this->l->t('The files to extract text from'),
+				EShapeType::ListOfFiles
+			),
+			'origin_language' => new ShapeDescriptor(
+				$this->l->t('Origin language'),
+				$this->l->t('The language of the origin text'),
+				EShapeType::Enum
+			),
+			'target_language' => new ShapeDescriptor(
+				$this->l->t('Target language'),
+				$this->l->t('The desired language to translate the origin text in'),
+				EShapeType::Enum
+			),
+		];
+	}
+
+	/**
+	 * @return ShapeDescriptor[]
+	 */
+	public function getOutputShape(): array {
+		return [
+			'output' => new ShapeDescriptor(
+				$this->l->t('Output texts'),
+				$this->l->t('The translated texts that were extracted from the files'),
+				EShapeType::ListOfTexts
+			),
+		];
+	}
+}