8 changes: 8 additions & 0 deletions chimerapy/pipelines/__init__.py
@@ -22,6 +22,14 @@ def register_nodes_metadata():
"chimerapy.pipelines.yolov8.multi_vid_pose:YoloV8Node",
"chimerapy.pipelines.yolov8.multi_save:MultiSaveNode",
"chimerapy.pipelines.yolov8.display:DisplayNode",
"chimerapy.pipelines.huggin_face.hf_video:HFVideo",
"chimerapy.pipelines.huggin_face.hf_cv_node:HFCVNode",
"chimerapy.pipelines.huggin_face.hf_display:HFDisplay",
"chimerapy.pipelines.huggin_face.hf_text:HFText",
"chimerapy.pipelines.huggin_face.hf_text_node:HFTextNode",
"chimerapy.pipelines.huggin_face.data_vqa:HFVQA",
"chimerapy.pipelines.huggin_face.hf_vqa:HFVQANode",

],
}

51 changes: 51 additions & 0 deletions chimerapy/pipelines/huggin_face/README.md
@@ -0,0 +1,51 @@
# Integrating Hugging Face Models
## Nodes
- **hf_text_node: HFTextNode** -- Accepts textual input (currently supplied by hf_text) and applies the specified Hugging Face model to the input text. The model and/or task must be specified in the configuration file.
- **hf_cv_node: HFCVNode** -- Accepts input frames (currently supplied by hf_video) and applies the specified Hugging Face computer-vision model to the input frames. The model and/or task must be specified in the configuration file.
- **hf_vqa: HFVQANode** -- Accepts input frames and a question (currently supplied by data_vqa) and applies the specified VQA model to the frames. The model and/or task must be specified in the configuration file.

- Currently, all outputs are printed to the command line.

## Example Use
- Example configs for all three nodes are in the configs/huggingface folder, one per node; a minimal standalone sketch is shown below.
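As an illustration, here is a minimal, hypothetical sketch of driving `HFTextNode` directly, outside the orchestrator. The class and the `DataChunk` `add`/`get` API come from this PR; the task choice and the assumption that the node can be exercised standalone are mine:

```python
import chimerapy.engine as cpe
from chimerapy.pipelines.huggin_face.hf_text_node import HFTextNode

# Build the node with an assumed text-classification task
# (no explicit model, so transformers picks its default checkpoint).
node = HFTextNode(name="hf_text", task="text-classification", device="cpu")
node.setup()  # loads the transformers pipeline

# Feed one piece of text the same way HFText would.
chunk = cpe.DataChunk()
chunk.add("data", "ChimeraPy makes streaming pipelines easy.")
result = node.step({"hf_text": chunk})
print(result.get("data")["value"])
```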



**Using HF models requires the installation of additional packages:**
- transformers: the Hugging Face library providing pipeline access to models for various tasks (`pip install transformers`)


## Parameters to specify the model (from the Hugging Face documentation)
**task** (str) — The task defining which pipeline will be returned. Currently accepted tasks are:
- "audio-classification": will return an AudioClassificationPipeline.
- "automatic-speech-recognition": will return an AutomaticSpeechRecognitionPipeline.
- "conversational": will return a ConversationalPipeline.
- "depth-estimation": will return a DepthEstimationPipeline.
- "document-question-answering": will return a DocumentQuestionAnsweringPipeline.
- "feature-extraction": will return a FeatureExtractionPipeline.
- "fill-mask": will return a FillMaskPipeline.
- "image-classification": will return an ImageClassificationPipeline.
- "image-segmentation": will return an ImageSegmentationPipeline.
- "image-to-text": will return an ImageToTextPipeline.
- "mask-generation": will return a MaskGenerationPipeline.
- "object-detection": will return an ObjectDetectionPipeline.
- "question-answering": will return a QuestionAnsweringPipeline.
- "summarization": will return a SummarizationPipeline.
- "table-question-answering": will return a TableQuestionAnsweringPipeline.
- "text2text-generation": will return a Text2TextGenerationPipeline.
- "text-classification" (alias "sentiment-analysis" available): will return a TextClassificationPipeline.
- "text-generation": will return a TextGenerationPipeline.
- "text-to-audio" (alias "text-to-speech" available): will return a TextToAudioPipeline.
- "token-classification" (alias "ner" available): will return a TokenClassificationPipeline.
- "translation": will return a TranslationPipeline.
- "translation_xx_to_yy": will return a TranslationPipeline.
- "video-classification": will return a VideoClassificationPipeline.
- "visual-question-answering": will return a VisualQuestionAnsweringPipeline.
- "zero-shot-classification": will return a ZeroShotClassificationPipeline.
- "zero-shot-image-classification": will return a ZeroShotImageClassificationPipeline.
- "zero-shot-audio-classification": will return a ZeroShotAudioClassificationPipeline.
- "zero-shot-object-detection": will return a ZeroShotObjectDetectionPipeline.

**model** (str or PreTrainedModel or TFPreTrainedModel, optional) — The model that will be used by the pipeline to make predictions. This can be a model identifier or an actual instance of a pretrained model inheriting from PreTrainedModel (for PyTorch) or TFPreTrainedModel (for TensorFlow).
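For example, a minimal standalone use of the `transformers` pipeline API; the checkpoint name below is a common, illustrative choice rather than anything mandated by this PR:

```python
from transformers import pipeline

# `task` and `model` map to the `task` and `model_name` node parameters.
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
print(classifier("ChimeraPy pipelines are fun to build."))
# e.g. [{'label': 'POSITIVE', 'score': 0.99}]
```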

Empty file.
20 changes: 20 additions & 0 deletions chimerapy/pipelines/huggin_face/data.py
@@ -0,0 +1,20 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional

import numpy as np

if TYPE_CHECKING:
from ultralytics.yolo.engine.results import Results


@dataclass
class YOLOFrame:
"""A frame from a video source."""

arr: np.ndarray
frame_count: int
src_id: str
result: Optional["Results"] = None

def __repr__(self) -> str:
return f"<Frame from {self.src_id} {self.frame_count}>"
40 changes: 40 additions & 0 deletions chimerapy/pipelines/huggin_face/data_vqa.py
@@ -0,0 +1,40 @@
import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node
from chimerapy.pipelines.generic_nodes.video_nodes import Video

from .data import YOLOFrame


@source_node(name="CPPipelines_HFVQA")
class HFVQA(Video):
    """A node that mimics data input for a VQA model."""

    def __init__(
        self,
        video_src: str,
        name: str = "text",
        data_key: str = "data",
        frame_key: str = "frame",
        text_src: str = "what is in the image",
        download_video: bool = False,
        **kwargs,
    ) -> None:
        self.frame_key = frame_key
        self.data_key = data_key
        self.text_src = text_src
        super().__init__(
            name=name,
            video_src=video_src,
            frame_key=frame_key,
            loop=True,
            download_video=download_video,
            include_meta=True,
            **kwargs,
        )

    def step(self) -> cpe.DataChunk:
        data_chunk = super().step()
        ret_chunk = cpe.DataChunk()
        frame_arr = data_chunk.get(self.frame_key)["value"]
        src_id = data_chunk.get("metadata")["value"]["source_name"]
        frame_count = data_chunk.get("metadata")["value"]["frame_count"]
        # Emit a [question, frame] pair; the question is a fixed default
        # for now but can be overridden via `text_src`.
        ret_chunk.add(
            self.data_key,
            [
                self.text_src,
                YOLOFrame(frame_arr, src_id=src_id, frame_count=frame_count),
            ],
        )
        return ret_chunk
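The question defaults to a fixed placeholder; overriding it per source is a one-liner (the video path here is illustrative):

```python
# Hypothetical source; `text_src` replaces the default question.
vqa_src = HFVQA(
    video_src="videos/kitchen.mp4",
    text_src="How many people are visible?",
)
```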

86 changes: 86 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_cv_node.py
@@ -0,0 +1,86 @@
from typing import Dict, Literal

import chimerapy.engine as cpe
from chimerapy.orchestrator import step_node
from PIL import Image

from .data import YOLOFrame


@step_node(name="CPPipelines_HFCVNode")
class HFCVNode(cpe.Node):
    """A node to apply Hugging Face models on a video source.

    Parameters
    ----------
    name: str, required
        The name of the node.

    model_name: str, required
        The name of the Hugging Face model to apply.

    task: str, optional (default: "")
        The task to perform when the model does not define one.

    device: Literal["cpu", "cuda"], optional (default: "cpu")
        The device to use for running the model.

    frames_key: str, optional (default: 'frame')
        The key to access the frames in the video.
    """

    def __init__(
        self,
        name: str,
        model_name: str,
        task: str = "",
        device: Literal["cpu", "cuda"] = "cpu",
        frames_key: str = "frame",
    ):
        self.model_name = model_name
        self.device = 0 if device == "cuda" else "cpu"
        self.task = task
        self.frames_key = frames_key

        super().__init__(name=name)

    def setup(self):
        from transformers import pipeline

        try:
            # Prefer an explicit task; otherwise infer it from the model.
            if self.task != "":
                if self.model_name != "":
                    self.model = pipeline(task=self.task, model=self.model_name, device=self.device)
                else:
                    self.model = pipeline(task=self.task, device=self.device)
            else:
                self.model = pipeline(model=self.model_name, device=self.device)

            print(f"Successfully loaded model: {self.model_name}")
        except Exception:
            print(f"Failed to load model '{self.model_name}' for task '{self.task}'.")

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> cpe.DataChunk:
        ret_chunk = cpe.DataChunk()

        for _, data_chunk in data_chunks.items():
            frame = data_chunk.get(self.frames_key)["value"]

            # Run the model on a PIL image built from the raw frame array.
            img = Image.fromarray(frame.arr)
            result = self.model(img)
            print(result)

            new_frame = YOLOFrame(
                arr=frame.arr,
                frame_count=frame.frame_count,
                src_id=frame.src_id,
                result=result,
            )

            ret_chunk.add(self.frames_key, new_frame)

        return ret_chunk
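A brief, hypothetical instantiation of this node; the checkpoint name is illustrative, not part of this PR:

```python
# "google/vit-base-patch16-224" is a common image-classification checkpoint.
cv_node = HFCVNode(
    name="hf_cv",
    model_name="google/vit-base-patch16-224",
    task="image-classification",
    device="cpu",
)
```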
30 changes: 30 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_display.py
@@ -0,0 +1,30 @@
from typing import Dict

import cv2

import chimerapy.engine as cpe
from chimerapy.orchestrator import sink_node


@sink_node(name="CPPipelines_HFDisplay")
class HFDisplay(cpe.Node):
    """A node that displays results after applying Hugging Face models."""

    def __init__(
        self,
        frames_key: str = "frame",
        name: str = "DisplayNode",
    ) -> None:
        self.frames_key = frames_key
        super().__init__(name=name)

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> None:
        for _, data_chunk in data_chunks.items():
            frame = data_chunk.get(self.frames_key)["value"]
            cv2.imshow(frame.src_id, frame.arr)
            cv2.waitKey(1)

    def teardown(self) -> None:
        cv2.destroyAllWindows()
37 changes: 37 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_text.py
@@ -0,0 +1,37 @@
import time
from typing import Optional

import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node


@source_node(name="CPPipelines_HFText")
class HFText(cpe.Node):
    """A text node that reads textual input from a text file, one line per step."""

    def __init__(
        self,
        name: str = "text",
        data_key: str = "data",
        source: str = "./test.txt",
    ) -> None:
        self.data_key = data_key
        self.source = source
        super().__init__(name=name)

    def setup(self):
        self.file = open(self.source, "r")

    def step(self) -> Optional[cpe.DataChunk]:
        if self.file:
            line = self.file.readline()
            # Simulate input at a rate of one line per second.
            time.sleep(1)
            if line:
                ret_chunk = cpe.DataChunk()
                ret_chunk.add(self.data_key, line)
                return ret_chunk
        return None

    def teardown(self) -> None:
        # Close the text file opened in setup().
        self.file.close()
77 changes: 77 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_text_node.py
@@ -0,0 +1,77 @@
from typing import Dict, Literal

import chimerapy.engine as cpe
from chimerapy.orchestrator import step_node


@step_node(name="CPPipelines_HFTextNode")
class HFTextNode(cpe.Node):
    """A node to apply Hugging Face models on text inputs.

    Parameters
    ----------
    name: str, required
        The name of the node.

    model_name: str, optional (default: "")
        The name of the Hugging Face model to apply.

    task: str, optional (default: "")
        The task to perform when the model does not define one.

    device: Literal["cpu", "cuda"], optional (default: "cpu")
        The device to use for running the model.

    data_key: str, optional (default: 'data')
        The key to access the text input in each data chunk.
    """

    def __init__(
        self,
        name: str,
        model_name: str = "",
        task: str = "",
        device: Literal["cpu", "cuda"] = "cpu",
        data_key: str = "data",
    ):
        self.model_name = model_name
        self.device = 0 if device == "cuda" else "cpu"
        self.task = task
        self.data_key = data_key

        super().__init__(name=name)

    def setup(self):
        from transformers import pipeline

        try:
            # Prefer an explicit task; otherwise infer it from the model.
            if self.task != "":
                if self.model_name != "":
                    self.model = pipeline(task=self.task, model=self.model_name, device=self.device)
                else:
                    self.model = pipeline(task=self.task, device=self.device)
            else:
                self.model = pipeline(model=self.model_name, device=self.device)

            print(f"Successfully loaded model: {self.model_name}")
        except Exception:
            print(f"Failed to load model '{self.model_name}' for task '{self.task}'.")

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> cpe.DataChunk:
        ret_chunk = cpe.DataChunk()

        for _, data_chunk in data_chunks.items():
            question = data_chunk.get(self.data_key)["value"]

            result = self.model(question)
            print(question, result)

            ret_chunk.add(self.data_key, result)

        return ret_chunk
28 changes: 28 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_video.py
@@ -0,0 +1,28 @@
import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node
from chimerapy.pipelines.generic_nodes.video_nodes import Video

from .data import YOLOFrame


@source_node(name="CPPipelines_HFVideo")
class HFVideo(Video):
"""A video node that returns a Frame object with identifiable metadata."""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.include_meta = True

def step(self) -> cpe.DataChunk:
data_chunk = super().step()
ret_chunk = cpe.DataChunk()
frame_arr = data_chunk.get(self.frame_key)["value"]
src_id = data_chunk.get("metadata")["value"]["source_name"]
frame_count = data_chunk.get("metadata")["value"]["frame_count"]

ret_chunk.add(
self.frame_key,
YOLOFrame(frame_arr, src_id=src_id, frame_count=frame_count),
)

return ret_chunk