8 changes: 8 additions & 0 deletions chimerapy/pipelines/__init__.py
@@ -22,6 +22,14 @@ def register_nodes_metadata():
"chimerapy.pipelines.yolov8.multi_vid_pose:YoloV8Node",
"chimerapy.pipelines.yolov8.multi_save:MultiSaveNode",
"chimerapy.pipelines.yolov8.display:DisplayNode",
"chimerapy.pipelines.huggin_face.hf_video:HFVideo",
"chimerapy.pipelines.huggin_face.hf_cv_node:HFCVNode",
"chimerapy.pipelines.huggin_face.hf_display:HFDisplay",
"chimerapy.pipelines.huggin_face.hf_text:HFText",
"chimerapy.pipelines.huggin_face.hf_text_node:HFTextNode",
"chimerapy.pipelines.huggin_face.data_vqa:HFVQA",
"chimerapy.pipelines.huggin_face.hf_vqa:HFVQANode",

],
}

51 changes: 51 additions & 0 deletions chimerapy/pipelines/huggin_face/README.md
@@ -0,0 +1,51 @@
# Integrating Hugging Face Models
## Nodes
- **hf_text_node: HFTextNode** -- Accepts textual input (currently supplied by hf_text) and applies the specified Hugging Face model to the input text. The model and/or task must be specified in the configuration file.
- **hf_cv_node: HFCVNode** -- Accepts input frames (currently supplied by hf_video) and applies the specified Hugging Face computer-vision model to the input frames. The model and/or task must be specified in the configuration file.
- **hf_vqa: HFVQANode** -- Accepts input frames and a question (currently supplied by data_vqa) and applies the specified VQA model to the frames. The model and/or task must be specified in the configuration file.

- Currently, all outputs are printed to the command line.

## Example Use
- Example configs for all three nodes are in the configs/huggingface folder, one per node; a minimal standalone sketch is shown below.
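As an illustration, here is a minimal, hypothetical sketch of driving `HFTextNode` directly, outside the orchestrator. The class and the `DataChunk` `add`/`get` API come from this PR; the task choice and the assumption that the node can be exercised standalone are mine:

```python
import chimerapy.engine as cpe
from chimerapy.pipelines.huggin_face.hf_text_node import HFTextNode

# Build the node with an assumed text-classification task
# (no explicit model, so transformers picks its default checkpoint).
node = HFTextNode(name="hf_text", task="text-classification", device="cpu")
node.setup()  # loads the transformers pipeline

# Feed one piece of text the same way HFText would.
chunk = cpe.DataChunk()
chunk.add("data", "ChimeraPy makes streaming pipelines easy.")
result = node.step({"hf_text": chunk})
print(result.get("data")["value"])
```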



**Using HF models requires the installation of additional packages:**
- transformers: the Hugging Face library providing pipeline access to models for various tasks (`pip install transformers`)


## Parameters to specify the model (from the Hugging Face documentation)
**task** (str) — The task defining which pipeline will be returned. Currently accepted tasks are:
- "audio-classification": will return an AudioClassificationPipeline.
- "automatic-speech-recognition": will return an AutomaticSpeechRecognitionPipeline.
- "conversational": will return a ConversationalPipeline.
- "depth-estimation": will return a DepthEstimationPipeline.
- "document-question-answering": will return a DocumentQuestionAnsweringPipeline.
- "feature-extraction": will return a FeatureExtractionPipeline.
- "fill-mask": will return a FillMaskPipeline.
- "image-classification": will return an ImageClassificationPipeline.
- "image-segmentation": will return an ImageSegmentationPipeline.
- "image-to-text": will return an ImageToTextPipeline.
- "mask-generation": will return a MaskGenerationPipeline.
- "object-detection": will return an ObjectDetectionPipeline.
- "question-answering": will return a QuestionAnsweringPipeline.
- "summarization": will return a SummarizationPipeline.
- "table-question-answering": will return a TableQuestionAnsweringPipeline.
- "text2text-generation": will return a Text2TextGenerationPipeline.
- "text-classification" (alias "sentiment-analysis" available): will return a TextClassificationPipeline.
- "text-generation": will return a TextGenerationPipeline.
- "text-to-audio" (alias "text-to-speech" available): will return a TextToAudioPipeline.
- "token-classification" (alias "ner" available): will return a TokenClassificationPipeline.
- "translation": will return a TranslationPipeline.
- "translation_xx_to_yy": will return a TranslationPipeline.
- "video-classification": will return a VideoClassificationPipeline.
- "visual-question-answering": will return a VisualQuestionAnsweringPipeline.
- "zero-shot-classification": will return a ZeroShotClassificationPipeline.
- "zero-shot-image-classification": will return a ZeroShotImageClassificationPipeline.
- "zero-shot-audio-classification": will return a ZeroShotAudioClassificationPipeline.
- "zero-shot-object-detection": will return a ZeroShotObjectDetectionPipeline.

**model** (str or PreTrainedModel or TFPreTrainedModel, optional) — The model that will be used by the pipeline to make predictions. This can be a model identifier or an actual instance of a pretrained model inheriting from PreTrainedModel (for PyTorch) or TFPreTrainedModel (for TensorFlow).
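For example, a minimal standalone use of the `transformers` pipeline API; the checkpoint name below is a common, illustrative choice rather than anything mandated by this PR:

```python
from transformers import pipeline

# `task` and `model` map to the `task` and `model_name` node parameters.
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
print(classifier("ChimeraPy pipelines are fun to build."))
# e.g. [{'label': 'POSITIVE', 'score': 0.99}]
```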

Empty file.
20 changes: 20 additions & 0 deletions chimerapy/pipelines/huggin_face/data.py
@@ -0,0 +1,20 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional

import numpy as np

if TYPE_CHECKING:
from ultralytics.yolo.engine.results import Results


@dataclass
class YOLOFrame:
"""A frame from a video source."""

arr: np.ndarray
frame_count: int
src_id: str
result: Optional["Results"] = None

def __repr__(self) -> str:
return f"<Frame from {self.src_id} {self.frame_count}>"
40 changes: 40 additions & 0 deletions chimerapy/pipelines/huggin_face/data_vqa.py
@@ -0,0 +1,40 @@
import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node
from chimerapy.pipelines.generic_nodes.video_nodes import Video

from .data import YOLOFrame


@source_node(name="CPPipelines_HFVQA")
class HFVQA(Video):
    """A node that mimics data input for a VQA model."""

    def __init__(
        self,
        video_src: str,
        name: str = "text",
        data_key: str = "data",
        frame_key: str = "frame",
        text_src: str = "what is in the image",
        download_video: bool = False,
        **kwargs,
    ) -> None:
        self.frame_key = frame_key
        self.data_key = data_key
        self.text_src = text_src
        super().__init__(
            name=name,
            video_src=video_src,
            frame_key=frame_key,
            loop=True,
            download_video=download_video,
            include_meta=True,
            **kwargs,
        )

    def step(self) -> cpe.DataChunk:
        data_chunk = super().step()
        ret_chunk = cpe.DataChunk()
        frame_arr = data_chunk.get(self.frame_key)["value"]
        src_id = data_chunk.get("metadata")["value"]["source_name"]
        frame_count = data_chunk.get("metadata")["value"]["frame_count"]
        # Emit a [question, frame] pair; the question is a fixed default
        # for now but can be overridden via `text_src`.
        ret_chunk.add(
            self.data_key,
            [
                self.text_src,
                YOLOFrame(frame_arr, src_id=src_id, frame_count=frame_count),
            ],
        )
        return ret_chunk
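The question defaults to a fixed placeholder; overriding it per source is a one-liner (the video path here is illustrative):

```python
# Hypothetical source; `text_src` replaces the default question.
vqa_src = HFVQA(
    video_src="videos/kitchen.mp4",
    text_src="How many people are visible?",
)
```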

86 changes: 86 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_cv_node.py
@@ -0,0 +1,86 @@
from typing import Dict, Literal

import chimerapy.engine as cpe
from chimerapy.orchestrator import step_node
from PIL import Image

from .data import YOLOFrame


@step_node(name="CPPipelines_HFCVNode")
class HFCVNode(cpe.Node):
    """A node to apply Hugging Face models on a video source.

    Parameters
    ----------
    name: str, required
        The name of the node.

    model_name: str, required
        The name of the Hugging Face model to apply.

    task: str, optional (default: "")
        The task to perform when the model does not define one.

    device: Literal["cpu", "cuda"], optional (default: "cpu")
        The device to use for running the model.

    frames_key: str, optional (default: 'frame')
        The key to access the frames in the video.
    """

    def __init__(
        self,
        name: str,
        model_name: str,
        task: str = "",
        device: Literal["cpu", "cuda"] = "cpu",
        frames_key: str = "frame",
    ):
        self.model_name = model_name
        self.device = 0 if device == "cuda" else "cpu"
        self.task = task
        self.frames_key = frames_key

        super().__init__(name=name)

    def setup(self):
        from transformers import pipeline

        try:
            # Prefer an explicit task; otherwise infer it from the model.
            if self.task != "":
                if self.model_name != "":
                    self.model = pipeline(task=self.task, model=self.model_name, device=self.device)
                else:
                    self.model = pipeline(task=self.task, device=self.device)
            else:
                self.model = pipeline(model=self.model_name, device=self.device)

            print(f"Successfully loaded model: {self.model_name}")
        except Exception:
            print(f"Failed to load model '{self.model_name}' for task '{self.task}'.")

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> cpe.DataChunk:
        ret_chunk = cpe.DataChunk()

        for _, data_chunk in data_chunks.items():
            frame = data_chunk.get(self.frames_key)["value"]

            # Run the model on a PIL image built from the raw frame array.
            img = Image.fromarray(frame.arr)
            result = self.model(img)
            print(result)

            new_frame = YOLOFrame(
                arr=frame.arr,
                frame_count=frame.frame_count,
                src_id=frame.src_id,
                result=result,
            )

            ret_chunk.add(self.frames_key, new_frame)

        return ret_chunk
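A brief, hypothetical instantiation of this node; the checkpoint name is illustrative, not part of this PR:

```python
# "google/vit-base-patch16-224" is a common image-classification checkpoint.
cv_node = HFCVNode(
    name="hf_cv",
    model_name="google/vit-base-patch16-224",
    task="image-classification",
    device="cpu",
)
```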
30 changes: 30 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_display.py
@@ -0,0 +1,30 @@
from typing import Dict

import cv2

import chimerapy.engine as cpe
from chimerapy.orchestrator import sink_node


@sink_node(name="CPPipelines_HFDisplay")
class HFDisplay(cpe.Node):
    """A node that displays results after applying Hugging Face models."""

    def __init__(
        self,
        frames_key: str = "frame",
        name: str = "DisplayNode",
    ) -> None:
        self.frames_key = frames_key
        super().__init__(name=name)

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> None:
        for _, data_chunk in data_chunks.items():
            frame = data_chunk.get(self.frames_key)["value"]
            cv2.imshow(frame.src_id, frame.arr)
            cv2.waitKey(1)

    def teardown(self) -> None:
        cv2.destroyAllWindows()
37 changes: 37 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_text.py
@@ -0,0 +1,37 @@
import time
from typing import Optional

import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node


@source_node(name="CPPipelines_HFText")
class HFText(cpe.Node):
    """A text node that reads textual input from a text file, one line per step."""

    def __init__(
        self,
        name: str = "text",
        data_key: str = "data",
        source: str = "./test.txt",
    ) -> None:
        self.data_key = data_key
        self.source = source
        super().__init__(name=name)

    def setup(self):
        self.file = open(self.source, "r")

    def step(self) -> Optional[cpe.DataChunk]:
        if self.file:
            line = self.file.readline()
            # Simulate input at a rate of one line per second.
            time.sleep(1)
            if line:
                ret_chunk = cpe.DataChunk()
                ret_chunk.add(self.data_key, line)
                return ret_chunk
        return None

    def teardown(self) -> None:
        # Close the text file opened in setup().
        self.file.close()
77 changes: 77 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_text_node.py
@@ -0,0 +1,77 @@
from typing import Dict, Literal

import chimerapy.engine as cpe
from chimerapy.orchestrator import step_node


@step_node(name="CPPipelines_HFTextNode")
class HFTextNode(cpe.Node):
    """A node to apply Hugging Face models on text inputs.

    Parameters
    ----------
    name: str, required
        The name of the node.

    model_name: str, optional (default: "")
        The name of the Hugging Face model to apply.

    task: str, optional (default: "")
        The task to perform when the model does not define one.

    device: Literal["cpu", "cuda"], optional (default: "cpu")
        The device to use for running the model.

    data_key: str, optional (default: 'data')
        The key to access the text input in each data chunk.
    """

    def __init__(
        self,
        name: str,
        model_name: str = "",
        task: str = "",
        device: Literal["cpu", "cuda"] = "cpu",
        data_key: str = "data",
    ):
        self.model_name = model_name
        self.device = 0 if device == "cuda" else "cpu"
        self.task = task
        self.data_key = data_key

        super().__init__(name=name)

    def setup(self):
        from transformers import pipeline

        try:
            # Prefer an explicit task; otherwise infer it from the model.
            if self.task != "":
                if self.model_name != "":
                    self.model = pipeline(task=self.task, model=self.model_name, device=self.device)
                else:
                    self.model = pipeline(task=self.task, device=self.device)
            else:
                self.model = pipeline(model=self.model_name, device=self.device)

            print(f"Successfully loaded model: {self.model_name}")
        except Exception:
            print(f"Failed to load model '{self.model_name}' for task '{self.task}'.")

    def step(self, data_chunks: Dict[str, cpe.DataChunk]) -> cpe.DataChunk:
        ret_chunk = cpe.DataChunk()

        for _, data_chunk in data_chunks.items():
            question = data_chunk.get(self.data_key)["value"]

            result = self.model(question)
            print(question, result)

            ret_chunk.add(self.data_key, result)

        return ret_chunk
28 changes: 28 additions & 0 deletions chimerapy/pipelines/huggin_face/hf_video.py
@@ -0,0 +1,28 @@
import chimerapy.engine as cpe
from chimerapy.orchestrator import source_node
from chimerapy.pipelines.generic_nodes.video_nodes import Video

from .data import YOLOFrame


@source_node(name="CPPipelines_HFVideo")
class HFVideo(Video):
"""A video node that returns a Frame object with identifiable metadata."""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.include_meta = True

def step(self) -> cpe.DataChunk:
data_chunk = super().step()
ret_chunk = cpe.DataChunk()
frame_arr = data_chunk.get(self.frame_key)["value"]
src_id = data_chunk.get("metadata")["value"]["source_name"]
frame_count = data_chunk.get("metadata")["value"]["frame_count"]

ret_chunk.add(
self.frame_key,
YOLOFrame(frame_arr, src_id=src_id, frame_count=frame_count),
)

return ret_chunk