148 changes: 124 additions & 24 deletions examples/react/src/App.tsx
@@ -24,6 +24,7 @@ import { FAQ } from './components/FAQ'
import { Tensor } from '@xenova/transformers'
import cv from '@techstark/opencv-js'
import { StableDiffusionControlNetPipeline } from '../../../dist/pipelines/StableDiffusionControlNetPipeline';
import { getBlobFromImage, generateColors, segArgmax, posePostProcess, loadAnnotatorFile } from './annotator_helper_functions'

const darkTheme = createTheme({
palette: {
@@ -39,6 +40,7 @@ interface SelectedPipeline {
steps: number
hasImg2Img: boolean
hasControlNet: boolean
controlnet?: string
}

const pipelines = [
@@ -91,7 +93,32 @@ const pipelines = [
height: 512,
steps: 20,
hasImg2Img: true,
hasControlNet: true
hasControlNet: true,
controlnet: 'canny'
},
{
name: 'StableDiffusion 1.5 Base FP16 Semantic Segmentation (2.9GB)',
repo: 'jdp8/stable-diffusion-1-5-seg-v11p-onnx',
revision: 'main',
fp16: true,
width: 512,
height: 512,
steps: 20,
hasImg2Img: true,
hasControlNet: true,
controlnet: 'semantic_segmentation'
},
{
name: 'StableDiffusion 1.5 Base FP16 OpenPose (2.9GB)',
repo: 'jdp8/stable-diffusion-1-5-openpose-v11p-onnx',
revision: 'main',
fp16: true,
width: 512,
height: 512,
steps: 20,
hasImg2Img: true,
hasControlNet: true,
controlnet: 'openpose'
},
]

@@ -110,6 +137,8 @@ function App() {
const [inputImage, setInputImage] = useState<Float32Array>();
const [strength, setStrength] = useState(0.8);
const [controlNetImage, setControlNetImage] = useState<Float32Array>();
const [annotator_model, setAnnotatorModel] = useState('');
const [annotator_config, setAnnotatorConfig] = useState('');
const [runVaeOnEachStep, setRunVaeOnEachStep] = useState(false);
useEffect(() => {
setModelCacheDir('models')
@@ -201,7 +230,7 @@ function App() {
* @param type Whether the uploaded image feeds the 'img2img' or the 'controlnet' path.
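* @param controlnet Annotator to apply for ControlNet inputs: 'canny', 'semantic_segmentation' or 'openpose' (defaults to 'canny').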
* @returns void
*/
function uploadImage(e: any, type: 'controlnet'|'img2img') {
function uploadImage(e: any, type: 'controlnet'|'img2img', controlnet='canny') {
if(!e.target.files[0]) {
// No image uploaded
return;
@@ -225,29 +254,70 @@ function App() {
setInputImage(rgb_array);
}
else if(type == 'controlnet') {
// For now only Canny Edge Detection is available
const cvImg = cv.imread(uploadedImage); // RGBA Image | 4 Channels
const imgGray = new cv.Mat();
cv.cvtColor(cvImg, imgGray, cv.COLOR_RGBA2GRAY); // Gray Image | 1 Channel
const imgCanny = new cv.Mat();
cv.Canny(imgGray, imgCanny, 100, 200, 3, false); // Canny Image | 1 Channel
const rgbaCanny = new cv.Mat();
cv.cvtColor(imgCanny, rgbaCanny, cv.COLOR_GRAY2RGBA, 0); // RGBA Canny Image | 4 Channels

/**
* The Canny output can be accessed through OpenCV.js' typed-array views as follows:
* imgCanny.data -> Uint8Array
* imgCanny.data8S -> Int8Array
* imgCanny.data16S -> Int16Array
* imgCanny.data16U -> Uint16Array
* imgCanny.data32F -> Float32Array
* imgCanny.data32S -> Int32Array
* imgCanny.data64F -> Float64Array
*/
if(controlnet == 'semantic_segmentation') {
const inputSize = [513, 513];
const mean = [127.5, 127.5, 127.5];
const std = 0.007843;
const swapRB = false;

const input = getBlobFromImage(inputSize, mean, std, swapRB, cvImg);
const net = cv.readNet(annotator_model);
net.setInput(input);
const result = net.forward();
const colors = generateColors(result);
const output = segArgmax(result, colors);
const resizedOutput = new cv.Mat();
const dsize = new cv.Size(512, 512);
cv.resize(output, resizedOutput, dsize, 0, 0, cv.INTER_AREA);
const rgbSem = getRgbData(Uint8ClampedArray.from(resizedOutput.data), false);
setControlNetImage(rgbSem);
cvImg.delete(); input.delete(); net.delete(); result.delete(); output.delete(); resizedOutput.delete();
}
else if(controlnet == 'openpose') {
// inputSize can be changed; the original is [368, 368]. The larger the size, the slower the annotation, and vice versa.
const inputSize = [125, 125];
const mean = [0, 0, 0];
const std = 0.00392;
const swapRB = false;
const threshold = 0.1;

// Keypoint pair set: 'COCO', 'MPI' or 'BODY_25', inferred from the model file name below.
let dataset = '';

const rgbCanny = getRgbData(Uint8ClampedArray.from(rgbaCanny.data), false);
setControlNetImage(rgbCanny);
cvImg.delete();imgGray.delete();imgCanny.delete();rgbaCanny.delete();
if(annotator_model == 'pose_iter_584000.caffemodel') {
dataset = 'BODY_25'
}
else if(annotator_model == 'pose_iter_440000.caffemodel') {
dataset = 'COCO'
}
else if (annotator_model == 'pose_iter_160000.caffemodel') {
dataset = 'MPI'
}

const input = getBlobFromImage(inputSize, mean, std, swapRB, cvImg);
const net = cv.readNet(annotator_model, annotator_config);
net.setInput(input);
const result = net.forward();
const output = posePostProcess(result, dataset, threshold, 512, 512);
const rgbaPose = new cv.Mat();
cv.cvtColor(output, rgbaPose, cv.COLOR_RGB2RGBA, 0); // RGBA Pose Image | 4 Channels
const rgbPose = getRgbData(Uint8ClampedArray.from(rgbaPose.data), false);
setControlNetImage(rgbPose);
cvImg.delete(); input.delete(); net.delete(); result.delete(); output.delete(); rgbaPose.delete();
}
else if(controlnet == 'canny') {
const imgGray = new cv.Mat();
cv.cvtColor(cvImg, imgGray, cv.COLOR_RGBA2GRAY); // Gray Image | 1 Channel
const imgCanny = new cv.Mat();
cv.Canny(imgGray, imgCanny, 100, 200, 3, false); // Canny Image | 1 Channel
const rgbaCanny = new cv.Mat();
cv.cvtColor(imgCanny, rgbaCanny, cv.COLOR_GRAY2RGBA, 0); // RGBA Canny Image | 4 Channels
const rgbCanny = getRgbData(Uint8ClampedArray.from(rgbaCanny.data), false);
setControlNetImage(rgbCanny);
cvImg.delete(); imgGray.delete(); imgCanny.delete(); rgbaCanny.delete();
}
}
});
uploadedImage.src = file.target.result;
@@ -333,13 +403,43 @@ function App() {
{selectedPipeline?.hasControlNet &&
(
<>
<label htmlFor="upload_controlnet_image">Upload Image for ControlNet Pipeline:</label>
<label htmlFor="upload_annotator_model">Upload Annotator Model for ControlNet Pipeline:</label>
<TextField
id="upload_annotator_model"
inputProps={{accept:".caffemodel,.pb"}}
type={"file"}
disabled={modelState != 'ready'}
onChange={async (e) => {
const fileName = await loadAnnotatorFile(e)
//@ts-ignore
setAnnotatorModel(fileName)
}}
/>

{selectedPipeline?.controlnet == 'openpose' &&
(
<>
<label htmlFor="upload_config">Upload Annotator Config File for ControlNet Pipeline:</label>
<TextField
id="upload_config"
inputProps={{accept:".prototxt"}}
type={"file"}
disabled={modelState != 'ready'}
onChange={async (e) => {
const fileName = await loadAnnotatorFile(e)
//@ts-ignore
setAnnotatorConfig(fileName)
}}
/>
</>
)}
<label htmlFor="upload_controlnet_image">Upload Image for ControlNet Pipeline (annotator files must be uploaded first):</label>
<TextField
id="upload_controlnet_image"
inputProps={{accept:"image/*"}}
type={"file"}
disabled={modelState != 'ready'}
onChange={(e) => uploadImage(e, "controlnet")}
onChange={(e) => uploadImage(e, "controlnet", selectedPipeline.controlnet)}
/>
</>
)}
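
Taken together, the App.tsx changes route every ControlNet variant through the same path: load the annotator weights into OpenCV.js, run the matching annotator over the uploaded image, and hand the resulting RGB data to setControlNetImage. A minimal sketch of that path under the semantic-segmentation settings above; the annotate wrapper and its arguments are illustrative, not part of this PR:

// Sketch only: glue code assumed, helper names as in this PR.
async function annotate(e: any, img: HTMLImageElement) {
const modelPath = await loadAnnotatorFile(e) // copies the weights into OpenCV.js' virtual FS
const cvImg = cv.imread(img) // RGBA image, 4 channels
const input = getBlobFromImage([513, 513], [127.5, 127.5, 127.5], 0.007843, false, cvImg)
const net = cv.readNet(modelPath as string) // opens the weights by path from the virtual FS
net.setInput(input)
const result = net.forward() // 1 x C x H x W class scores
// ...post-process with segArgmax/posePostProcess, then setControlNetImage(...)
cvImg.delete(); input.delete(); net.delete(); result.delete()
}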
171 changes: 171 additions & 0 deletions examples/react/src/annotator_helper_functions.ts
@@ -0,0 +1,171 @@
import cv from '@techstark/opencv-js'

export const getBlobFromImage = function(inputSize: Array<number>, mean: Array<number>, std: number, swapRB: boolean, cvImg: any) {
const matC3 = new cv.Mat(cvImg.matSize[0], cvImg.matSize[1], cv.CV_8UC3);
cv.cvtColor(cvImg, matC3, cv.COLOR_RGBA2BGR);
const input = cv.blobFromImage(matC3, std, new cv.Size(inputSize[0], inputSize[1]), new cv.Scalar(mean[0], mean[1], mean[2]), swapRB);

matC3.delete();
return input;
}
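
For reference, cv.blobFromImage subtracts the mean first and then multiplies by the scale factor, so the (mean, std) pairs passed by the callers implement (x - mean) * std. A small usage sketch with the semantic-segmentation values from App.tsx:

// (x - 127.5) * 0.007843 ≈ (x - 127.5) / 127.5: pixels normalized to [-1, 1].
const blob = getBlobFromImage([513, 513], [127.5, 127.5, 127.5], 0.007843, false, cvImg)
// The OpenPose branch instead uses mean [0, 0, 0] and std 0.00392 ≈ 1/255, i.e. [0, 1].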

export const loadAnnotatorFile = async (e: any) => {
if(!e.target.files[0]) {
return;
}

return new Promise<string>((resolve) => {
let file = e.target.files[0];
let path = file.name;
let reader = new FileReader();
reader.readAsArrayBuffer(file);
reader.onload = function(ev) {
if(reader.readyState === 2) {
let buffer: any = reader.result;
let data = new Uint8Array(buffer);
cv.FS_createDataFile('/', path, data, true, false, false);
resolve(path);
}
}
});
}
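
cv.FS_createDataFile writes the uploaded bytes into the Emscripten in-memory filesystem that OpenCV.js runs on, so the network loader can later open them by plain path; because the file is created under '/', the resolved file name doubles as that path:

const path = await loadAnnotatorFile(changeEvent) // e.g. resolves with 'pose_iter_440000.caffemodel'
const net = cv.readNet(path as string)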

export const generateColors = function(result: any) {
const numClasses = result.matSize[1];
let colors = [0, 0, 0];
while(colors.length < numClasses * 3) {
colors.push(Math.round((Math.random() * 255 + colors[colors.length - 3]) / 2));
}
return colors;
}
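
The result is a flat [r, g, b, r, g, b, ...] palette with one triple per class: class 0 is black, and every later channel is averaged with the same channel of the previous class, which keeps adjacent class colors from jumping wildly. For example:

const colors = generateColors(result) // colors.length === result.matSize[1] * 3
const colorOfClassK = (k: number) => colors.slice(k * 3, k * 3 + 3)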

export const segArgmax = function(result: any, colors: Array<number>) {
const C = result.matSize[1];
const H = result.matSize[2];
const W = result.matSize[3];
const resultData = result.data32F;
const imgSize = H*W;

let classId = [];
let i, j;
for(i = 0; i < imgSize; ++i) {
let id = 0;
for(j = 0; j < C; ++j) {
if(resultData[j*imgSize+i] > resultData[id*imgSize+i]) {
id = j;
}
}
classId.push(colors[id*3]);
classId.push(colors[id*3+1]);
classId.push(colors[id*3+2]);
classId.push(255);
}

const output = cv.matFromArray(H, W, cv.CV_8UC4, classId);
return output;
}
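
The network output is a 1 x C x H x W float tensor laid out channel-major, so class j's score at pixel i sits at resultData[j * H * W + i]; segArgmax takes the per-pixel argmax and emits that class's palette color plus an opaque alpha. A tiny worked case with illustrative values:

// C = 2, H = W = 1, resultData = [0.2, 0.9] -> class 1 wins, so the single
// pixel becomes [colors[3], colors[4], colors[5], 255].
const mask = segArgmax(result, colors) // H x W CV_8UC4 Mat, ready for resize/cvtColor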

export const posePostProcess = function(result: any, dataset: string, threshold: number, outputWidth: number, outputHeight: number) {
const resultData = result.data32F;
const matSize = result.matSize;
// const size1 = matSize[1];
const size2 = matSize[2];
const size3 = matSize[3];
const mapSize = size2 * size3;

let output = cv.Mat.zeros(outputHeight, outputWidth, cv.CV_8UC3); // rows first, then cols

let BODY_PARTS: any = {};
let POSE_PAIRS: any = [];

if(dataset === 'COCO') {
BODY_PARTS = { "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
"LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
"RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
"LEye": 15, "REar": 16, "LEar": 17, "Background": 18 };

POSE_PAIRS = [ ["Neck", "RShoulder"], ["Neck", "LShoulder"], ["RShoulder", "RElbow"],
["RElbow", "RWrist"], ["LShoulder", "LElbow"], ["LElbow", "LWrist"],
["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["Neck", "LHip"],
["LHip", "LKnee"], ["LKnee", "LAnkle"], ["Neck", "Nose"], ["Nose", "REye"],
["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"] ]
}
else if (dataset === 'MPI') {
BODY_PARTS = { "Head": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
"LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
"RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "Chest": 14,
"Background": 15 }

POSE_PAIRS = [ ["Head", "Neck"], ["Neck", "RShoulder"], ["RShoulder", "RElbow"],
["RElbow", "RWrist"], ["Neck", "LShoulder"], ["LShoulder", "LElbow"],
["LElbow", "LWrist"], ["Neck", "Chest"], ["Chest", "RHip"], ["RHip", "RKnee"],
["RKnee", "RAnkle"], ["Chest", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"] ]
}
else if (dataset === 'BODY_25') {
BODY_PARTS = { "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
"LShoulder": 5, "LElbow": 6, "LWrist": 7, "MidHip": 8, "RHip": 9,
"RKnee": 10, "RAnkle": 11, "LHip": 12, "LKnee": 13, "LAnkle": 14,
"REye": 15, "LEye": 16, "REar": 17, "LEar": 18, "LBigToe": 19,
"LSmallToe": 20, "LHeel": 21, "RBigToe": 22, "RSmallToe": 23,
"RHeel": 24, "Background": 25 }

POSE_PAIRS = [ ["Neck", "Nose"], ["Neck", "RShoulder"],
["Neck", "LShoulder"], ["RShoulder", "RElbow"],
["RElbow", "RWrist"], ["LShoulder", "LElbow"],
["LElbow", "LWrist"], ["Nose", "REye"],
["REye", "REar"], ["Nose", "LEye"],
["LEye", "LEar"], ["Neck", "MidHip"],
["MidHip", "RHip"], ["RHip", "RKnee"],
["RKnee", "RAnkle"], ["RAnkle", "RBigToe"],
["RBigToe", "RSmallToe"], ["RAnkle", "RHeel"],
["MidHip", "LHip"], ["LHip", "LKnee"],
["LKnee", "LAnkle"], ["LAnkle", "LBigToe"],
["LBigToe", "LSmallToe"], ["LAnkle", "LHeel"] ]
}

// get position of keypoints from output
let points = [];
let i;
for(i = 0; i < Object.keys(BODY_PARTS).length; ++i) {
let heatMap = resultData.slice(i * mapSize, (i+1) * mapSize);
let maxIndex = 0;
let maxConf = heatMap[0];
// numeric scan for the most confident location in this heat map
for(let index = 1; index < heatMap.length; ++index) {
if(heatMap[index] > maxConf) {
maxIndex = index;
maxConf = heatMap[index];
}
}

if(maxConf > threshold) {
let indexX = maxIndex % size3;
let indexY = Math.floor(maxIndex / size3);

let x = outputWidth * indexX / size3;
let y = outputHeight * indexY / size2;

points[i] = [Math.round(x), Math.round(y)];
}
}

// draw the points and lines into the image
for(const pair of POSE_PAIRS) {
const partFrom = pair[0];
const partTo = pair[1];
const idFrom = BODY_PARTS[partFrom];
const idTo = BODY_PARTS[partTo];
const pointFrom = points[idFrom];
const pointTo = points[idTo];

if(points[idFrom] && points[idTo]) {
cv.line(output, new cv.Point(pointFrom[0], pointFrom[1]),
new cv.Point(pointTo[0], pointTo[1]), new cv.Scalar(0, 255, 0), 3);
cv.ellipse(output, new cv.Point(pointFrom[0], pointFrom[1]), new cv.Size(3, 3), 0, 0, 360,
new cv.Scalar(0, 0, 255), cv.FILLED);
cv.ellipse(output, new cv.Point(pointTo[0], pointTo[1]), new cv.Size(3, 3), 0, 0, 360,
new cv.Scalar(0, 0, 255), cv.FILLED);
}
}
return output;
}
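
End to end, the OpenPose branch then looks roughly like this (values mirror App.tsx with the original 368x368 input size; the .prototxt name is illustrative, since App.tsx only checks the .caffemodel names):

const input = getBlobFromImage([368, 368], [0, 0, 0], 0.00392, false, cvImg)
const net = cv.readNet('pose_iter_440000.caffemodel', 'pose_deploy.prototxt') // COCO weights + config
net.setInput(input)
const result = net.forward()
const skeleton = posePostProcess(result, 'COCO', 0.1, 512, 512) // 512x512 CV_8UC3 skeleton image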