import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.UnixStyleUsageFormatter;
import org.bytedeco.javacpp.indexer.FloatIndexer;
import org.bytedeco.opencv.global.opencv_dnn;
import org.bytedeco.opencv.opencv_core.*;
import org.bytedeco.opencv.opencv_objdetect.FaceDetectorYN;
import org.bytedeco.opencv.opencv_videoio.VideoCapture;

import static org.bytedeco.opencv.global.opencv_highgui.*;
import static org.bytedeco.opencv.global.opencv_imgcodecs.imread;
import static org.bytedeco.opencv.global.opencv_imgcodecs.imwrite;
import static org.bytedeco.opencv.global.opencv_imgproc.*;
import static org.bytedeco.opencv.global.opencv_videoio.CAP_PROP_FRAME_HEIGHT;
import static org.bytedeco.opencv.global.opencv_videoio.CAP_PROP_FRAME_WIDTH;

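// YuNet face detection demo using the JavaCPP presets for OpenCV:
// detects faces in an input image or in frames from the default camera.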
public class demo {

    // Valid combinations of backends and targets
    static final int[][] backendTargetPairs = {
            {opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
            {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
            {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
            {opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
            {opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
    };

    static class Args {
        @Parameter(names = {"--help", "-h"}, order = 0, help = true,
                description = "Print help message.")
        boolean help;
        @Parameter(names = {"--input", "-i"}, order = 1,
                description = "Path to the input image; omit to use the camera.")
        String input;
        @Parameter(names = {"--model", "-m"}, order = 2,
                description = "Set path to the model file.")
        String model = "face_detection_yunet_2023mar.onnx";
        @Parameter(names = {"--backend_target", "-bt"}, order = 3,
                description = "Choose one of the backend-target pairs to run this demo:" +
                        " 0: OpenCV implementation + CPU," +
                        " 1: CUDA + GPU (CUDA)," +
                        " 2: CUDA + GPU (CUDA FP16)," +
                        " 3: TIM-VX + NPU," +
                        " 4: CANN + NPU")
        int backendTarget = 0;
        @Parameter(names = {"--conf_threshold"}, order = 4,
                description = "Set the minimum confidence needed for the model to identify a face. Faces with confidence below conf_threshold are filtered out.")
        float confThreshold = 0.9f;
        @Parameter(names = {"--nms_threshold"}, order = 5,
                description = "Set the threshold used to suppress overlapping boxes. If IoU(box1, box2) >= nms_threshold, only the box with the higher score is kept.")
        float nmsThreshold = 0.3f;
        @Parameter(names = {"--top_k"}, order = 6,
                description = "Keep top_k bounding boxes before NMS. Setting a lower value may speed up postprocessing.")
        int topK = 5000;
        @Parameter(names = {"--save", "-s"}, order = 7,
                description = "Save results (bounding boxes, confidence scores) to result.jpg. Invalid in case of camera input.")
        boolean save;
        @Parameter(names = {"--vis", "-v"}, order = 8, arity = 1,
                description = "Open a new window to show results. Invalid in case of camera input.")
        boolean vis = true;
    }
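
    // Illustrative invocations (the image path is a placeholder, not from the source):
    //   java demo --input /path/to/image.jpg --save
    //   java demo --backend_target 1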
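    // Thin wrapper around OpenCV's FaceDetectorYN (the YuNet face detection model)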
    static class YuNet {
        private final FaceDetectorYN model;

        YuNet(String modelPath, Size inputSize, float confThreshold, float nmsThreshold, int topK,
              int backendId, int targetId) {
            model = FaceDetectorYN.create(modelPath, "", inputSize, confThreshold, nmsThreshold, topK,
                    backendId, targetId);
        }

        void setInputSize(Size inputSize) {
            model.setInputSize(inputSize);
        }

        Mat infer(Mat image) {
            final Mat res = new Mat();
            model.detect(image, res);
            return res;
        }
    }

    // Draw each detection in faces on a copy of the image
    static Mat visualize(Mat image, Mat faces, Scalar textColor, double fps) {
        final Mat output = image.clone();
        if (fps >= 0) {
            putText(output, String.format("FPS: %.2f", fps), new Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, textColor);
        }
        // Per the FaceDetectorYN docs, each row of faces is one detection:
        // [0..3] box x, y, w, h; [4..13] x, y of five landmarks; [14] confidence score
        final Scalar boxColor = new Scalar(0, 255, 0, 0);
        if (faces.rows() > 0) {
            final FloatIndexer facesData = faces.createIndexer();
            for (int i = 0; i < faces.rows(); i++) {
                final int x = Math.round(facesData.get(i, 0)), y = Math.round(facesData.get(i, 1));
                rectangle(output, new Rect(x, y, Math.round(facesData.get(i, 2)), Math.round(facesData.get(i, 3))), boxColor);
                putText(output, String.format("%.4f", facesData.get(i, 14)), new Point(x, y - 5), FONT_HERSHEY_SIMPLEX, 0.5, textColor);
                for (int j = 0; j < 5; j++) {
                    circle(output, new Point(Math.round(facesData.get(i, 4 + 2 * j)), Math.round(facesData.get(i, 5 + 2 * j))), 2, boxColor);
                }
            }
        }
        return output;
    }

    public static void main(String[] argv) {
        final Args args = new Args();
        final JCommander jc = JCommander.newBuilder()
                .addObject(args)
                .build();
        jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
        jc.parse(argv);
        if (args.help) {
            jc.usage();
            return;
        }
        final int backendId = backendTargetPairs[args.backendTarget][0];
        final int targetId = backendTargetPairs[args.backendTarget][1];
        final YuNet model = new YuNet(args.model, new Size(320, 320), args.confThreshold, args.nmsThreshold,
                args.topK, backendId, targetId);

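        // With --input set, run detection once on the image; otherwise stream from the default camera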
        if (args.input != null) {
            final Mat image = imread(args.input);
            if (image.empty()) {
                System.err.println("Could not read image: " + args.input);
                return;
            }

            // Inference
            model.setInputSize(image.size());
            final Mat faces = model.infer(image);

            // Print each detected face: bounding box and confidence score (column 14)
            System.out.printf("%d faces detected:%n", faces.rows());
            if (faces.rows() > 0) {
                final FloatIndexer facesData = faces.createIndexer();
                for (int i = 0; i < faces.rows(); i++) {
                    System.out.printf("%d: x=%.0f, y=%.0f, w=%.0f, h=%.0f, conf=%.4f%n", i,
                            facesData.get(i, 0), facesData.get(i, 1), facesData.get(i, 2),
                            facesData.get(i, 3), facesData.get(i, 14));
                }
            }

            // Draw results on the input image
            if (args.save || args.vis) {
                final Mat resImage = visualize(image, faces, new Scalar(0, 0, 255, 0), -1);
                if (args.save) {
                    System.out.println("Results are saved to result.jpg");
                    imwrite("result.jpg", resImage);
                }
                if (args.vis) {
                    namedWindow(args.input, WINDOW_AUTOSIZE);
                    imshow(args.input, resImage);
                    waitKey(0);
                }
            }

        } else { // Call default camera
            final int deviceId = 0;
            final VideoCapture cap = new VideoCapture(deviceId);
            if (!cap.isOpened()) {
                System.err.println("Could not open camera " + deviceId);
                return;
            }
            final int w = (int) cap.get(CAP_PROP_FRAME_WIDTH);
            final int h = (int) cap.get(CAP_PROP_FRAME_HEIGHT);
            model.setInputSize(new Size(w, h));

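            // TickMeter times each inference call to compute the FPS shown in the overlay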
            final TickMeter tm = new TickMeter();
            final Mat frame = new Mat();
            while (waitKey(1) < 0) { // loop until any key is pressed
                final boolean hasFrame = cap.read(frame);
                if (!hasFrame) {
                    System.out.println("No frames grabbed! Exiting ...");
                    break;
                }
                // Inference
                tm.start();
                final Mat faces = model.infer(frame);
                tm.stop();
                final Mat resImage = visualize(frame, faces, new Scalar(0, 0, 255, 0), tm.getFPS());
                imshow("YuNet Demo", resImage);

                tm.reset();
            }
        }
    }
}