Skip to content

Commit 0f69e83

Browse files
committed
Implements object_tracking_vittrack java demo(opencv#251)
1 parent 5962eb4 commit 0f69e83

File tree

5 files changed

+265
-17
lines changed

5 files changed

+265
-17
lines changed

models/object_tracking_vittrack/README.md

+15
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@ cmake --build build
4040
./build/opencv_zoo_object_tracking_vittrack -h
4141
```
4242

43+
## Java
44+
45+
Install Maven to get started.
46+
47+
```shell
48+
# tracking on camera input
49+
mvn compile exec:java -q
50+
51+
# tracking on video
52+
mvn compile exec:java -q -Dexec.args="-i /path/to/video"
53+
54+
# get help messages
55+
mvn compile exec:java -q -Dexec.args="-h"
56+
```
57+
4358
# Example outputs
4459

4560
<img src="example_outputs/vittrack_demo.gif" style="zoom:200%;" />
+206
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
import com.beust.jcommander.JCommander;
2+
import com.beust.jcommander.Parameter;
3+
import com.beust.jcommander.UnixStyleUsageFormatter;
4+
import org.bytedeco.javacpp.BytePointer;
5+
import org.bytedeco.opencv.global.opencv_dnn;
6+
import org.bytedeco.opencv.opencv_core.*;
7+
import org.bytedeco.opencv.opencv_video.TrackerVit;
8+
import org.bytedeco.opencv.opencv_videoio.VideoCapture;
9+
import org.bytedeco.opencv.opencv_videoio.VideoWriter;
10+
11+
import static org.bytedeco.opencv.global.opencv_highgui.*;
12+
import static org.bytedeco.opencv.global.opencv_imgproc.*;
13+
import static org.bytedeco.opencv.global.opencv_videoio.CAP_PROP_FPS;
14+
15+
public class demo {
16+
17+
// Valid combinations of backends and targets
18+
static int[][] backendTargetPairs = {
19+
{opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
20+
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
21+
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
22+
{opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
23+
{opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
24+
};
25+
26+
static class Args {
27+
@Parameter(names = {"--help", "-h"}, order = 0, help = true,
28+
description = "Print help message.")
29+
boolean help;
30+
@Parameter(names = {"--input", "-i"}, order = 1,
31+
description = "Set path to the input video. Omit for using default camera.")
32+
String input;
33+
@Parameter(names = {"--model_path", "-m"}, order = 2,
34+
description = "Set model path.")
35+
String modelPath = "object_tracking_vittrack_2023sep.onnx";
36+
@Parameter(names = {"--backend_target", "-bt"}, order = 3,
37+
description = "Choose one of the backend-target pair to run this demo:" +
38+
" 0: OpenCV implementation + CPU," +
39+
" 1: CUDA + GPU (CUDA), " +
40+
" 2: CUDA + GPU (CUDA FP16)," +
41+
" 3: TIM-VX + NPU," +
42+
" 4: CANN + NPU")
43+
int backendTarget = 0;
44+
@Parameter(names = {"--save", "-s"}, order = 4,
45+
description = "Specify to save a file with results.")
46+
boolean save;
47+
@Parameter(names = {"--vis", "-v"}, order = 5, arity = 1,
48+
description = "Specify to open a new window to show results.")
49+
boolean vis = true;
50+
}
51+
52+
static class TrackingResult {
53+
boolean isLocated;
54+
Rect bbox;
55+
float score;
56+
}
57+
58+
static class VitTrack {
59+
private final TrackerVit model;
60+
61+
VitTrack(String modelPath, int backendId, int targetId) {
62+
final TrackerVit.Params params = new TrackerVit.Params();
63+
params.net(new BytePointer(modelPath))
64+
.backend(backendId)
65+
.target(targetId);
66+
model = TrackerVit.create(params);
67+
}
68+
69+
void init(Mat image, Rect roi) {
70+
model.init(image, roi);
71+
}
72+
73+
TrackingResult infer(Mat image) {
74+
final TrackingResult result = new TrackingResult();
75+
result.bbox = new Rect();
76+
result.isLocated = model.update(image, result.bbox);
77+
result.score = model.getTrackingScore();
78+
return result;
79+
}
80+
}
81+
82+
static Mat visualize(Mat image, Rect bbox, float score, boolean isLocated, double fps, Scalar boxColor,
83+
Scalar textColor, double fontScale, int fontSize) {
84+
final Mat output = image.clone();
85+
final int h = output.rows();
86+
final int w = output.cols();
87+
if (fps >= 0) {
88+
putText(output, String.format("FPS: %.2f", fps), new Point(0, 30), FONT_HERSHEY_DUPLEX, fontScale,
89+
textColor);
90+
}
91+
92+
if (isLocated && score >= 0.3) {
93+
rectangle(output, bbox, boxColor, 2, LINE_8, 0);
94+
putText(output, String.format("%.2f", score), new Point(bbox.x(), bbox.y() + 25),
95+
FONT_HERSHEY_DUPLEX, fontScale, textColor, fontSize, LINE_8, false);
96+
} else {
97+
final Size textSize = getTextSize("Target lost!", FONT_HERSHEY_DUPLEX, fontScale, fontSize, new int[]{0});
98+
final int textX = (w - textSize.width()) / 2;
99+
final int textY = (h - textSize.height()) / 2;
100+
putText(output, "Target lost!", new Point(textX, textY), FONT_HERSHEY_DUPLEX,
101+
fontScale, new Scalar(0, 0, 255, 0), fontSize, LINE_8, false);
102+
}
103+
104+
return output;
105+
}
106+
107+
/**
108+
* Execute: mvn compile exec:java -q -Dexec.args=""
109+
*/
110+
public static void main(String[] argv) {
111+
final Args args = new Args();
112+
final JCommander jc = JCommander.newBuilder()
113+
.addObject(args)
114+
.build();
115+
jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
116+
jc.parse(argv);
117+
if (args.help) {
118+
jc.usage();
119+
return;
120+
}
121+
final int backendId = backendTargetPairs[args.backendTarget][0];
122+
final int targetId = backendTargetPairs[args.backendTarget][1];
123+
VitTrack tracker = new VitTrack(args.modelPath, backendId, targetId);
124+
125+
final VideoCapture video = new VideoCapture();
126+
if (args.input == null) {
127+
video.open(0);
128+
} else {
129+
video.open(args.input);
130+
}
131+
if (!video.isOpened()) {
132+
System.err.println("Error: Could not open video source");
133+
return;
134+
}
135+
136+
Mat firstFrame = new Mat();
137+
video.read(firstFrame);
138+
139+
if (firstFrame.empty()) {
140+
System.err.println("No frames grabbed!");
141+
return;
142+
}
143+
144+
Mat firstFrameCopy = firstFrame.clone();
145+
putText(firstFrameCopy, "1. Drag a bounding box to track.", new Point(0, 25), FONT_HERSHEY_SIMPLEX, 1, new Scalar(0, 255, 0, 0));
146+
putText(firstFrameCopy, "2. Press ENTER to confirm", new Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, new Scalar(0, 255, 0, 0));
147+
final Rect roi = selectROI("VitTrack Demo", firstFrameCopy);
148+
149+
if (roi.area() == 0) {
150+
System.err.println("No ROI is selected! Exiting...");
151+
return;
152+
} else {
153+
System.out.printf("Selected ROI: (x: %d, y: %d, width: %d, height: %d)%n", roi.x(), roi.y(), roi.width(),
154+
roi.height());
155+
}
156+
157+
// Create VideoWriter if save option is specified
158+
final VideoWriter outputVideo = new VideoWriter();
159+
if (args.save) {
160+
final Size frameSize = firstFrame.size();
161+
outputVideo.open("output.mp4", VideoWriter.fourcc((byte) 'm', (byte) 'p', (byte) '4', (byte) 'v'),
162+
video.get(CAP_PROP_FPS), frameSize);
163+
if (!outputVideo.isOpened()) {
164+
System.err.println("Error: Could not create output video stream");
165+
return;
166+
}
167+
}
168+
169+
// Initialize tracker with ROI
170+
tracker.init(firstFrame, roi);
171+
172+
// Track frame by frame
173+
final TickMeter tm = new TickMeter();
174+
while (waitKey(1) < 0) {
175+
video.read(firstFrame);
176+
if (firstFrame.empty()) {
177+
System.out.println("End of video");
178+
break;
179+
}
180+
181+
// Inference
182+
tm.start();
183+
final TrackingResult result = tracker.infer(firstFrame);
184+
tm.stop();
185+
186+
// Visualize
187+
Mat frame = firstFrame.clone();
188+
frame = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS(),
189+
new Scalar(0, 255, 0, 0), new Scalar(0, 255, 0, 0), 1.0, 1);
190+
191+
if (args.save) {
192+
outputVideo.write(frame);
193+
}
194+
if (args.vis) {
195+
imshow("VitTrack Demo", frame);
196+
}
197+
tm.reset();
198+
}
199+
if (args.save) {
200+
outputVideo.release();
201+
}
202+
203+
video.release();
204+
}
205+
206+
}
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven module for the VitTrack object tracking Java demo.

  No <build> section is declared here: the source directory
  (${project.basedir}) and the exec-maven-plugin configuration
  (mainClass "demo") are inherited from the parent POM "opencv_zoo:demo",
  the same way the text_detection_ppocr module is set up.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>opencv_zoo</groupId>
        <artifactId>demo</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>object_tracking_vittrack</artifactId>

</project>

models/pom.xml

+13-2
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,29 @@
88
<groupId>opencv_zoo</groupId>
99
<artifactId>demo</artifactId>
1010
<version>1.0.0-SNAPSHOT</version>
11-
<name>opencv zoo demo application</name>
11+
<name>OpenCV Zoo demo application</name>
1212
<packaging>pom</packaging>
1313

1414
<build>
1515
<sourceDirectory>${project.basedir}</sourceDirectory>
16+
<plugins>
17+
<plugin>
18+
<groupId>org.codehaus.mojo</groupId>
19+
<artifactId>exec-maven-plugin</artifactId>
20+
<version>3.3.0</version>
21+
<configuration>
22+
<executable>java</executable>
23+
<mainClass>demo</mainClass>
24+
</configuration>
25+
</plugin>
26+
</plugins>
1627
</build>
1728

1829
<modules>
30+
<module>object_tracking_vittrack</module>
1931
<module>text_detection_ppocr</module>
2032
</modules>
2133

22-
2334
<dependencies>
2435
<dependency>
2536
<groupId>org.bytedeco</groupId>

models/text_detection_ppocr/pom.xml

-15
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,4 @@
1313

1414
<artifactId>text_detection_ppocr</artifactId>
1515

16-
<build>
17-
<sourceDirectory>${project.basedir}</sourceDirectory>
18-
<plugins>
19-
<plugin>
20-
<groupId>org.codehaus.mojo</groupId>
21-
<artifactId>exec-maven-plugin</artifactId>
22-
<version>3.3.0</version>
23-
<configuration>
24-
<executable>java</executable>
25-
<mainClass>demo</mainClass>
26-
</configuration>
27-
</plugin>
28-
</plugins>
29-
</build>
30-
3116
</project>

0 commit comments

Comments
 (0)