Skip to content

Commit 2a2e544

Browse files
committed
Implements text_detection_ppocr java demo(opencv#251)
1 parent ec5b2c8 commit 2a2e544

File tree

4 files changed

+316
-0
lines changed

4 files changed

+316
-0
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@
33
**/__pycache__/**
44

55
.vscode
6+
.idea
67

78
build/
89
**/build
910
**/build/**
11+
12+
target/
13+
**/target
14+
**/target/**

models/pom.xml

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<project xmlns="http://maven.apache.org/POM/4.0.0"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
6+
<modelVersion>4.0.0</modelVersion>
7+
8+
<groupId>opencv_zoo</groupId>
9+
<artifactId>demo</artifactId>
10+
<version>1.0.0-SNAPSHOT</version>
11+
<name>opencv zoo demo application</name>
12+
<packaging>pom</packaging>
13+
14+
<build>
15+
<sourceDirectory>${project.basedir}</sourceDirectory>
16+
</build>
17+
18+
<modules>
19+
<module>text_detection_ppocr</module>
20+
</modules>
21+
22+
<dependencies>
23+
<dependency>
24+
<groupId>org.bytedeco</groupId>
25+
<artifactId>javacv-platform</artifactId>
26+
<version>1.5.10</version>
27+
<exclusions>
28+
<exclusion>
29+
<groupId>org.bytedeco</groupId>
30+
<artifactId>flycapture-platform</artifactId>
31+
</exclusion>
32+
<exclusion>
33+
<groupId>org.bytedeco</groupId>
34+
<artifactId>libdc1394-platform</artifactId>
35+
</exclusion>
36+
<exclusion>
37+
<groupId>org.bytedeco</groupId>
38+
<artifactId>libfreenect-platform</artifactId>
39+
</exclusion>
40+
<exclusion>
41+
<groupId>org.bytedeco</groupId>
42+
<artifactId>libfreenect2-platform</artifactId>
43+
</exclusion>
44+
<exclusion>
45+
<groupId>org.bytedeco</groupId>
46+
<artifactId>librealsense-platform</artifactId>
47+
</exclusion>
48+
<exclusion>
49+
<groupId>org.bytedeco</groupId>
50+
<artifactId>librealsense2-platform</artifactId>
51+
</exclusion>
52+
<exclusion>
53+
<groupId>org.bytedeco</groupId>
54+
<artifactId>videoinput-platform</artifactId>
55+
</exclusion>
56+
<exclusion>
57+
<groupId>org.bytedeco</groupId>
58+
<artifactId>artoolkitplus-platform</artifactId>
59+
</exclusion>
60+
<exclusion>
61+
<groupId>org.bytedeco</groupId>
62+
<artifactId>leptonica-platform</artifactId>
63+
</exclusion>
64+
<exclusion>
65+
<groupId>org.bytedeco</groupId>
66+
<artifactId>tesseract-platform</artifactId>
67+
</exclusion>
68+
</exclusions>
69+
</dependency>
70+
<dependency>
71+
<groupId>com.beust</groupId>
72+
<artifactId>jcommander</artifactId>
73+
<version>1.82</version>
74+
</dependency>
75+
</dependencies>
76+
</project>

models/text_detection_ppocr/demo.java

+215
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
import com.beust.jcommander.JCommander;
2+
import com.beust.jcommander.Parameter;
3+
import com.beust.jcommander.UnixStyleUsageFormatter;
4+
import org.bytedeco.javacpp.FloatPointer;
5+
import org.bytedeco.javacv.CanvasFrame;
6+
import org.bytedeco.javacv.OpenCVFrameConverter;
7+
import org.bytedeco.opencv.global.opencv_dnn;
8+
import org.bytedeco.opencv.opencv_core.*;
9+
import org.bytedeco.opencv.opencv_dnn.TextDetectionModel_DB;
10+
import org.bytedeco.opencv.opencv_videoio.VideoCapture;
11+
12+
import java.util.AbstractMap;
13+
import java.util.Map;
14+
15+
import static org.bytedeco.opencv.global.opencv_imgcodecs.imwrite;
16+
import static org.bytedeco.opencv.global.opencv_imgproc.*;
17+
18+
public class demo {
19+
20+
// Valid combinations of backends and targets
21+
static int[][] backendTargetPairs = {
22+
{opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
23+
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
24+
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
25+
{opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
26+
{opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
27+
};
28+
29+
static class Args {
30+
@Parameter(names = {"--help", "-h"}, order = 0, help = true,
31+
description = "Print help message.")
32+
boolean help;
33+
@Parameter(names = {"--model", "-m"}, order = 1,
34+
description = "Set model type.")
35+
String model = "text_detection_en_ppocrv3_2023may.onnx";
36+
@Parameter(names = {"--input", "-i"}, order = 2,
37+
description = "Path to input image or video file. Skip this argument to capture frames from a camera.")
38+
String input;
39+
@Parameter(names = "--width", order = 3,
40+
description = "Resize input image to certain width, It should be multiple by 32.")
41+
int width = 736;
42+
@Parameter(names = "--height", order = 4,
43+
description = "Resize input image to certain height, It should be multiple by 32.")
44+
int height = 736;
45+
@Parameter(names = "--binary_threshold", order = 5,
46+
description = "Threshold of the binary map.")
47+
float binaryThreshold = 0.3f;
48+
@Parameter(names = "--polygon_threshold", order = 6,
49+
description = "Threshold of polygons.")
50+
float polygonThreshold = 0.5f;
51+
@Parameter(names = "--max_candidates", order = 7,
52+
description = "Set maximum number of polygon candidates.")
53+
int maxCandidates = 200;
54+
@Parameter(names = "--unclip_ratio", order = 8,
55+
description = "The unclip ratio of the detected text region, which determines the output size.")
56+
double unclipRatio = 2.0;
57+
@Parameter(names = {"--save", "-s"}, order = 9, arity = 1,
58+
description = "Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.")
59+
boolean save = true;
60+
@Parameter(names = {"--viz", "-v"}, order = 10, arity = 1,
61+
description = "Specify to open a new window to show results. Invalid in case of camera input.")
62+
boolean viz = true;
63+
@Parameter(names = {"--backend", "-bt"}, order = 11,
64+
description = "Choose one of computation backends:" +
65+
" 0: OpenCV implementation + CPU," +
66+
" 1: CUDA + GPU (CUDA), " +
67+
" 2: CUDA + GPU (CUDA FP16)," +
68+
" 3: TIM-VX + NPU," +
69+
" 4: CANN + NPU")
70+
int backend = 0;
71+
}
72+
73+
static class PPOCRDet {
74+
private final TextDetectionModel_DB model;
75+
private final Size inputSize;
76+
77+
public PPOCRDet(String modelPath, Size inputSize,
78+
float binaryThreshold, float polygonThreshold, int maxCandidates, double unclipRatio,
79+
int backendId, int targetId) {
80+
this.inputSize = inputSize;
81+
82+
model = new TextDetectionModel_DB(modelPath);
83+
model.setPreferableBackend(backendId);
84+
model.setPreferableTarget(targetId);
85+
86+
model.setBinaryThreshold(binaryThreshold);
87+
model.setPolygonThreshold(polygonThreshold);
88+
model.setUnclipRatio(unclipRatio);
89+
model.setMaxCandidates(maxCandidates);
90+
91+
model.setInputParams(1.0 / 255.0, inputSize,
92+
new Scalar(122.67891434, 116.66876762, 104.00698793, 0), true, false);
93+
}
94+
95+
public Map.Entry<PointVectorVector, FloatPointer> infer(Mat image) {
96+
assert image.rows() == inputSize.height() : "height of input image != net input size";
97+
assert image.cols() == inputSize.width() : "width of input image != net input size";
98+
final PointVectorVector pt = new PointVectorVector();
99+
final FloatPointer confidences = new FloatPointer();
100+
model.detect(image, pt, confidences);
101+
return new AbstractMap.SimpleEntry<>(pt, confidences);
102+
}
103+
}
104+
105+
static Mat visualize(Mat image, Map.Entry<PointVectorVector, FloatPointer> results, double fps, Scalar boxColor,
106+
Scalar textColor, boolean isClosed, int thickness) {
107+
final Mat output = new Mat();
108+
image.copyTo(output);
109+
if (fps > 0) {
110+
putText(output, String.format("FPS: %.2f", fps), new Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, textColor);
111+
}
112+
final PointVectorVector pvv = results.getKey();
113+
final MatVector matVector = new MatVector();
114+
for (int i = 0; i < pvv.size(); i++) {
115+
final PointVector pv = pvv.get(i);
116+
final Point pts = new Point(pv.size());
117+
for (int j = 0; j < pv.size(); j++) {
118+
pts.position(j).x(pv.get(j).x()).y(pv.get(j).y());
119+
}
120+
matVector.push_back(new Mat(pts.position(0)));
121+
}
122+
polylines(output, matVector, isClosed, boxColor, thickness, LINE_AA, 0);
123+
matVector.close();
124+
return output;
125+
}
126+
127+
/**
128+
* Execute:
129+
* mvn compile exec:java -Dexec.mainClass=demo -q -Dexec.args="--help"
130+
*/
131+
public static void main(String[] argv) {
132+
final Args args = new Args();
133+
final JCommander jc = JCommander.newBuilder()
134+
.addObject(args)
135+
.build();
136+
jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
137+
jc.parse(argv);
138+
if (args.help) {
139+
jc.usage();
140+
return;
141+
}
142+
final int[] backendTargetPair = backendTargetPairs[args.backend];
143+
assert args.model != null && !args.model.isEmpty() : "Model name is empty";
144+
final Size inpSize = new Size(args.width, args.height);
145+
146+
final PPOCRDet model = new PPOCRDet(args.model, inpSize,
147+
args.binaryThreshold, args.polygonThreshold, args.maxCandidates, args.unclipRatio,
148+
backendTargetPair[0], backendTargetPair[1]);
149+
150+
final VideoCapture cap = new VideoCapture();
151+
if (args.input != null) {
152+
cap.open(args.input);
153+
} else {
154+
cap.open(0);
155+
}
156+
assert cap.isOpened() : "Cannot open video or file";
157+
Mat originalImage = new Mat();
158+
159+
final OpenCVFrameConverter.ToMat converter = new OpenCVFrameConverter.ToMat();
160+
CanvasFrame mainframe = null;
161+
if (args.input == null || args.viz) {
162+
mainframe = new CanvasFrame(args.model + " Demo", CanvasFrame.getDefaultGamma() / 2.2);
163+
mainframe.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
164+
mainframe.setVisible(true);
165+
}
166+
167+
final Scalar boxColor = new Scalar(0, 255, 0, 0);
168+
final Scalar textColor = new Scalar(0, 0, 255, 0);
169+
final TickMeter tm = new TickMeter();
170+
while (cap.read(originalImage)) {
171+
cap.read(originalImage);
172+
173+
final int originalW = originalImage.cols();
174+
final int originalH = originalImage.rows();
175+
final double scaleHeight = originalH / (double) inpSize.height();
176+
final double scaleWidth = originalW / (double) inpSize.width();
177+
final Mat image = new Mat();
178+
resize(originalImage, image, inpSize);
179+
180+
// inference
181+
tm.start();
182+
Map.Entry<PointVectorVector, FloatPointer> results = model.infer(image);
183+
tm.stop();
184+
// Scale the results bounding box
185+
final PointVectorVector pvv = results.getKey();
186+
for (int i = 0; i < pvv.size(); i++) {
187+
final PointVector pts = pvv.get(i);
188+
for (int j = 0; j < pts.size(); j++) {
189+
pts.get(j).x((int) (pts.get(j).x() * scaleWidth));
190+
pts.get(j).y((int) (pts.get(j).y() * scaleHeight));
191+
}
192+
}
193+
194+
originalImage = visualize(originalImage, results, tm.getFPS(), boxColor, textColor, true, 2);
195+
tm.reset();
196+
if (args.input != null) {
197+
if (args.save) {
198+
System.out.println("Result image saved to result.jpg");
199+
imwrite("result.jpg", originalImage);
200+
}
201+
if (args.viz) {
202+
mainframe.showImage(converter.convert(originalImage));
203+
}
204+
} else {
205+
mainframe.showImage(converter.convert(originalImage));
206+
}
207+
208+
// clear
209+
pvv.close();
210+
image.close();
211+
}
212+
tm.close();
213+
}
214+
215+
}

models/text_detection_ppocr/pom.xml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<project xmlns="http://maven.apache.org/POM/4.0.0"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
6+
<modelVersion>4.0.0</modelVersion>
7+
8+
<parent>
9+
<groupId>opencv_zoo</groupId>
10+
<artifactId>demo</artifactId>
11+
<version>1.0.0-SNAPSHOT</version>
12+
</parent>
13+
14+
<artifactId>text_detection_ppocr</artifactId>
15+
16+
<build>
17+
<sourceDirectory>${project.basedir}</sourceDirectory>
18+
</build>
19+
20+
</project>

0 commit comments

Comments
 (0)