Skip to content

Commit 7145b04

Browse files
committed
Use java-string-similarity and JaroWinkler for fuzzy matching
1 parent 42aaf66 commit 7145b04

File tree

2 files changed

+22
-13
lines changed

2 files changed

+22
-13
lines changed

core/trino-main/pom.xml

+12 -6
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,18 @@
8484
<artifactId>failsafe</artifactId>
8585
</dependency>
8686

87+
<dependency>
88+
<groupId>info.debatty</groupId>
89+
<artifactId>java-string-similarity</artifactId>
90+
<version>2.0.0</version>
91+
<exclusions>
92+
<exclusion>
93+
<groupId>net.jcip</groupId>
94+
<artifactId>jcip-annotations</artifactId>
95+
</exclusion>
96+
</exclusions>
97+
</dependency>
98+
8799
<dependency>
88100
<groupId>io.airlift</groupId>
89101
<artifactId>aircompressor-v3</artifactId>
@@ -329,12 +341,6 @@
329341
<artifactId>joda-time</artifactId>
330342
</dependency>
331343

332-
<dependency>
333-
<groupId>me.xdrop</groupId>
334-
<artifactId>fuzzywuzzy</artifactId>
335-
<version>1.4.0</version>
336-
</dependency>
337-
338344
<dependency>
339345
<groupId>org.apache.commons</groupId>
340346
<artifactId>commons-math3</artifactId>

core/trino-main/src/main/java/io/trino/execution/SessionPropertyEvaluator.java

+10 -7
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
package io.trino.execution;
1515

1616
import com.google.inject.Inject;
17+
import info.debatty.java.stringsimilarity.JaroWinkler;
18+
import info.debatty.java.stringsimilarity.interfaces.StringSimilarity;
1719
import io.trino.Session;
1820
import io.trino.metadata.SessionPropertyManager;
1921
import io.trino.security.AccessControl;
@@ -28,7 +30,6 @@
2830
import io.trino.sql.tree.NodeRef;
2931
import io.trino.sql.tree.Parameter;
3032
import io.trino.sql.tree.QualifiedName;
31-
import me.xdrop.fuzzywuzzy.FuzzySearch;
3233

3334
import java.util.List;
3435
import java.util.Map;
@@ -44,11 +45,13 @@
4445
import static io.trino.spi.StandardErrorCode.INVALID_SESSION_PROPERTY;
4546
import static io.trino.sql.analyzer.SemanticExceptions.semanticException;
4647
import static java.lang.String.format;
47-
import static java.util.Comparator.comparingInt;
48+
import static java.util.Comparator.comparingDouble;
4849
import static java.util.Objects.requireNonNull;
4950

5051
public class SessionPropertyEvaluator
5152
{
53+
private static final StringSimilarity SIMILARITY = new JaroWinkler();
54+
5255
private final PlannerContext plannerContext;
5356
private final AccessControl accessControl;
5457
private final SessionPropertyManager sessionPropertyManager;
@@ -120,20 +123,20 @@ public static List<PropertyMetadata<?>> findSimilar(String propertyName, Set<Pro
120123
{
121124
return candidates.stream()
122125
.filter(property -> !property.isHidden())
123-
.map(candidate -> new Match(candidate, FuzzySearch.ratio(candidate.getName(), propertyName)))
124-
.filter(match -> match.ratio() > 75)
125-
.sorted(comparingInt(Match::ratio).reversed())
126+
.map(candidate -> new Match(candidate, SIMILARITY.similarity(candidate.getName(), propertyName)))
127+
.filter(match -> match.ratio() > 0.85)
128+
.sorted(comparingDouble(Match::ratio).reversed())
126129
.limit(count)
127130
.map(Match::metadata)
128131
.collect(toImmutableList());
129132
}
130133

131-
private record Match(PropertyMetadata<?> metadata, int ratio)
134+
private record Match(PropertyMetadata<?> metadata, double ratio)
132135
{
133136
public Match
134137
{
135138
requireNonNull(metadata, "metadata is null");
136-
verify(ratio >= 0 && ratio < 100, "ratio must be in the [0, 100) range");
139+
verify(ratio >= 0.0 && ratio <= 1.0, "ratio must be in the [0, 1.0] range");
137140
}
138141
}
139142

0 commit comments

Comments
 (0)