Skip to content

Commit 61ee8af

Browse files
authored
MBS-11854: Recognize Unicode hyphen in guess case (metabrainz#2199)
Words containing a Unicode hyphen (‐, U+2010) were not being split into separate words: the hyphen was treated as just one more character of the word. This change makes the input splitter treat it as punctuation, and ensures that guess case recognizes it as a hyphen and handles it the same way as a hyphen-minus (-). Sadly, part of my fix for MBS-10156 (for "re‐mode" written with a Unicode hyphen) will no longer work, since "re‐mode" is no longer considered a single word, but I think this behavior makes a lot more sense for now. The current code already didn't do anything with "re-mode" written with a hyphen-minus, so this actually adds consistency.
1 parent 6e3e634 commit 61ee8af

File tree

4 files changed

+14
-7
lines changed

4 files changed

+14
-7
lines changed

root/static/scripts/guess-case/MB/GuessCase/Handler/Base.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ MB.GuessCase.Handler.Base = function (gc) {
117117
*/
118118
var handled = false;
119119
if (!gc.regexes.SPECIALCASES) {
120-
gc.regexes.SPECIALCASES = /(&|¿|¡|\?|\!|;|:|'|||||"|||||«|»|\-|\+|,|\*|\.|#|%|\/|\(|\)|\{|\}|\[|\])/;
120+
gc.regexes.SPECIALCASES = /(&|¿|¡|\?|\!|;|:|'|||||"|||||«|»|\-|‐|\+|,|\*|\.|#|%|\/|\(|\)|\{|\}|\[|\])/;
121121
}
122122
if (input.matchCurrentWord(gc.regexes.SPECIALCASES)) {
123123
handled = !!(
@@ -332,7 +332,7 @@ MB.GuessCase.Handler.Base = function (gc) {
332332
*/
333333
self.doHyphen = function () {
334334
if (!gc.regexes.HYPHEN) {
335-
gc.regexes.HYPHEN = '-';
335+
gc.regexes.HYPHEN = /^[‐\-]$/;
336336
}
337337
if (input.matchCurrentWord(gc.regexes.HYPHEN)) {
338338
output.appendWordPreserveWhiteSpace(true);

root/static/scripts/guess-case/MB/GuessCase/Input.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ class GuessCaseInput {
182182
const splitwords = [];
183183
let word = [];
184184
if (!gc.regexes.SPLITWORDSANDPUNCTUATION) {
185-
gc.regexes.SPLITWORDSANDPUNCTUATION = /[^!¿¡\"%&'´`«»()\[\]\{\}\*\+,-\.\/:;<=>\?\s#]/;
185+
gc.regexes.SPLITWORDSANDPUNCTUATION = /[^!¿¡\"%&'´`«»()\[\]\{\}\*\+‐\-,\.\/:;<=>\?\s#]/;
186186
}
187187
for (let i = 0; i < chars.length; i++) {
188188
if (chars[i].match(gc.regexes.SPLITWORDSANDPUNCTUATION)) {

root/static/scripts/guess-case/utils.js

-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ const preBracketSingleWordsList = [
6161
'rehearsal',
6262
'remixed',
6363
'remode',
64-
're‐mode',
6564
'rework',
6665
'reworked',
6766
'session',

root/static/scripts/tests/GuessCase.js

+11-3
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ test('Recording', function (t) {
175175
});
176176

177177
test('Work', function (t) {
178-
t.plan(23);
178+
t.plan(24);
179179

180180
const tests = [
181181
{
@@ -344,6 +344,14 @@ test('Work', function (t) {
344344
roman: false,
345345
keepuppercase: false,
346346
},
347+
{
348+
input: 'hyphen-minus? hyphen‐maximus!',
349+
expected: 'Hyphen-Minus? Hyphen‐Maximus!',
350+
bug: 'MBS-11854',
351+
mode: 'English',
352+
roman: false,
353+
keepuppercase: false,
354+
},
347355
];
348356

349357
for (const test of tests) {
@@ -457,8 +465,8 @@ test('BugFixes', function (t) {
457465
mode: 'French',
458466
},
459467
{
460-
input: 'We Love Techno (Re‐Mode)',
461-
expected: 'We Love Techno (re‐mode)',
468+
input: 'We Love Techno (Remode)',
469+
expected: 'We Love Techno (remode)',
462470
bug: 'MBS-10156',
463471
mode: 'English',
464472
},

0 commit comments

Comments
 (0)