Skip to content

Commit ef9e478

Browse files
committed
[CI] More title normalization tweaks
1 parent 9a838dc commit ef9e478

File tree

2 files changed

+54
-57
lines changed

2 files changed

+54
-57
lines changed

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"all": "bash -c 'x=0; for c in \"$@\"; do npm run $c || x=$((x+1)); done; ((!x)) || (echo \"ERROR: some scripts failed!\" && exit 1)' -",
4545
"build:preview": "set -x && npm run _build -- --minify",
4646
"build:production": "npm run _hugo -- --minify",
47-
"build": "npm run _build",
47+
"build": "npm run _build --",
4848
"cd:public": "cd public &&",
4949
"check:expired": "find content -name '*.md' | xargs ./scripts/list-expired.pl",
5050
"check:filenames": "test -z \"$(npm run -s _ls-bad-filenames)\" || npm run -s _filename-error",

scripts/content-modules/normalize-titles.pl

+53-56
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/perl -w -i
22
#
33
# DRAFT script used to normalize semconv doc-page titles and add Hugo front matter
4-
#
54

65
$^W = 1;
76

@@ -30,43 +29,28 @@
3029
my $beforeTitle = '';
3130

3231
sub toTitleCase($) {
33-
my $str = shift;
32+
my ($str) = @_;
3433

35-
my @mixedCaseWords; # mixed-case or ALLCAPS
36-
while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
37-
push @mixedCaseWords, $1;
38-
}
34+
# Capitalize non-mixedcase words
35+
$str =~ s/(^|\s)([a-zA-Z])([a-z][a-z0-9]*)\b/$1\u$2$3/g;
3936

40-
$str =~ s/(\w+)/\u\L$1/g;
37+
# Revert / lowercase articles etc.
38+
$str =~ s/\b(A|And|As|By|For|In|On)\b/\L$1/g;
4139

42-
foreach my $word (@mixedCaseWords) {
43-
my $lc_word = lc($word);
44-
$str =~ s/\b$lc_word\b/$word/ig;
45-
}
46-
$str =~ s/\b(A|And|As|By|For|In|On|\.Js)\b/\L$1/g;
4740
return $str;
4841
}
4942

50-
my @specialWords = qw(Core); # for .NET
51-
5243
sub toSentenceCase($) {
53-
my $str = shift;
44+
my ($str) = @_;
5445

55-
my @mixedCaseWords = @specialWords; # mixed-case or ALLCAPS
56-
while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
57-
push @mixedCaseWords, $1;
58-
}
46+
# Lowercase non-mixedcase words
47+
$str =~ s/\b([A-Z]?[a-z][a-z0-9]*)\b/\l$1/g;
5948

60-
$str = lc $str;
49+
# Capitalize the first word unless it is mixed case
50+
$str =~ s/^([a-z][a-z0-9]*)\b/\u$1/;
6151

62-
# Replace words with their mixed-case or ALL CAPS versions
63-
foreach my $word (@mixedCaseWords) {
64-
my $lc_word = lc($word);
65-
$str =~ s/\b\Q$lc_word\E\b/$word/g;
66-
}
67-
68-
# Capitalize the first letter of the string
69-
$str =~ s/^(\s*\w)/\u$1/;
52+
# Handle exception
53+
$str =~ s/(ASP.NET) core/$1 Core/;
7054

7155
return $str;
7256
}
@@ -75,7 +59,7 @@ ()
7559
my $frontMatter = '';
7660
if ($frontMatterFromFile) {
7761
# printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD;
78-
$frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i;
62+
$frontMatterFromFile = '' unless $frontMatterFromFile =~ /auto_gen|aliases/i;
7963
# printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/;
8064
}
8165
$linkTitle = $title;
@@ -110,6 +94,7 @@ ()
11094
$linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
11195
$linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
11296
$linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;
97+
$linkTitle = $1 if $title =~ /GenAI (\w+)$/i;
11398

11499
# Missing an `s` in "Semantic Convention"?
115100
if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
@@ -137,53 +122,68 @@ ()
137122
$linkTitle =~ s/Semantic Conventions? Stability //i;
138123
}
139124

140-
if ($linkTitle and $linkTitle ne $title) {
141-
$linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/;
142-
if ($frontMatterFromFile =~ /linkTitle: /) {
143-
$frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
144-
} else {
145-
$frontMatter .= "linkTitle: $linkTitle\n"
125+
$frontMatter .= $frontMatterFromFile if $frontMatterFromFile;
126+
127+
if ($linkTitle && $linkTitle ne $title) {
128+
$linkTitle = toSentenceCase($linkTitle); # unless $linkTitle =~ /^gRPC/;
129+
if ($frontMatter !~ /linkTitle: /) {
130+
$frontMatter .= "linkTitle: $linkTitle\n";
131+
} elsif ($frontMatter !~ /^auto_gen:/m) {
132+
$frontMatter =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
146133
}
147134
}
148135

149-
$frontMatter .= $frontMatterFromFile if $frontMatterFromFile;
150-
151136
if ($ARGV =~ /docs\/(.*?)(README|_index).md$/) {
152137
$frontMatter .= "path_base_for_github_subdir:\n";
153138
$frontMatter .= " from: tmp/semconv/docs/$1_index.md\n";
154139
$frontMatter .= " to: $1README.md\n";
155140
}
156-
$frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions';
157141

158142
return $frontMatter;
159143
}
160144

161145
sub printTitleAndFrontMatter() {
162-
my $frontMatter;
163-
164-
165-
# if ($ARGV =~ /docs\/(README|_index)/) {
166-
# print STDOUT "> $ARGV\n > frontMatterFromFile: $frontMatterFromFile\n";
167-
# print STDOUT " > title: $title\n";
168-
# print STDOUT " > linkTitle: $linkTitle\n";
169-
# }
170-
171-
if ($frontMatterFromFile && $frontMatterFromFile =~ /auto_gen:\s*false/) {
172-
$frontMatter = $frontMatterFromFile;
173-
} else {
174-
$frontMatter = computeTitleAndFrontMatter();
175-
}
146+
my $frontMatter = computeTitleAndFrontMatter();
176147

177148
if ($frontMatter) {
178149
$frontMatter = "<!--- Hugo front matter used to generate the website version of this page:\n" . $frontMatter;
179150
$frontMatter .= "--->\n";
180151
print "$frontMatter\n";
181152
}
153+
182154
print $beforeTitle if $beforeTitle;
183155
$title = toTitleCase($title);
184156
print "# $title\n"
185157
}
186158

159+
sub gatherFrontMatter() {
160+
my $autoGenValues = 'false|below';
161+
my $autoGenDirective = '';
162+
my $autoGenSkip = 0;
163+
164+
while(<>) {
165+
last if /^--->/;
166+
next if $autoGenSkip;
167+
168+
my ($keyWord, $autoGenDirective) = /^(auto.?gen): ([^\#]+)/;
169+
if ($keyWord) {
170+
# print STDOUT ">> $ARGV:\n$frontMatterFromFile";
171+
if ($keyWord ne 'auto_gen') {
172+
warn "$ARGV: WARN: misspelled keyword, should be 'auto_gen' not '$keyWord'\n";
173+
} elsif (!$autoGenDirective or $autoGenDirective !~ /^($autoGenValues)/) {
174+
warn "$ARGV: WARN: missing or unrecognized 'auto_gen' value, should match '$autoGenValues', not $autoGenDirective\n";
175+
} elsif ($autoGenDirective =~ /^below/) {
176+
$autoGenSkip = 1;
177+
# print STDOUT ">>>> skipping\n";
178+
} else {
179+
# print STDOUT ">> wa?\n";
180+
}
181+
}
182+
183+
$frontMatterFromFile .= $_;
184+
}
185+
}
186+
187187
# main
188188

189189
my $titleRegexStr = '^#\s+(.*)';
@@ -200,10 +200,7 @@ ()
200200
$beforeTitle = '';
201201
$linkTitle = '';
202202
if (/^<!--- Hugo/) {
203-
while(<>) {
204-
last if /^--->/;
205-
$frontMatterFromFile .= $_;
206-
}
203+
gatherFrontMatter();
207204
next;
208205
}
209206
}

0 commit comments

Comments
 (0)