Skip to content

Commit 3cdcc99

Browse files
committed
[CI] Script updates in support of semconv spec processing
1 parent 1dca592 commit 3cdcc99

File tree

2 files changed

+134
-93
lines changed

2 files changed

+134
-93
lines changed

scripts/content-modules/adjust-pages.pl

+5-5
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ ($$$)
8282

8383
return 0 if $patchMsgCount{$key};
8484

85-
if (($vers = $versions{$specName}) ne $targetVers) {
86-
print STDOUT "INFO: remove obsolete patch '$patchID' now that spec '$specName' is at v$vers, not v$targetVers - $0\n";
87-
} elsif (($vers = $versFromSubmod{$specName}) ne $targetVers) {
88-
print STDOUT "INFO [$patchID]: skipping patch '$patchID' since spec '$specName' submodule is at v$vers not v$targetVers - $0\n";
85+
if (($vers = $versions{$specName}) gt $targetVers) {
86+
print STDOUT "INFO: remove obsolete patch '$patchID' now that spec '$specName' is at v$vers > v$targetVers - $0\n";
87+
} elsif (($vers = $versFromSubmod{$specName}) gt $targetVers) {
88+
print STDOUT "INFO [$patchID]: skipping patch '$patchID' since spec '$specName' submodule is at v$vers > v$targetVers - $0\n";
8989
} else {
9090
return 'Apply the patch';
9191
}
@@ -103,7 +103,7 @@ ()
103103

104104
sub patchSemConv1_30_0() {
105105
return unless $ARGV =~ /^tmp\/semconv\/docs\//
106-
&& applyPatchOrPrintMsgIf('2025-01-24-emit-an-event', 'semconv', '1.30.0');
106+
&& applyPatchOrPrintMsgIf('2025-01-24-emit-an-event', 'semconv', '1.30.0-3-g');
107107

108108
s|Emit Event API|Log API|;
109109
s|(docs/specs/otel/logs/api.md#emit-a)n-event|$1-logrecord|;

scripts/content-modules/normalize-titles.pl

+129-88
Original file line numberDiff line numberDiff line change
@@ -20,161 +20,202 @@
2020
my $semconvSpecRepoUrl = 'https://github.com/open-telemetry/semantic-conventions';
2121
my $semConvRef = "$otelSpecRepoUrl/blob/main/semantic_conventions/README.md";
2222
my $specBasePath = '/docs/specs';
23-
my $path_base_for_github_subdir = "content/en$specBasePath";
2423
my %versions = qw(
2524
spec: 1.22.0
2625
otlp: 1.0.0
2726
);
2827
my $otelSpecVers = $versions{'spec:'};
2928
my $otlpSpecVers = $versions{'otlp:'};
29+
my $seenFirstNonBlankLineBeforeTitle;
30+
my $beforeTitle = '';
3031

31-
# TODO: remove once OpAMP spec has been updated
32-
my $opampFrontMatter = << "EOS";
33-
title: Open Agent Management Protocol
34-
linkTitle: OpAMP
35-
body_class: otel-docs-spec
36-
github_repo: &repo $opAmpSpecRepoUrl
37-
github_project_repo: *repo
38-
path_base_for_github_subdir:
39-
from: content/en/docs/specs/opamp/index.md
40-
to: specification.md
41-
EOS
42-
43-
# TODO: remove once Semconv spec has been updated
44-
my $semconvFrontMatter = << "EOS";
45-
linkTitle: Semantic Conventions
46-
# no_list: true
47-
cascade:
48-
body_class: otel-docs-spec
49-
github_repo: &repo $semconvSpecRepoUrl
50-
github_subdir: docs
51-
path_base_for_github_subdir: content/en/docs/specs/semconv/
52-
github_project_repo: *repo
53-
EOS
54-
55-
# Adjust semconv title capitalization
5632
# Convert a heading to Title Case.
#
# Argument: the heading text (a single scalar).
# Returns:  the converted text.
#
# Steps:
#   1. Remember every word that is mixed-case or ALLCAPS in the input (for
#      example gRPC, MongoDB, HTTP) so its original spelling can be restored.
#   2. Capitalize the first letter of every word and lowercase the rest.
#   3. Restore the remembered words.
#   4. Lowercase a small set of short words (a, and, as, by, for, in, on) and
#      the ".Js" suffix, except when the word is the very first thing in the
#      string: the first word of a title stays capitalized.
sub toTitleCase($) {
  my $str = shift;

  my @mixedCaseWords; # mixed-case or ALLCAPS
  while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
    push @mixedCaseWords, $1;
  }

  $str =~ s/(\w+)/\u\L$1/g;

  foreach my $word (@mixedCaseWords) {
    my $lc_word = lc($word);
    # /i is required: after step 2 the word reads e.g. "Grpc", not "grpc".
    # \Q...\E keeps the interpolated word literal, as toSentenceCase does.
    $str =~ s/\b\Q$lc_word\E\b/$word/ig;
  }

  # (?!^) skips a match at offset 0 so a leading "A", "In", ... is kept.
  $str =~ s/(?!^)\b(A|And|As|By|For|In|On|\.Js)\b/\L$1/g;
  return $str;
}
8249

83-
sub printTitleAndFrontMatter() {
50+
# Words whose capitalization is always preserved by toSentenceCase, even when
# they are not mixed-case in the input.
my @specialWords = qw(Core); # for .NET

# Convert a heading to sentence case.
#
# Argument: the heading text (a single scalar).
# Returns:  the converted text.
#
# The whole string is lowercased, then every word that was mixed-case or
# ALLCAPS in the input (plus the entries of @specialWords) is restored to its
# original spelling. Finally the first word is capitalized, unless it is one
# of the restored words: names such as "gRPC" must keep their lowercase
# initial.
sub toSentenceCase($) {
  my $str = shift;

  my @mixedCaseWords = @specialWords; # mixed-case or ALLCAPS
  while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) {
    push @mixedCaseWords, $1;
  }

  $str = lc $str;

  # Replace words with their mixed-case or ALL CAPS versions
  foreach my $word (@mixedCaseWords) {
    my $lc_word = lc($word);
    $str =~ s/\b\Q$lc_word\E\b/$word/g;
  }

  # Capitalize the first word. Leading whitespace is captured separately so
  # that the letter, not the whitespace, receives the upper-casing.
  my %preserved = map { $_ => 1 } @mixedCaseWords;
  $str =~ s/^(\s*)(\w+)/$1 . ($preserved{$2} ? $2 : ucfirst($2))/e;

  return $str;
}
73+
74+
# Compute the Hugo front matter for the page currently being processed.
#
# Reads the file-level variables $title, $frontMatterFromFile and $ARGV.
# Side effects:
#   - sets $linkTitle (derived from $title by the shortening rules below);
#   - may rewrite $title ("Semantic Convention" -> "Semantic Conventions");
#   - may clear $frontMatterFromFile, or update its `linkTitle:` entry.
# Returns: the front matter text (possibly empty), without comment delimiters.
sub computeTitleAndFrontMatter() {
  my $frontMatter = '';
  if ($frontMatterFromFile) {
    # Front matter from the file is kept only if it mentions one of these keys.
    $frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i;
  }
  $linkTitle = $title;

  # Drop the boilerplate part of the title.
  if ($title =~ /^OpenTelemetry (Protocol )?(.*)/) {
    $linkTitle = $2;
  } elsif ($title =~ /^(.*?) Semantic Conventions?$/i) {
    $linkTitle = toTitleCase($1);
  } elsif ($title =~ /^.*? for (.*)$/i) {
    $linkTitle = toTitleCase($1);
  }
  if ($linkTitle =~ /^Function.as.a.Service$/i) {
    $linkTitle = 'FaaS';
  }
  $linkTitle = 'Database' if $title =~ /Database Calls and Systems$/i;

  # Drop a leading area or vendor name.
  if ($linkTitle =~ /^(?:FaaS|HTTP) (.*)$/i && $ARGV !~ /dotnet|migration/) {
    $linkTitle = $1;
  } elsif ($linkTitle =~ /^Microsoft (?:Azure )?(.*)$/i) {
    # The space belongs inside the optional group; otherwise a title without
    # "Azure" would need two consecutive spaces to match.
    $linkTitle = $1;
  } elsif ($linkTitle =~ /^RPC (.*)$/i) {
    $linkTitle = $1;
  } elsif ($linkTitle =~ /^(Exceptions|Feature Flags) .. (.*)$/i) {
    # ".." stands for any two-character connecting word.
    $linkTitle = $2;
  }
  if ($linkTitle =~ /^(.*) Attributes$/i && $title ne 'General Attributes') {
    $linkTitle = $1;
  }

  # Fixed link titles for specific pages.
  $linkTitle = 'Attributes' if $title eq 'General Attributes';
  $linkTitle = 'Events' if $linkTitle =~ /Mobile Events/;
  $linkTitle = 'Connect' if $title =~ /Connect RPC$/i;
  $linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
  $linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
  $linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;

  # Missing an `s` in "Semantic Convention"?
  if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
    $title =~ s/Semantic Convention\b/$&s/ig;
    # print, not printf: the message contains page data and must not be
    # interpreted as a format string.
    print STDOUT "> $title -> $linkTitle - added 's' to 'Conventions'\n";
  }

  # Path-specific and generic trimming.
  $linkTitle =~ s/^Database Client //;
  if ($ARGV =~ /docs\/azure/) {
    $linkTitle =~ s/ Resource Logs?//i;
    $linkTitle =~ s/Azure //i;
  } elsif ($ARGV =~ /docs\/messaging\/[^R]/) {
    $linkTitle =~ s/( messaging|messaging )//i;
  }

  $linkTitle =~ s/^General //i; # if $ARGV =~ /docs\/general/
  $linkTitle =~ s/( (runtime|(web )?server))? metrics( emitted by .*)?$//i
    unless $ARGV =~ /gen-ai-metrics/;
  $linkTitle =~ s/ (components|guide|queries|supplementary information|systems|platform)$//i;
  $linkTitle =~ s/ \(command line interface\)//i;

  $linkTitle = '.NET' if $linkTitle =~ /\.net common language runtime/i;
  # NOTE(review): the substitution three statements above already removes
  # " (command line interface)", so this rule can only fire when the link
  # title starts with the parenthesis. Confirm whether 'CLI' is the intended
  # result and, if so, move this rule before that substitution.
  $linkTitle = 'CLI' if $linkTitle =~ /\(command line interface\) programs/i;

  if ($ARGV =~ /non-normative/) {
    $linkTitle =~ s/Semantic Conventions? Stability //i;
  }

  # Emit a linkTitle only when it differs from the title.
  if ($linkTitle and $linkTitle ne $title) {
    $linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/;
    if ($frontMatterFromFile =~ /linkTitle: /) {
      $frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m;
    } else {
      $frontMatter .= "linkTitle: $linkTitle\n";
    }
  }

  $frontMatter .= $frontMatterFromFile if $frontMatterFromFile;

  # Section index pages also get a path mapping entry.
  if ($ARGV =~ /docs\/(.*?)(README|_index)\.md$/) {
    $frontMatter .= "path_base_for_github_subdir:\n";
    $frontMatter .= " from: tmp/semconv/docs/${1}_index.md\n";
    $frontMatter .= " to: ${1}README.md\n";
  }
  $frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions';

  return $frontMatter;
}
160+
161+
# Print the page header: the front matter (wrapped in an HTML comment), any
# text that was collected before the title, and the title in Title Case.
#
# Reads the file-level variables $frontMatterFromFile, $beforeTitle and
# $title; rewrites $title with its Title Case form.
sub printTitleAndFrontMatter() {
  # Front matter that contains `auto_gen: false` is used exactly as found in
  # the file; in every other case it is computed.
  my $useFileFrontMatter =
    $frontMatterFromFile && $frontMatterFromFile =~ /auto_gen:\s*false/;
  my $frontMatter = $useFileFrontMatter
    ? $frontMatterFromFile
    : computeTitleAndFrontMatter();

  if ($frontMatter) {
    print "<!--- Hugo front matter used to generate the website version of this page:\n"
      . $frontMatter
      . "--->\n"
      . "\n";
  }

  print $beforeTitle if $beforeTitle;

  $title = toTitleCase($title);
  print "# $title\n"
}
153186

154187
# main
155188

189+
# Pattern for a level-1 Markdown heading; capture 1 is the title text.
my $titleRegexStr = '^#\s+(.*)';

# Process every input line. Per file:
#   - an optional leading "<!--- Hugo ... --->" comment is read into
#     $frontMatterFromFile and not echoed;
#   - blank lines before the first non-blank line are dropped;
#   - other lines before the title are buffered in $beforeTitle;
#   - the title line triggers printTitleAndFrontMatter();
#   - every line after the title is echoed unchanged.
while(<>) {
  # printf STDOUT "$ARGV Got: $_" if $gD;

  if ($file ne $ARGV) {
    # First line of a new input file: reset the per-file state.
    $file = $ARGV;
    # printf STDOUT "> $file\n"; # if $gD;
    $seenFirstNonBlankLineBeforeTitle = 0;
    $frontMatterFromFile = '';
    $title = '';
    $beforeTitle = '';
    $linkTitle = '';
    if (/^<!--- Hugo/) {
      while(<>) {
        last if /^--->/;
        $frontMatterFromFile .= $_;
        # Stop at the end of the current file so that an unterminated
        # comment cannot swallow the following input files.
        last if eof;
      }
      next;
    }
  }

  if ($title) {
    print;
  } elsif (/^\s*$/ && !$seenFirstNonBlankLineBeforeTitle) {
    next; # Drop blank lines until we see a title
  } elsif (($title) = /$titleRegexStr/) {
    printTitleAndFrontMatter();
  } else {
    $seenFirstNonBlankLineBeforeTitle = 1;
    $beforeTitle .= $_;
  }
}

0 commit comments

Comments
 (0)