|
1 | 1 | #!/usr/bin/perl -w -i
|
2 | 2 | #
|
3 | 3 | # DRAFT script used to normalize semconv doc-page titles and add Hugo front matter
|
4 |
| -# |
5 | 4 |
|
6 | 5 | $^W = 1;
|
7 | 6 |
|
|
30 | 29 | my $beforeTitle = '';
|
31 | 30 |
|
# Convert a heading to title case and return the converted copy (the
# argument itself is not modified).
#
# - A word that starts the string or follows whitespace, and whose characters
#   after the first letter are all lowercase letters or digits, gets its first
#   letter uppercased. Mixed-case and all-caps words (gRPC, HTTP, OpenAI) do
#   not match the pattern and are left exactly as written.
# - The short words a, and, as, by, for, in, on are then lowercased again,
#   except when one of them is the very first word of the string.
sub toTitleCase($) {
  my ($str) = @_;

  # Capitalize non-mixedcase words
  $str =~ s/(^|\s)([a-zA-Z])([a-z][a-z0-9]*)\b/$1\u$2$3/g;

  # Revert / lowercase articles etc. The `(?!^)` guard rejects a match at
  # position 0, so a title that begins with one of these words keeps its
  # leading capital.
  $str =~ s/(?!^)\b(A|And|As|By|For|In|On)\b/\L$1/g;

  return $str;
}
|
49 | 42 |
|
50 |
| -my @specialWords = qw(Core); # for .NET |
51 |
| - |
# Convert a heading to sentence case and return the converted copy (the
# argument itself is not modified).
#
# Steps, in order:
#   1. Every word made of an optional leading capital followed only by
#      lowercase letters/digits has its first letter lowercased. Mixed-case
#      and all-caps words (HTTP, gRPC, OpenAI) do not match and are kept.
#   2. If the string starts with such an all-lowercase word, that word is
#      capitalized.
#   3. A few product names are restored to their official spelling.
sub toSentenceCase($) {
  my ($str) = @_;

  # Lowercase non-mixedcase words
  $str =~ s/\b([A-Z]?[a-z][a-z0-9]*)\b/\l$1/g;

  # Capitalize the first word unless it is mixed case
  $str =~ s/^([a-z][a-z0-9]*)\b/\u$1/;

  # Handle exceptions
  # The dot is escaped so that only a literal ".NET" qualifies; an unescaped
  # dot would also accept any other character in front of "NET".
  $str =~ s/(\.NET) (core)/$1 \u$2/;
  $str =~ s/(AI) (inference)/$1 \u$2/;
  $str =~ s|google cloud|Google Cloud|i;
  $str =~ s|pub/sub|Pub/Sub|;

  return $str;
}
|
|
75 | 62 | my $frontMatter = '';
|
76 | 63 | if ($frontMatterFromFile) {
|
77 | 64 | # printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD;
|
78 |
| - $frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i; |
| 65 | + $frontMatterFromFile = '' unless $frontMatterFromFile =~ /auto_gen|aliases/i; |
79 | 66 | # printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/;
|
80 | 67 | }
|
81 | 68 | $linkTitle = $title;
|
|
104 | 91 | $linkTitle = $1;
|
105 | 92 | }
|
106 | 93 |
|
107 |
| - $linkTitle = 'Attributes' if $title eq 'General Attributes'; |
108 | 94 | $linkTitle = 'Events' if $linkTitle =~ /Mobile Events/;
|
109 | 95 | $linkTitle = 'Connect' if $title =~ /Connect RPC$/i;
|
110 | 96 | $linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
|
111 | 97 | $linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
|
112 | 98 | $linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;
|
| 99 | + $linkTitle = $1 if $title =~ /Gen(?:erative) ?AI (\w+)$/i && $title !~ /Systems$/i; |
| 100 | + $linkTitle = $1 if $title =~ /(OpenAI) \w+$/i; |
113 | 101 |
|
114 | 102 | # Missing an `s` in "Semantic Convention"?
|
115 | 103 | if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
|
116 | 104 | $title =~ s/Semantic Convention\b/$&s/ig;
|
117 | 105 | printf STDOUT "> $title -> $linkTitle - added 's' to 'Conventions'\n";
|
118 | 106 | }
|
119 |
| - $linkTitle =~ s/^Database Client //; |
| 107 | + $linkTitle =~ s/^(Database|Messaging) Client //; |
120 | 108 | if ($ARGV =~ /docs\/azure/) {
|
121 | 109 | $linkTitle =~ s/ Resource Logs?//i;
|
122 | 110 | $linkTitle =~ s/Azure //i;
|
|
129 | 117 | unless $ARGV =~ /gen-ai-metrics/;
|
130 | 118 | $linkTitle =~ s/ (components|guide|queries|supplementary information|systems|platform)$//i;
|
131 | 119 | $linkTitle =~ s/ \(command line interface\)//i;
|
| 120 | + $linkTitle =~ s/ resources$//i; |
| 121 | + $linkTitle =~ s/(Process) and process runtime$/$1/i; |
132 | 122 |
|
133 | 123 | $linkTitle = '.NET' if $linkTitle =~ /.net common language runtime/i;
|
134 | 124 | $linkTitle = 'CLI' if $linkTitle =~ /\(command line interface\) programs/i;
|
|
137 | 127 | $linkTitle =~ s/Semantic Conventions? Stability //i;
|
138 | 128 | }
|
139 | 129 |
|
140 |
| - if ($linkTitle and $linkTitle ne $title) { |
141 |
| - $linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/; |
142 |
| - if ($frontMatterFromFile =~ /linkTitle: /) { |
143 |
| - $frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m; |
144 |
| - } else { |
145 |
| - $frontMatter .= "linkTitle: $linkTitle\n" |
| 130 | + $frontMatter .= $frontMatterFromFile if $frontMatterFromFile; |
| 131 | + |
| 132 | + if ($linkTitle && $linkTitle ne $title) { |
| 133 | + $linkTitle = toSentenceCase($linkTitle); # unless $linkTitle =~ /^gRPC/; |
| 134 | + if ($frontMatter !~ /linkTitle: /) { |
| 135 | + $frontMatter .= "linkTitle: $linkTitle\n"; |
| 136 | + } elsif ($frontMatter !~ /^auto_gen:/m) { |
| 137 | + $frontMatter =~ s/^(linkTitle: ).*$/$1$linkTitle/m; |
146 | 138 | }
|
147 | 139 | }
|
148 | 140 |
|
149 |
| - $frontMatter .= $frontMatterFromFile if $frontMatterFromFile; |
150 |
| - |
151 | 141 | if ($ARGV =~ /docs\/(.*?)(README|_index).md$/) {
|
152 | 142 | $frontMatter .= "path_base_for_github_subdir:\n";
|
153 | 143 | $frontMatter .= " from: tmp/semconv/docs/$1_index.md\n";
|
154 | 144 | $frontMatter .= " to: $1README.md\n";
|
155 | 145 | }
|
156 |
| - $frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions'; |
157 | 146 |
|
158 | 147 | return $frontMatter;
|
159 | 148 | }
|
160 | 149 |
|
# Emit the top of a page to the currently selected output handle, in order:
#   1. the front matter returned by computeTitleAndFrontMatter(), wrapped in
#      an HTML comment and followed by a blank line (only when non-empty);
#   2. whatever text is held in $beforeTitle (only when non-empty);
#   3. the title as a level-1 Markdown heading.
# The file-level $title is overwritten with its title-cased form.
sub printTitleAndFrontMatter() {
  # Must run first: the front matter is computed before $title is title-cased.
  my $hugoFrontMatter = computeTitleAndFrontMatter();

  if ($hugoFrontMatter) {
    print join('',
      "<!--- Hugo front matter used to generate the website version of this page:\n",
      $hugoFrontMatter,
      "--->\n",
      "\n");
  }

  if ($beforeTitle) {
    print $beforeTitle;
  }

  $title = toTitleCase($title);
  print '# ', $title, "\n";
}
|
186 | 163 |
|
# Read the remaining lines of a front-matter comment block from <> and append
# them to the file-level $frontMatterFromFile.
#
# Reading stops at the first line that starts with "--->" (that line is
# consumed but not stored) or at the end of the current input file, whichever
# comes first, so an unterminated block can never swallow lines belonging to
# the next file in @ARGV.
#
# An "auto_gen: <value>" line is checked as it goes by:
#   - a keyword that is close to, but not exactly, 'auto_gen' gets a warning;
#   - a value that does not start with 'false' or 'below' gets a warning;
#   - 'below' keeps the auto_gen line itself and drops every following line
#     of the block.
sub gatherFrontMatter() {
  my $autoGenValues = 'false|below';
  my $autoGenSkip = 0;

  while (<>) {
    last if /^--->/;

    unless ($autoGenSkip) {
      my ($keyWord, $autoGenDirective) = /^(auto.?gen): ([^\#]+)/;
      if ($keyWord) {
        # The capture runs up to a '#' or the end of the line, newline
        # included; trim it so the warning below stays on one line.
        $autoGenDirective =~ s/\s+$//;

        if ($keyWord ne 'auto_gen') {
          warn "$ARGV: WARN: misspelled keyword, should be 'auto_gen' not '$keyWord'\n";
        } elsif (!$autoGenDirective or $autoGenDirective !~ /^($autoGenValues)/) {
          warn "$ARGV: WARN: missing or unrecognized 'auto_gen' value, should match '$autoGenValues', not $autoGenDirective\n";
        } elsif ($autoGenDirective =~ /^below/) {
          $autoGenSkip = 1;
        }
      }

      $frontMatterFromFile .= $_;
    }

    # Bare `eof` tests the file most recently read by <>: stop here instead
    # of continuing into the next input file.
    if (eof) {
      warn "$ARGV: WARN: front matter is not terminated by '--->'\n";
      last;
    }
  }
}
| 191 | + |
187 | 192 | # main
|
188 | 193 |
|
189 | 194 | my $titleRegexStr = '^#\s+(.*)';
|
|
200 | 205 | $beforeTitle = '';
|
201 | 206 | $linkTitle = '';
|
202 | 207 | if (/^<!--- Hugo/) {
|
203 |
| - while(<>) { |
204 |
| - last if /^--->/; |
205 |
| - $frontMatterFromFile .= $_; |
206 |
| - } |
| 208 | + gatherFrontMatter(); |
207 | 209 | next;
|
208 | 210 | }
|
209 | 211 | }
|
|
0 commit comments