|
1 | 1 | #!/usr/bin/perl -w -i
|
2 | 2 | #
|
3 | 3 | # DRAFT script used to normalize semconv doc-page titles and add Hugo front matter
|
4 |
| -# |
5 | 4 |
|
6 | 5 | $^W = 1;
|
7 | 6 |
|
|
30 | 29 | my $beforeTitle = '';
|
31 | 30 |
|
32 | 31 | sub toTitleCase($) {
|
33 |
| - my $str = shift; |
| 32 | + my ($str) = @_; |
34 | 33 |
|
35 |
| - my @mixedCaseWords; # mixed-case or ALLCAPS |
36 |
| - while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) { |
37 |
| - push @mixedCaseWords, $1; |
38 |
| - } |
| 34 | + # Capitalize non-mixedcase words |
| 35 | + $str =~ s/(^|\s)([a-zA-Z])([a-z][a-z0-9]*)\b/$1\u$2$3/g; |
39 | 36 |
|
40 |
| - $str =~ s/(\w+)/\u\L$1/g; |
| 37 | + # Revert / lowercase articles etc. |
| 38 | + $str =~ s/\b(A|And|As|By|For|In|On)\b/\L$1/g; |
41 | 39 |
|
42 |
| - foreach my $word (@mixedCaseWords) { |
43 |
| - my $lc_word = lc($word); |
44 |
| - $str =~ s/\b$lc_word\b/$word/ig; |
45 |
| - } |
46 |
| - $str =~ s/\b(A|And|As|By|For|In|On|\.Js)\b/\L$1/g; |
47 | 40 | return $str;
|
48 | 41 | }
|
49 | 42 |
|
50 |
| -my @specialWords = qw(Core); # for .NET |
51 |
| - |
52 | 43 | sub toSentenceCase($) {
|
53 |
| - my $str = shift; |
| 44 | + my ($str) = @_; |
54 | 45 |
|
55 |
| - my @mixedCaseWords = @specialWords; # mixed-case or ALLCAPS |
56 |
| - while ($str =~ /\b([a-z]?[A-Z][A-Z0-9]+|[A-Z]\w*[A-Z]\w*)\b/g) { |
57 |
| - push @mixedCaseWords, $1; |
58 |
| - } |
| 46 | + # Lowercase non-mixedcase words |
| 47 | + $str =~ s/\b([A-Z]?[a-z][a-z0-9]*)\b/\l$1/g; |
59 | 48 |
|
60 |
| - $str = lc $str; |
| 49 | + # Capitalize the first word unless it is mixed case |
| 50 | + $str =~ s/^([a-z][a-z0-9]*)\b/\u$1/; |
61 | 51 |
|
62 |
| - # Replace words with their mixed-case or ALL CAPS versions |
63 |
| - foreach my $word (@mixedCaseWords) { |
64 |
| - my $lc_word = lc($word); |
65 |
| - $str =~ s/\b\Q$lc_word\E\b/$word/g; |
66 |
| - } |
67 |
| - |
68 |
| - # Capitalize the first letter of the string |
69 |
| - $str =~ s/^(\s*\w)/\u$1/; |
| 52 | + # Handle exception |
| 53 | + $str =~ s/(ASP.NET) core/$1 Core/; |
70 | 54 |
|
71 | 55 | return $str;
|
72 | 56 | }
|
|
75 | 59 | my $frontMatter = '';
|
76 | 60 | if ($frontMatterFromFile) {
|
77 | 61 | # printf STDOUT "> $file has front matter:\n$frontMatterFromFile\n"; # if $gD;
|
78 |
| - $frontMatterFromFile = '' unless $frontMatterFromFile =~ /aliases|cSpell|cascade/i; |
| 62 | + $frontMatterFromFile = '' unless $frontMatterFromFile =~ /auto_gen|aliases/i; |
79 | 63 | # printf STDOUT "> $file\n" if $ARGV =~ /\/system\b/;
|
80 | 64 | }
|
81 | 65 | $linkTitle = $title;
|
|
110 | 94 | $linkTitle = 'HTTP' if $linkTitle =~ /^HTTP Client and Server/i;
|
111 | 95 | $linkTitle = 'SQL' if $title =~ /SQL Databases$/i;
|
112 | 96 | $linkTitle = 'System use cases' if $title =~ /System .*?General Use Cases/i;
|
| 97 | + $linkTitle = $1 if $title =~ /GenAI (\w+)$/i; |
113 | 98 |
|
114 | 99 | # Missing an `s` in "Semantic Convention"?
|
115 | 100 | if ($title =~ /^Semantic Convention\b/i and $title !~ /Groups$/i) {
|
|
137 | 122 | $linkTitle =~ s/Semantic Conventions? Stability //i;
|
138 | 123 | }
|
139 | 124 |
|
140 |
| - if ($linkTitle and $linkTitle ne $title) { |
141 |
| - $linkTitle = toSentenceCase($linkTitle) unless $linkTitle =~ /^gRPC/; |
142 |
| - if ($frontMatterFromFile =~ /linkTitle: /) { |
143 |
| - $frontMatterFromFile =~ s/^(linkTitle: ).*$/$1$linkTitle/m; |
144 |
| - } else { |
145 |
| - $frontMatter .= "linkTitle: $linkTitle\n" |
| 125 | + $frontMatter .= $frontMatterFromFile if $frontMatterFromFile; |
| 126 | + |
| 127 | + if ($linkTitle && $linkTitle ne $title) { |
| 128 | + $linkTitle = toSentenceCase($linkTitle); # unless $linkTitle =~ /^gRPC/; |
| 129 | + if ($frontMatter !~ /linkTitle: /) { |
| 130 | + $frontMatter .= "linkTitle: $linkTitle\n"; |
| 131 | + } elsif ($frontMatter !~ /^auto_gen:/m) { |
| 132 | + $frontMatter =~ s/^(linkTitle: ).*$/$1$linkTitle/m; |
146 | 133 | }
|
147 | 134 | }
|
148 | 135 |
|
149 |
| - $frontMatter .= $frontMatterFromFile if $frontMatterFromFile; |
150 |
| - |
151 | 136 | if ($ARGV =~ /docs\/(.*?)(README|_index).md$/) {
|
152 | 137 | $frontMatter .= "path_base_for_github_subdir:\n";
|
153 | 138 | $frontMatter .= " from: tmp/semconv/docs/$1_index.md\n";
|
154 | 139 | $frontMatter .= " to: $1README.md\n";
|
155 | 140 | }
|
156 |
| - $frontMatter .= "weight: -1\n" if $title eq 'General Semantic Conventions'; |
157 | 141 |
|
158 | 142 | return $frontMatter;
|
159 | 143 | }
|
160 | 144 |
|
161 | 145 | sub printTitleAndFrontMatter() {
|
162 |
| - my $frontMatter; |
163 |
| - |
164 |
| - |
165 |
| - # if ($ARGV =~ /docs\/(README|_index)/) { |
166 |
| - # print STDOUT "> $ARGV\n > frontMatterFromFile: $frontMatterFromFile\n"; |
167 |
| - # print STDOUT " > title: $title\n"; |
168 |
| - # print STDOUT " > linkTitle: $linkTitle\n"; |
169 |
| - # } |
170 |
| - |
171 |
| - if ($frontMatterFromFile && $frontMatterFromFile =~ /auto_gen:\s*false/) { |
172 |
| - $frontMatter = $frontMatterFromFile; |
173 |
| - } else { |
174 |
| - $frontMatter = computeTitleAndFrontMatter(); |
175 |
| - } |
| 146 | + my $frontMatter = computeTitleAndFrontMatter(); |
176 | 147 |
|
177 | 148 | if ($frontMatter) {
|
178 | 149 | $frontMatter = "<!--- Hugo front matter used to generate the website version of this page:\n" . $frontMatter;
|
179 | 150 | $frontMatter .= "--->\n";
|
180 | 151 | print "$frontMatter\n";
|
181 | 152 | }
|
| 153 | + |
182 | 154 | print $beforeTitle if $beforeTitle;
|
183 | 155 | $title = toTitleCase($title);
|
184 | 156 | print "# $title\n"
|
185 | 157 | }
|
186 | 158 |
|
| 159 | +sub gatherFrontMatter() { |
| 160 | + my $autoGenValues = 'false|below'; |
| 161 | + my $autoGenDirective = ''; |
| 162 | + my $autoGenSkip = 0; |
| 163 | + |
| 164 | + while(<>) { |
| 165 | + last if /^--->/; |
| 166 | + next if $autoGenSkip; |
| 167 | + |
| 168 | + my ($keyWord, $autoGenDirective) = /^(auto.?gen): ([^\#]+)/; |
| 169 | + if ($keyWord) { |
| 170 | + # print STDOUT ">> $ARGV:\n$frontMatterFromFile"; |
| 171 | + if ($keyWord ne 'auto_gen') { |
| 172 | + warn "$ARGV: WARN: misspelled keyword, should be 'auto_gen' not '$keyWord'\n"; |
| 173 | + } elsif (!$autoGenDirective or $autoGenDirective !~ /^($autoGenValues)/) { |
| 174 | + warn "$ARGV: WARN: missing or unrecognized 'auto_gen' value, should match '$autoGenValues', not $autoGenDirective\n"; |
| 175 | + } elsif ($autoGenDirective =~ /^below/) { |
| 176 | + $autoGenSkip = 1; |
| 177 | + # print STDOUT ">>>> skipping\n"; |
| 178 | + } else { |
| 179 | + # print STDOUT ">> wa?\n"; |
| 180 | + } |
| 181 | + } |
| 182 | + |
| 183 | + $frontMatterFromFile .= $_; |
| 184 | + } |
| 185 | +} |
| 186 | + |
187 | 187 | # main
|
188 | 188 |
|
189 | 189 | my $titleRegexStr = '^#\s+(.*)';
|
|
200 | 200 | $beforeTitle = '';
|
201 | 201 | $linkTitle = '';
|
202 | 202 | if (/^<!--- Hugo/) {
|
203 |
| - while(<>) { |
204 |
| - last if /^--->/; |
205 |
| - $frontMatterFromFile .= $_; |
206 |
| - } |
| 203 | + gatherFrontMatter(); |
207 | 204 | next;
|
208 | 205 | }
|
209 | 206 | }
|
|
0 commit comments