Skip to content

Commit 1a54b41

Browse files
committed
some updates
1 parent c2debee commit 1a54b41

19 files changed

+553
-141
lines changed

PythonLexer.g4

+2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ THE SOFTWARE.
2828
*/
2929

3030
lexer grammar PythonLexer;
31+
3132
options { superClass=PythonLexerBase; }
33+
3234
tokens {
3335
// the following tokens are only for compatibility with the PythonLexerBase class
3436
LSQB, RSQB, LBRACE, RBRACE

PythonParser.g4

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ THE SOFTWARE.
2626
* Developed by : Robert Einhorn
2727
*/
2828

29+
// Tiny Python based on: https://docs.python.org/3.8/reference/grammar.html
30+
parser grammar PythonParser;
2931

30-
parser grammar PythonParser; // Tiny Python based on: https://docs.python.org/3.8/reference/grammar.html
3132
options { tokenVocab=PythonLexer; }
3233

3334
// ANTLR4 grammar for a tiny Python

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Tiny Python   [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
22
A considerably stripped down Python grammar for a starter Python (or a Python-like) parser or even for educational purposes.
33

4-
The ANTLR4 parser grammar is based on the last "traditional" [Python 3.8 grammar](https://docs.python.org/3.8/reference/grammar.html) which not yet written in [PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar).
4+
The ANTLR4 parser grammar is based on the last "traditional" [Python 3.8 grammar](https://docs.python.org/3.8/reference/grammar.html), which is not yet written in [PEG](https://peps.python.org/pep-0617/).
55

66

77
### Related links:

port_CSharp/AssemblyInfo.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
[assembly: CLSCompliant(true)]
1+
[assembly: CLSCompliant(true)]
2+

port_CSharp/README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
#### Prerequisites:
44
- Installed [ANTLR4-tools](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md#getting-started-the-easy-way-using-antlr4-tools)
5-
- Installed [.NET Framework](https://dotnet.microsoft.com/en-us/download/dotnet-framework)
5+
- Installed [.NET](https://dotnet.microsoft.com/en-us/download)
66

77

88
#### Command line example for Windows:
9-
- first create a C# project called csgrun4py then copy the two grammar files and example.py to this directory:
9+
- first create a C# project called grun4py then copy the two grammar files and example.py to this directory:
1010
```bash
11-
dotnet new console -o . -n csgrun4py -f net8.0
11+
dotnet new console -o . -n grun4py -f net9.0
1212
del program.cs
1313
dotnet add package Antlr4.Runtime.Standard --version 4.13.1
1414
```
@@ -22,7 +22,7 @@
2222
antlr4 -Dlanguage=CSharp PythonLexer.g4
2323
antlr4 -Dlanguage=CSharp PythonParser.g4
2424
dotnet build
25-
dotnet run example.py --no-build
25+
dotnet run example.py --no-build
2626
```
2727

2828

port_CSharp/csgrun4py.cs

-35
This file was deleted.

port_CSharp/grun4py.cs

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// ******* GRUN (Grammar Unit Test) for Python *******
2+
3+
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using Antlr4.Runtime;
7+
8+
namespace grun4py
9+
{
10+
internal static class Program
11+
{
12+
/// <summary>
/// Command-line entry point: lexes and parses the Python file named by the first argument.
/// Every token is printed to standard output, then the <c>file_input</c> rule is parsed.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
/// <returns>
/// The parser's syntax error count on a completed run, or 1 when no path was given
/// or an exception was thrown.
/// </returns>
public static int Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Error: Please provide an input file path");
        return 1;
    }

    try
    {
        // Wire up the pipeline: character stream -> lexer -> token stream -> parser.
        string sourcePath = args[0];
        var charStream = GetEncodedInputStreamByPythonComment(sourcePath);
        var pythonLexer = new PythonLexer(charStream);
        var tokenStream = new CommonTokenStream((ITokenSource)pythonLexer);
        var pythonParser = new PythonParser(tokenStream);

        // Test the lexer grammar: load all tokens, then dump them one per line.
        tokenStream.Fill();
        var allTokens = tokenStream.GetTokens();
        for (int i = 0; i < allTokens.Count; i++)
        {
            Console.WriteLine(GetTokenMetaDataWithTokenName(allTokens[i]));
        }

        // Test the parser grammar; the error count doubles as the exit code.
        pythonParser.file_input();
        int syntaxErrorCount = pythonParser.NumberOfSyntaxErrors;
        return syntaxErrorCount;
    }
    catch (Exception ex)
    {
        // Any failure is reported on stderr and mapped to a non-zero exit code.
        Console.Error.WriteLine($"Error: {ex.Message}");
        return 1;
    }
}
44+
45+
/// <summary>
/// Formats one token as
/// <c>[@TokenIndex,StartIndex:StopIndex='Text',&lt;TokenName&gt;,channel=ChannelName,Line:Column]</c>.
/// The <c>channel=...</c> part is left out for tokens on the default channel.
/// </summary>
/// <param name="token">The token to describe.</param>
/// <returns>The single-line textual description of the token.</returns>
private static string GetTokenMetaDataWithTokenName(IToken token)
{
    // Escape control characters so every token prints on exactly one line.
    string escapedText = ReplaceSpecialCharacters(token.Text);

    // EOF is given a fixed name; every other type is resolved through the lexer vocabulary.
    string symbolicName;
    if (token.Type == TokenConstants.EOF)
    {
        symbolicName = "EOF";
    }
    else
    {
        symbolicName = PythonLexer.DefaultVocabulary.GetDisplayName(token.Type);
    }

    // Only non-default channels are mentioned in the output.
    string channelPart = "";
    if (token.Channel != TokenConstants.DefaultChannel)
    {
        channelPart = $"channel={PythonLexer.channelNames[token.Channel]},";
    }

    string charSpan = $"{token.StartIndex}:{token.StopIndex}";
    string position = $"{token.Line}:{token.Column}";
    return $"[@{token.TokenIndex},{charSpan}='{escapedText}',<{symbolicName}>,{channelPart}{position}]";
}
56+
57+
/// <summary>
/// Rewrites line feed, carriage return, tab and form feed characters as the
/// two-character escape sequences <c>\n</c>, <c>\r</c>, <c>\t</c> and <c>\f</c>.
/// All other characters are copied unchanged.
/// </summary>
/// <param name="text">The text to escape.</param>
/// <returns>The escaped text.</returns>
private static string ReplaceSpecialCharacters(string text)
{
    var escaped = new StringBuilder(text.Length);
    foreach (char ch in text)
    {
        switch (ch)
        {
            case '\n':
                escaped.Append(@"\n");
                break;
            case '\r':
                escaped.Append(@"\r");
                break;
            case '\t':
                escaped.Append(@"\t");
                break;
            case '\f':
                escaped.Append(@"\f");
                break;
            default:
                escaped.Append(ch);
                break;
        }
    }
    return escaped.ToString();
}
65+
66+
// Matches a line that is empty, whitespace only, or a comment (WS? + COMMENT?).
private static readonly Regex WsCommentPattern = new(@"^[ \t\f]*(#.*)?$");

/// <summary>
/// Opens <paramref name="filePath"/> as an ANTLR character stream, decoded with the encoding
/// declared by a PEP 263 comment on one of the first two lines of the file.
/// Falls back to UTF-8 when no declaration is found or the declared name is not a usable encoding.
/// </summary>
/// <param name="filePath">Path of the Python source file.</param>
/// <returns>The character stream produced by <c>CharStreams.fromPath</c>.</returns>
/// <remarks>
/// Exceptions raised while actually reading the file through <c>CharStreams.fromPath</c>
/// propagate to the caller; the read is not retried.
/// </remarks>
public static ICharStream? GetEncodedInputStreamByPythonComment(string filePath)
{
    const string DEFAULT_PYTHON_ENCODING = "utf-8"; // default encoding for Python source code

    string encodingName = ReadDeclaredEncodingName(filePath);
    if (encodingName == "")
    {
        encodingName = DEFAULT_PYTHON_ENCODING;
    }

    // Resolve the encoding separately from reading the file, so that only an unknown or
    // unsupported encoding name triggers the fallback and an I/O failure is not retried.
    Encoding encoding;
    try
    {
        encoding = Encoding.GetEncoding(encodingName);
    }
    catch (Exception ex) when (ex is ArgumentException or NotSupportedException)
    {
        encoding = Encoding.GetEncoding(DEFAULT_PYTHON_ENCODING);
    }

    return CharStreams.fromPath(filePath, encoding);
}

// Scans at most the first two lines of the file for an encoding declaration comment and
// returns the declared (normalized) encoding name, or "" when there is none.
private static string ReadDeclaredEncodingName(string filePath)
{
    try
    {
        using FileStream fs = new(filePath, FileMode.Open, FileAccess.Read); // read in binary mode
        using StreamReader reader = new(fs, Encoding.ASCII);
        for (int lineCount = 0; lineCount < 2; lineCount++)
        {
            string? line = reader.ReadLine();
            if (line is null)
            {
                break; // EOF reached
            }

            if (!WsCommentPattern.IsMatch(line))
            {
                break; // statement or backslash found (line is not empty, not whitespace(s), not comment)
            }

            string declaredName = GetEncodingName(line);
            if (declaredName != "") // encoding found
            {
                return declaredName;
            }
        }
    }
    catch (Exception ex) when (ex is IOException
                                  or UnauthorizedAccessException
                                  or ArgumentException
                                  or NotSupportedException
                                  or System.Security.SecurityException)
    {
        // Best effort only: if the file cannot be probed, report "no declaration" here and
        // let the subsequent CharStreams.fromPath call surface the real error to the caller.
    }

    return "";
}
118+
119+
// Encoding declaration comment as specified by https://peps.python.org/pep-0263/#defining-the-encoding
private static readonly Regex EncodingCommentPattern =
    new(@"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)");

// Aliases mapped to the encoding name used for decoding. Keys are compared with an ordinal,
// case-insensitive comparer so the lookup does not depend on the current culture.
private static readonly Dictionary<string, string> EncodingAliases =
    new(StringComparer.OrdinalIgnoreCase)
    {
        { "cp1252", "latin1" },
        { "latin-1", "latin1" },
        { "iso-8859-1", "latin1" }
        // more encoding pairs
    };

/// <summary>
/// Extracts the encoding name from a Python encoding declaration comment (PEP 263),
/// e.g. <c># -*- coding: latin-1 -*-</c>.
/// </summary>
/// <param name="commentText">A single source line to inspect.</param>
/// <returns>
/// The declared encoding name, replaced by its normalized form when it is a known alias
/// (matched case-insensitively); an empty string when the line holds no declaration.
/// </returns>
public static string GetEncodingName(string commentText)
{
    Match match = EncodingCommentPattern.Match(commentText);
    if (!match.Success)
    {
        return "";
    }

    string encodingName = match.Groups[1].Value;

    // normalize encoding name
    return EncodingAliases.TryGetValue(encodingName, out var normalizedEncodingName)
        ? normalizedEncodingName
        : encodingName;
}
142+
}
143+
}

port_Dart/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Windows:
2323
```bash
2424
antlr4 -Dlanguage=Dart PythonLexer.g4
2525
antlr4 -Dlanguage=Dart PythonParser.g4
26-
dart dartgrun4py.dart example.py
26+
dart grun4py.dart example.py
2727
```
2828

2929
#### Related link:
File renamed without changes.

port_Dart/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: dartgrun4py
1+
name: grun4py
22

33
environment:
44
sdk: ^3.4.4

port_Go/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Windows:
3030
```bash
3131
go generate ./...
3232
go mod tidy
33-
go run gogrun4py.go example.py
33+
go run grun4py.go example.py
3434
```
3535

3636
#### Related link:

port_Go/gogrun4py.go

-44
This file was deleted.

0 commit comments

Comments
 (0)