-
-
Notifications
You must be signed in to change notification settings - Fork 727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
commonlib: Add ANTLR #6168
base: main
Are you sure you want to change the base?
commonlib: Add ANTLR #6168
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
/* | ||
* The MIT License (MIT) | ||
* | ||
* Copyright (c) 2014 by Bart Kiers (original author) and Alexandre Vitorelli (contributor -> ported to CSharp) | ||
* Copyright (c) 2017-2020 by Ivan Kochurkin (Positive Technologies): | ||
added ECMAScript 6 support, cleared and transformed to the universal grammar. | ||
* Copyright (c) 2018 by Juan Alvarez (contributor -> ported to Go) | ||
* Copyright (c) 2019 by Student Main (contributor -> ES2020) | ||
* | ||
* Permission is hereby granted, free of charge, to any person | ||
* obtaining a copy of this software and associated documentation | ||
* files (the "Software"), to deal in the Software without | ||
* restriction, including without limitation the rights to use, | ||
* copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the | ||
* Software is furnished to do so, subject to the following | ||
* conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be | ||
* included in all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
* OTHER DEALINGS IN THE SOFTWARE. | ||
*/ | ||
|
||
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false | ||
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine | ||
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true | ||
|
||
// Lexer grammar for JavaScript. The {this.Xxx()} predicates and actions used by the rules
// below are calls into the class named by the superClass option.
lexer grammar JavaScriptLexer; | ||
|
||
// Additional token channel; the UnexpectedCharacter rule sends its tokens here.
channels { | ||
ERROR | ||
} | ||
|
||
options { | ||
superClass = JavaScriptLexerBase; | ||
} | ||
|
||
// Insert here @header for C++ lexer. | ||
|
||
// HashBangLine is guarded by IsStartOfFile(); both comment rules are sent to the HIDDEN
// channel. RegularExpressionLiteral carries the IsRegexPossible() predicate just before its
// closing '/', so the literal is only accepted where the base class allows a regex.
HashBangLine : { this.IsStartOfFile()}? '#!' ~[\r\n\u2028\u2029]*; // only allowed at start | ||
MultiLineComment : '/*' .*? '*/' -> channel(HIDDEN); | ||
SingleLineComment : '//' ~[\r\n\u2028\u2029]* -> channel(HIDDEN); | ||
RegularExpressionLiteral: | ||
'/' RegularExpressionFirstChar RegularExpressionChar* {this.IsRegexPossible()}? '/' IdentifierPart* | ||
; | ||
|
||
/// Punctuators and operators.
// OpenBrace, TemplateCloseBrace and CloseBrace run actions implemented in the lexer base class.
// TemplateCloseBrace is declared before CloseBrace, is guarded by IsInTemplateString(), and
// pops the lexer mode that TemplateStringStartExpression pushed.
OpenBracket : '['; | ||
CloseBracket : ']'; | ||
OpenParen : '('; | ||
CloseParen : ')'; | ||
OpenBrace : '{' {this.ProcessOpenBrace();}; | ||
TemplateCloseBrace : {this.IsInTemplateString()}? '}' // Break lines here to ensure proper transformation by Go/transformGrammar.py | ||
{this.ProcessTemplateCloseBrace();} -> popMode; | ||
CloseBrace : '}' {this.ProcessCloseBrace();}; | ||
SemiColon : ';'; | ||
Comma : ','; | ||
Assign : '='; | ||
QuestionMark : '?'; | ||
QuestionMarkDot : '?.'; | ||
Colon : ':'; | ||
Ellipsis : '...'; | ||
Dot : '.'; | ||
PlusPlus : '++'; | ||
MinusMinus : '--'; | ||
Plus : '+'; | ||
Minus : '-'; | ||
BitNot : '~'; | ||
Not : '!'; | ||
Multiply : '*'; | ||
Divide : '/'; | ||
Modulus : '%'; | ||
Power : '**'; | ||
NullCoalesce : '??'; | ||
Hashtag : '#'; | ||
RightShiftArithmetic : '>>'; | ||
LeftShiftArithmetic : '<<'; | ||
RightShiftLogical : '>>>'; | ||
LessThan : '<'; | ||
MoreThan : '>'; | ||
LessThanEquals : '<='; | ||
GreaterThanEquals : '>='; | ||
Equals_ : '=='; | ||
NotEquals : '!='; | ||
IdentityEquals : '==='; | ||
IdentityNotEquals : '!=='; | ||
BitAnd : '&'; | ||
BitXOr : '^'; | ||
BitOr : '|'; | ||
And : '&&'; | ||
Or : '||'; | ||
MultiplyAssign : '*='; | ||
DivideAssign : '/='; | ||
ModulusAssign : '%='; | ||
PlusAssign : '+='; | ||
MinusAssign : '-='; | ||
LeftShiftArithmeticAssign : '<<='; | ||
RightShiftArithmeticAssign : '>>='; | ||
RightShiftLogicalAssign : '>>>='; | ||
BitAndAssign : '&='; | ||
BitXorAssign : '^='; | ||
BitOrAssign : '|='; | ||
PowerAssign : '**='; | ||
NullishCoalescingAssign : '??='; | ||
ARROW : '=>'; | ||
|
||
/// Null Literals | ||
|
||
NullLiteral: 'null'; | ||
|
||
/// Boolean Literals | ||
|
||
BooleanLiteral: 'true' | 'false'; | ||
|
||
/// Numeric Literals | ||
|
||
// Three shapes: integer part with fraction, fraction only, integer part only. Each may carry
// an exponent, and '_' is accepted inside the digit runs.
DecimalLiteral: | ||
DecimalIntegerLiteral '.' [0-9] [0-9_]* ExponentPart? | ||
| '.' [0-9] [0-9_]* ExponentPart? | ||
| DecimalIntegerLiteral ExponentPart? | ||
; | ||
|
||
/// Hex, octal and binary integer literals, and BigInt literals ('n' suffix) | ||
|
||
// OctalIntegerLiteral (leading '0' followed by octal digits) is only accepted when
// IsStrictMode() is false; OctalIntegerLiteral2 is the '0o' form and has no predicate.
HexIntegerLiteral : '0' [xX] [0-9a-fA-F] HexDigit*; | ||
OctalIntegerLiteral : '0' [0-7]+ {!this.IsStrictMode()}?; | ||
OctalIntegerLiteral2 : '0' [oO] [0-7] [_0-7]*; | ||
BinaryIntegerLiteral : '0' [bB] [01] [_01]*; | ||
|
||
BigHexIntegerLiteral : '0' [xX] [0-9a-fA-F] HexDigit* 'n'; | ||
BigOctalIntegerLiteral : '0' [oO] [0-7] [_0-7]* 'n'; | ||
BigBinaryIntegerLiteral : '0' [bB] [01] [_01]* 'n'; | ||
BigDecimalIntegerLiteral : DecimalIntegerLiteral 'n'; | ||
|
||
/// Keywords | ||
|
||
// The keyword rules are declared before the Identifier rule. YieldStar matches 'yield*'
// as a single token.
Break : 'break'; | ||
Do : 'do'; | ||
Instanceof : 'instanceof'; | ||
Typeof : 'typeof'; | ||
Case : 'case'; | ||
Else : 'else'; | ||
New : 'new'; | ||
Var : 'var'; | ||
Catch : 'catch'; | ||
Finally : 'finally'; | ||
Return : 'return'; | ||
Void : 'void'; | ||
Continue : 'continue'; | ||
For : 'for'; | ||
Switch : 'switch'; | ||
While : 'while'; | ||
Debugger : 'debugger'; | ||
Function_ : 'function'; | ||
This : 'this'; | ||
With : 'with'; | ||
Default : 'default'; | ||
If : 'if'; | ||
Throw : 'throw'; | ||
Delete : 'delete'; | ||
In : 'in'; | ||
Try : 'try'; | ||
As : 'as'; | ||
From : 'from'; | ||
Of : 'of'; | ||
Yield : 'yield'; | ||
YieldStar : 'yield*'; | ||
|
||
/// Future Reserved Words | ||
|
||
Class : 'class'; | ||
Enum : 'enum'; | ||
Extends : 'extends'; | ||
Super : 'super'; | ||
Const : 'const'; | ||
Export : 'export'; | ||
Import : 'import'; | ||
|
||
Async : 'async'; | ||
Await : 'await'; | ||
|
||
/// The following tokens are also considered to be FutureReservedWords | ||
/// when parsing strict mode | ||
|
||
// Each rule below is gated by IsStrictMode(). 'let' has two rules with the same text:
// StrictLet when the predicate holds and NonStrictLet when it does not.
Implements : 'implements' {this.IsStrictMode()}?; | ||
StrictLet : 'let' {this.IsStrictMode()}?; | ||
NonStrictLet : 'let' {!this.IsStrictMode()}?; | ||
Private : 'private' {this.IsStrictMode()}?; | ||
Public : 'public' {this.IsStrictMode()}?; | ||
Interface : 'interface' {this.IsStrictMode()}?; | ||
Package : 'package' {this.IsStrictMode()}?; | ||
Protected : 'protected' {this.IsStrictMode()}?; | ||
Static : 'static' {this.IsStrictMode()}?; | ||
|
||
/// Identifier Names and Identifiers | ||
|
||
Identifier: IdentifierStart IdentifierPart*; | ||
/// String Literals | ||
// Double- or single-quoted string; after a match the ProcessStringLiteral() action of the
// base class runs (that is where the "use strict" directive is detected).
StringLiteral: | ||
('"' DoubleStringCharacter* '"' | '\'' SingleStringCharacter* '\'') {this.ProcessStringLiteral();} | ||
; | ||
|
||
// An opening backtick switches the lexer into the TEMPLATE mode declared further down.
BackTick: '`' -> pushMode(TEMPLATE); | ||
|
||
WhiteSpaces: [\t\u000B\u000C\u0020\u00A0]+ -> channel(HIDDEN); | ||
|
||
LineTerminator: [\r\n\u2028\u2029] -> channel(HIDDEN); | ||
|
||
/// Comments | ||
|
||
// UnexpectedCharacter is the last rule of the default mode and matches any single character,
// sending it to the ERROR channel instead of the default one.
HtmlComment : '<!--' .*? '-->' -> channel(HIDDEN); | ||
CDataComment : '<![CDATA[' .*? ']]>' -> channel(HIDDEN); | ||
UnexpectedCharacter : . -> channel(ERROR); | ||
|
||
// Rules used while inside a template literal.
mode TEMPLATE; | ||
|
||
// BackTickInside is emitted with the BackTick token type and leaves the mode.
// TemplateStringStartExpression ('${') calls ProcessTemplateOpenBrace() and pushes the default
// mode for the embedded expression. Any other character is a one-character TemplateStringAtom.
BackTickInside : '`' -> type(BackTick), popMode; | ||
TemplateStringStartExpression : '${' {this.ProcessTemplateOpenBrace();} -> pushMode(DEFAULT_MODE); | ||
TemplateStringAtom : ~[`]; | ||
|
||
// Fragment rules | ||
|
||
// Fragments are building blocks for the token rules above and do not produce tokens themselves.
fragment DoubleStringCharacter: ~["\\\r\n] | '\\' EscapeSequence | LineContinuation; | ||
|
||
fragment SingleStringCharacter: ~['\\\r\n] | '\\' EscapeSequence | LineContinuation; | ||
|
||
fragment EscapeSequence: | ||
CharacterEscapeSequence | ||
| '0' // no digit ahead! TODO | ||
| HexEscapeSequence | ||
| UnicodeEscapeSequence | ||
| ExtendedUnicodeEscapeSequence | ||
; | ||
|
||
fragment CharacterEscapeSequence: SingleEscapeCharacter | NonEscapeCharacter; | ||
|
||
fragment HexEscapeSequence: 'x' HexDigit HexDigit; | ||
|
||
// Either exactly four hex digits, or a braced form with at least two hex digits.
fragment UnicodeEscapeSequence: | ||
'u' HexDigit HexDigit HexDigit HexDigit | ||
| 'u' '{' HexDigit HexDigit+ '}' | ||
; | ||
|
||
// Braced form with at least one hex digit.
fragment ExtendedUnicodeEscapeSequence: 'u' '{' HexDigit+ '}'; | ||
|
||
fragment SingleEscapeCharacter: ['"\\bfnrtv]; | ||
|
||
fragment NonEscapeCharacter: ~['"\\bfnrtv0-9xu\r\n]; | ||
|
||
// NOTE(review): no other rule in this lexer grammar references EscapeCharacter.
fragment EscapeCharacter: SingleEscapeCharacter | [0-9] | [xu]; | ||
|
||
fragment LineContinuation: '\\' [\r\n\u2028\u2029]+; | ||
|
||
// Note that '_' is part of this set, so every rule built on HexDigit accepts underscores.
fragment HexDigit: [_0-9a-fA-F]; | ||
|
||
fragment DecimalIntegerLiteral: '0' | [1-9] [0-9_]*; | ||
|
||
fragment ExponentPart: [eE] [+-]? [0-9_]+; | ||
|
||
fragment IdentifierPart: IdentifierStart | [\p{Mn}] | [\p{Nd}] | [\p{Pc}] | '\u200C' | '\u200D'; | ||
|
||
fragment IdentifierStart: [\p{L}] | [$_] | '\\' UnicodeEscapeSequence; | ||
|
||
// Same as RegularExpressionChar except that '*' is also excluded as the first character.
fragment RegularExpressionFirstChar: | ||
~[*\r\n\u2028\u2029\\/[] | ||
| RegularExpressionBackslashSequence | ||
| '[' RegularExpressionClassChar* ']' | ||
; | ||
|
||
fragment RegularExpressionChar: | ||
~[\r\n\u2028\u2029\\/[] | ||
| RegularExpressionBackslashSequence | ||
| '[' RegularExpressionClassChar* ']' | ||
; | ||
|
||
fragment RegularExpressionClassChar: ~[\r\n\u2028\u2029\]\\] | RegularExpressionBackslashSequence; | ||
|
||
fragment RegularExpressionBackslashSequence: '\\' ~[\r\n\u2028\u2029]; |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
/* | ||
* Zed Attack Proxy (ZAP) and its related class files. | ||
* | ||
* ZAP is an HTTP/HTTPS proxy for assessing web application security. | ||
* | ||
* Copyright 2022 The ZAP Development Team | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.zaproxy.addon.commonlib.parserapi; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import net.htmlparser.jericho.Element; | ||
import net.htmlparser.jericho.HTMLElementName; | ||
import net.htmlparser.jericho.Source; | ||
import org.antlr.v4.runtime.BaseErrorListener; | ||
import org.antlr.v4.runtime.CharStream; | ||
import org.antlr.v4.runtime.CharStreams; | ||
import org.antlr.v4.runtime.CommonTokenStream; | ||
import org.antlr.v4.runtime.RecognitionException; | ||
import org.antlr.v4.runtime.Recognizer; | ||
import org.antlr.v4.runtime.Token; | ||
import org.parosproxy.paros.network.HttpMessage; | ||
import org.zaproxy.addon.commonlib.parserapi.impl.JavaScriptLexer; | ||
import org.zaproxy.addon.commonlib.parserapi.impl.JavaScriptParser; | ||
|
||
public class ParserApi { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs a proper API to extract/parse the JavaScript content. While this code might still be useful for more than one scan rule it needs to be cleaned up first. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we could remove the changelog entry and not bother announcing it until the other part is done? Or we can wait and expand it?I'm happy to push it further, just needed feedback. Do we need to start a doc? |
||
|
||
private static final char SINGLE_QUOTE_CHARACTER = '\''; | ||
private static final char DOUBLE_QUOTE_CHARACTER = '"'; | ||
private static final char FORWARD_SLASH_CHARACTER = '/'; | ||
|
||
public enum Context { | ||
NO_QUOTE, | ||
SINGLE_QUOTE, | ||
DOUBLE_QUOTE, | ||
SLASH_QUOTE | ||
} | ||
|
||
private String scriptCode; | ||
private int targetBlockNumber; | ||
|
||
public void getTargetScriptBlock(HttpMessage msg, String target) { | ||
String htmlCode = msg.getResponseBody().toString(); | ||
Source htmlSrc = new Source(htmlCode); | ||
List<Element> scripts = htmlSrc.getAllElements(HTMLElementName.SCRIPT); | ||
for (Element ele : scripts) { | ||
String code = ele.getContent().toString(); | ||
if (code.contains(target)) { | ||
break; | ||
} | ||
targetBlockNumber += 1; | ||
} | ||
} | ||
|
||
public void getTargetScriptCode(HttpMessage msg, String target) { | ||
String htmlCode = msg.getResponseBody().toString(); | ||
Source htmlSrc = new Source(htmlCode); | ||
scriptCode = | ||
htmlSrc.getAllElements(HTMLElementName.SCRIPT) | ||
.get(targetBlockNumber) | ||
.getContent() | ||
.toString(); | ||
} | ||
|
||
public boolean parseScript() throws IOException { | ||
CharStream charStream = CharStreams.fromString(scriptCode); | ||
JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); | ||
CommonTokenStream cts = new CommonTokenStream(jsLexer); | ||
JavaScriptParser jsParser = new JavaScriptParser(cts); | ||
jsParser.removeErrorListeners(); | ||
jsParser.addErrorListener(ThrowOnSyntaxErrorListener.INSTANCE); | ||
|
||
try { | ||
jsParser.program(); | ||
} catch (Exception e) { | ||
return false; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
public boolean inExecutionContext(String target) throws IOException { | ||
CharStream charStream = CharStreams.fromString(scriptCode); | ||
JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); | ||
|
||
Token token = jsLexer.nextToken(); | ||
while (token.getType() != -1) { | ||
if (token.getType() == JavaScriptLexer.Identifier && token.getText().equals(target)) { | ||
return true; | ||
} | ||
token = jsLexer.nextToken(); | ||
} | ||
|
||
return false; | ||
} | ||
|
||
public Context getContext(String target) throws IOException { | ||
CharStream charStream = CharStreams.fromString(scriptCode); | ||
JavaScriptLexer jsLexer = new JavaScriptLexer(charStream); | ||
|
||
Token token = jsLexer.nextToken(); | ||
while (token.getType() != -1) { | ||
String tokenText = token.getText(); | ||
if (tokenText.contains(target)) { | ||
switch (tokenText.charAt(0)) { | ||
case DOUBLE_QUOTE_CHARACTER: | ||
return Context.DOUBLE_QUOTE; | ||
case SINGLE_QUOTE_CHARACTER: | ||
return Context.SINGLE_QUOTE; | ||
case FORWARD_SLASH_CHARACTER: | ||
return Context.SLASH_QUOTE; | ||
default: | ||
return Context.NO_QUOTE; | ||
} | ||
} | ||
|
||
token = jsLexer.nextToken(); | ||
} | ||
|
||
return Context.NO_QUOTE; | ||
} | ||
|
||
private static class ThrowOnSyntaxErrorListener extends BaseErrorListener { | ||
|
||
static final ThrowOnSyntaxErrorListener INSTANCE = new ThrowOnSyntaxErrorListener(); | ||
|
||
// Reuse the exception, used just for control flow. | ||
private static final RuntimeException SYNTAX_EXCEPTION = | ||
new IllegalArgumentException("Syntax Error"); | ||
|
||
private ThrowOnSyntaxErrorListener() {} | ||
|
||
@Override | ||
public void syntaxError( | ||
Recognizer<?, ?> recognizer, | ||
Object offendingSymbol, | ||
int line, | ||
int charPositionInLine, | ||
String msg, | ||
RecognitionException e) { | ||
throw SYNTAX_EXCEPTION; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## JavaScript Parser | ||
|
||
The JavaScript parser used in this add-on is generated by ANTLR from the [JavaScript grammar](https://github.com/antlr/grammars-v4/blob/14fc51dfd712a99663497035f1f63fa8eac1a225/javascript/javascript/) of the `antlr/grammars-v4` repository. | ||
|
||
The following files were copied from the referenced repository: | ||
- [`JavaScriptLexerBase.java`](src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexerBase.java); | ||
- [`JavaScriptParserBase.java`](src/main/java/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParserBase.java); | ||
- [`JavaScriptLexer.g4`](src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptLexer.g4); | ||
- [`JavaScriptParser.g4`](src/main/antlr/org/zaproxy/addon/commonlib/parserapi/impl/JavaScriptParser.g4). | ||
|
||
The parser is automatically generated when the code is compiled through the [`antlr` Gradle plugin](https://docs.gradle.org/current/userguide/antlr_plugin.html). |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
// Refer to README.md | ||
package org.zaproxy.addon.commonlib.parserapi.impl; | ||
|
||
import java.util.ArrayDeque; | ||
import java.util.Deque; | ||
import org.antlr.v4.runtime.*; | ||
|
||
/** | ||
* All lexer methods that used in grammar (IsStrictMode) should start with Upper Case Char similar | ||
* to Lexer rules. | ||
*/ | ||
public abstract class JavaScriptLexerBase extends Lexer { | ||
/** | ||
* Stores values of nested modes. By default mode is strict or defined externally | ||
* (useStrictDefault) | ||
*/ | ||
private final Deque<Boolean> scopeStrictModes = new ArrayDeque<>(); | ||
|
||
private Token lastToken = null; | ||
|
||
/** Default value of strict mode Can be defined externally by setUseStrictDefault */ | ||
private boolean useStrictDefault = false; | ||
|
||
/** | ||
* Current value of strict mode Can be defined during parsing, see StringFunctions.js and | ||
* StringGlobal.js samples | ||
*/ | ||
private boolean useStrictCurrent = false; | ||
|
||
/** Preserves depth due to braces including template literals. */ | ||
private int currentDepth = 0; | ||
|
||
/** | ||
* Preserves the starting depth of template literals to correctly handle braces inside template | ||
* literals. | ||
*/ | ||
private Deque<Integer> templateDepthStack = new ArrayDeque<Integer>(); | ||
|
||
public JavaScriptLexerBase(CharStream input) { | ||
super(input); | ||
} | ||
|
||
public boolean IsStartOfFile() { | ||
return lastToken == null; | ||
} | ||
|
||
public boolean getStrictDefault() { | ||
return useStrictDefault; | ||
} | ||
|
||
public void setUseStrictDefault(boolean value) { | ||
useStrictDefault = value; | ||
useStrictCurrent = value; | ||
} | ||
|
||
public boolean IsStrictMode() { | ||
return useStrictCurrent; | ||
} | ||
|
||
public boolean IsInTemplateString() { | ||
return !templateDepthStack.isEmpty() && templateDepthStack.peek() == currentDepth; | ||
} | ||
|
||
/** | ||
* Return the next token from the character stream and records this last token in case it | ||
* resides on the default channel. This recorded token is used to determine when the lexer could | ||
* possibly match a regex literal. Also changes scopeStrictModes stack if tokenize special | ||
* string 'use strict'; | ||
* | ||
* @return the next token from the character stream. | ||
*/ | ||
@Override | ||
public Token nextToken() { | ||
Token next = super.nextToken(); | ||
|
||
if (next.getChannel() == Token.DEFAULT_CHANNEL) { | ||
// Keep track of the last token on the default channel. | ||
this.lastToken = next; | ||
} | ||
|
||
return next; | ||
} | ||
|
||
protected void ProcessOpenBrace() { | ||
currentDepth++; | ||
useStrictCurrent = | ||
scopeStrictModes.size() > 0 && scopeStrictModes.peek() ? true : useStrictDefault; | ||
scopeStrictModes.push(useStrictCurrent); | ||
} | ||
|
||
protected void ProcessCloseBrace() { | ||
useStrictCurrent = scopeStrictModes.size() > 0 ? scopeStrictModes.pop() : useStrictDefault; | ||
currentDepth--; | ||
} | ||
|
||
protected void ProcessTemplateOpenBrace() { | ||
currentDepth++; | ||
this.templateDepthStack.push(currentDepth); | ||
} | ||
|
||
protected void ProcessTemplateCloseBrace() { | ||
this.templateDepthStack.pop(); | ||
currentDepth--; | ||
} | ||
|
||
protected void ProcessStringLiteral() { | ||
if (lastToken == null || lastToken.getType() == JavaScriptLexer.OpenBrace) { | ||
String text = getText(); | ||
if (text.equals("\"use strict\"") || text.equals("'use strict'")) { | ||
if (scopeStrictModes.size() > 0) scopeStrictModes.pop(); | ||
useStrictCurrent = true; | ||
scopeStrictModes.push(useStrictCurrent); | ||
} | ||
} | ||
} | ||
|
||
/** Returns {@code true} if the lexer can match a regex literal. */ | ||
protected boolean IsRegexPossible() { | ||
|
||
if (this.lastToken == null) { | ||
// No token has been produced yet: at the start of the input, | ||
// no division is possible, so a regex literal _is_ possible. | ||
return true; | ||
} | ||
|
||
switch (this.lastToken.getType()) { | ||
case JavaScriptLexer.Identifier: | ||
case JavaScriptLexer.NullLiteral: | ||
case JavaScriptLexer.BooleanLiteral: | ||
case JavaScriptLexer.This: | ||
case JavaScriptLexer.CloseBracket: | ||
case JavaScriptLexer.CloseParen: | ||
case JavaScriptLexer.OctalIntegerLiteral: | ||
case JavaScriptLexer.DecimalLiteral: | ||
case JavaScriptLexer.HexIntegerLiteral: | ||
case JavaScriptLexer.StringLiteral: | ||
case JavaScriptLexer.PlusPlus: | ||
case JavaScriptLexer.MinusMinus: | ||
// After any of the tokens above, no regex literal can follow. | ||
return false; | ||
default: | ||
// In all other cases, a regex literal _is_ possible. | ||
return true; | ||
} | ||
} | ||
|
||
@Override | ||
public void reset() { | ||
this.scopeStrictModes.clear(); | ||
this.lastToken = null; | ||
this.useStrictDefault = false; | ||
this.useStrictCurrent = false; | ||
this.currentDepth = 0; | ||
this.templateDepthStack = new ArrayDeque<Integer>(); | ||
super.reset(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// Refer to README.md | ||
package org.zaproxy.addon.commonlib.parserapi.impl; | ||
|
||
import org.antlr.v4.runtime.*; | ||
|
||
/** | ||
* All parser methods that used in grammar (p, prev, notLineTerminator, etc.) should start with | ||
* lower case char similar to parser rules. | ||
*/ | ||
public abstract class JavaScriptParserBase extends Parser { | ||
public JavaScriptParserBase(TokenStream input) { | ||
super(input); | ||
} | ||
|
||
/** Short form for prev(String str) */ | ||
protected boolean p(String str) { | ||
return prev(str); | ||
} | ||
|
||
/** Whether the previous token value equals to @param str */ | ||
protected boolean prev(String str) { | ||
return _input.LT(-1).getText().equals(str); | ||
} | ||
|
||
/** Short form for next(String str) */ | ||
protected boolean n(String str) { | ||
return next(str); | ||
} | ||
|
||
/** Whether the next token value equals to @param str */ | ||
protected boolean next(String str) { | ||
return _input.LT(1).getText().equals(str); | ||
} | ||
|
||
protected boolean notLineTerminator() { | ||
return !lineTerminatorAhead(); | ||
} | ||
|
||
protected boolean notOpenBraceAndNotFunction() { | ||
int nextTokenType = _input.LT(1).getType(); | ||
return nextTokenType != JavaScriptParser.OpenBrace | ||
&& nextTokenType != JavaScriptParser.Function_; | ||
} | ||
|
||
protected boolean closeBrace() { | ||
return _input.LT(1).getType() == JavaScriptParser.CloseBrace; | ||
} | ||
|
||
/** | ||
* Returns {@code true} iff on the current index of the parser's token stream a token exists on | ||
* the {@code HIDDEN} channel which either is a line terminator, or is a multi line comment that | ||
* contains a line terminator. | ||
* | ||
* @return {@code true} iff on the current index of the parser's token stream a token exists on | ||
* the {@code HIDDEN} channel which either is a line terminator, or is a multi line comment | ||
* that contains a line terminator. | ||
*/ | ||
protected boolean lineTerminatorAhead() { | ||
|
||
// Get the token ahead of the current index. | ||
int possibleIndexEosToken = this.getCurrentToken().getTokenIndex() - 1; | ||
if (possibleIndexEosToken < 0) return false; | ||
Token ahead = _input.get(possibleIndexEosToken); | ||
|
||
if (ahead.getChannel() != Lexer.HIDDEN) { | ||
// We're only interested in tokens on the HIDDEN channel. | ||
return false; | ||
} | ||
|
||
if (ahead.getType() == JavaScriptParser.LineTerminator) { | ||
// There is definitely a line terminator ahead. | ||
return true; | ||
} | ||
|
||
if (ahead.getType() == JavaScriptParser.WhiteSpaces) { | ||
// Get the token ahead of the current whitespaces. | ||
possibleIndexEosToken = this.getCurrentToken().getTokenIndex() - 2; | ||
if (possibleIndexEosToken < 0) return false; | ||
ahead = _input.get(possibleIndexEosToken); | ||
} | ||
|
||
// Get the token's text and type. | ||
String text = ahead.getText(); | ||
int type = ahead.getType(); | ||
|
||
// Check if the token is, or contains a line terminator. | ||
return (type == JavaScriptParser.MultiLineComment | ||
&& (text.contains("\r") || text.contains("\n"))) | ||
|| (type == JavaScriptParser.LineTerminator); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have no idea if this is the best way to do this, it was the first way I found that worked.