asmtools/src/org/openjdk/asmtools/jcoder/Scanner.java

922 lines
26 KiB
Java

/*
* Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.asmtools.jcoder;
import static org.openjdk.asmtools.jcoder.JcodTokens.*;
import java.io.IOException;
import java.util.HashMap;
/**
* A Scanner for Jcoder tokens. Errors are reported to the environment object.<p>
*
* The scanner keeps track of the current token, the value of the current token (if any),
* and the start position of the current token.<p>
*
* The scan() method advances the scanner to the next token in the input.<p>
*
* The match() method is used to quickly match opening brackets (i.e. '(', '{', or '[')
* with their closing counterpart. This is useful during error recovery.<p>
*
* The compiler treats either "\n", "\r" or "\r\n" as the end of a line.<p>
*/
public class Scanner {
/*-------------------------------------------------------- */
/* Scanner Fields */
/**
 * Character value that marks the end of input.
 */
public static final int EOF = -1;
/**
 * Character code of the opening brace.
 */
public static final int LBRACE = 123; // "{"
/**
 * When true, scanIdentifier() traces every identifier it resolves to a constant
 * (see addConstDebug()).
 */
private boolean debugCP = false;
/**
 * Number of constants traced so far; reset to 0 when tracing is switched on.
 */
private int numCPentrs = 0;
/**
 * Where errors are reported
 */
protected SourceFile env;
/**
 * Input stream. The constructors assign the same SourceFile to both env and in.
 */
protected SourceFile in;
/**
 * Macro texts keyed by macro name; consulted by xscan() when it meets '&amp;name'.
 */
HashMap<String, String> macros;
/**
 * ch is the current character. prevCh holds the character set aside by setMacro();
 * it is -1 when nothing is set aside.
 */
protected int ch, prevCh = -1;
/**
 * Text of the macro installed by setMacro(), or null when none is installed.
 */
protected String macro;
/**
 * Index into macro; reset to 0 by setMacro().
 */
protected int indexMacro;
/**
 * Current token
 */
protected Token token;
/**
 * The position of the current token
 */
protected int pos;
/**
 * The position of the previous token
 */
protected int prevPos;
/* Token values. */
// Value of the most recent numeric token, as a long and truncated to an int.
protected long longValue;
protected int intValue;
// Size attached to the most recent numeric token; the suffixes b, s, i and l
// select 1, 2, 4 and 8 (presumably a byte count - confirm against the parser).
protected int intSize;
// Text of the most recent string token or identifier.
protected String stringValue;
// Encoded bytes of the most recent character-array ('...') token.
protected ByteBuffer longStringValue;
protected int sign; // sign, when reading number
/* A doc comment preceding the most recent token */
protected String docComment;
/**
 * Number of characters currently stored in buffer.
 */
private int count;
/**
 * A growable character buffer; putc() doubles it when it is full.
 */
private char buffer[] = new char[32];
/*-------------------------------------------------------- */
/**
 * Builds a scanner over the given source file with a caller-supplied macro table.
 * The first character is read and the first token is scanned before the
 * constructor returns.
 *
 * @param sf     source of characters; also receives error reports
 * @param macros macro texts keyed by macro name
 * @throws IOException if reading the input fails
 */
protected Scanner(SourceFile sf, HashMap<String, String> macros) throws IOException {
    this.macros = macros;
    this.in = sf;
    this.env = sf;
    this.ch = sf.read();
    this.prevPos = sf.pos;
    scan();
}
/**
 * Builds a scanner with an empty macro table (for use in jcfront).
 *
 * @param sf source of characters; also receives error reports
 * @throws IOException if reading the input fails
 */
protected Scanner(SourceFile sf) throws IOException {
    this(sf, new HashMap<String, String>());
}
/* *********************************************** */
/**
 * Switches tracing of recognized constants on or off. Switching it on also
 * restarts the trace counter.
 *
 * @param enable true to trace constants, false to stop
 */
void setDebugCP(boolean enable) {
    debugCP = enable;
    if (debugCP) {
        numCPentrs = 0;
    }
}
/**
 * Counts one more traced constant and writes a trace line for it.
 *
 * @param ct the constant type whose printable value is traced
 */
void addConstDebug(ConstType ct) {
    numCPentrs++;
    final String line = "\n Const[" + numCPentrs + "] = " + ct.printval();
    env.traceln(line);
}
/**
 * Installs a macro text as the pending character source and sets the current
 * character aside in prevCh.
 *
 * @param macro the macro text
 */
void setMacro(String macro) {
    prevCh = ch;
    indexMacro = 0;
    this.macro = macro;
}
/**
 * Advances {@code ch} to the next character.
 * <p>
 * Characters come from, in order of priority:
 * <ol>
 * <li>the macro text installed by setMacro(), one character per call, until it
 * is exhausted;</li>
 * <li>the character that setMacro() set aside in prevCh;</li>
 * <li>the input stream.</li>
 * </ol>
 * Previously the macro character was stored in {@code ch} and then immediately
 * overwritten, the macro index was never advanced and the macro was dropped on
 * the first call, so macro text never reached the scanner.
 *
 * @throws IOException if reading the input stream fails
 */
void readCh() throws IOException {
    if (macro != null) {
        if (indexMacro < macro.length()) {
            // Deliver the next macro character and stop here, so that it is
            // not overwritten by the fallback sources below.
            ch = macro.charAt(indexMacro++);
            return;
        }
        // Macro text is exhausted (or was empty): fall back to normal input.
        macro = null;
    }
    if (prevCh >= 0) {
        // Resume with the character that was current when the macro started.
        ch = prevCh;
        prevCh = -1;
    } else {
        ch = in.read();
    }
}
/**
 * Appends one character to the scratch buffer, doubling the buffer first when
 * it is full.
 *
 * @param ch the character to append (narrowed to char)
 */
private void putc(int ch) {
    final int capacity = buffer.length;
    if (count == capacity) {
        char[] grown = new char[capacity * 2];
        System.arraycopy(buffer, 0, grown, 0, capacity);
        buffer = grown;
    }
    buffer[count] = (char) ch;
    count++;
}
/**
 * Returns the characters collected in the scratch buffer so far as a new String.
 */
private String bufferString() {
    return new String(buffer, 0, count);
}
/**
 * Skips the body of a block comment. Must be called after the opening slash
 * and star and the character following them have been read. Reports
 * "eof.in.comment" when the input ends before the comment is closed.
 */
private void skipComment() throws IOException {
    for (;;) {
        if (ch == EOF) {
            env.error(pos, "eof.in.comment");
            return;
        }
        final boolean afterStar = (ch == '*');
        readCh();
        if (afterStar && ch == '/') {
            // A star followed by a slash closes the comment; step past the slash.
            readCh();
            return;
        }
    }
}
/**
 * Scan a doc comment. This method should be called once the initial /, * and * have
 * been read. It gathers the content of the comment (without leading spaces and '*'s)
 * in the string buffer.
 *
 * @return the gathered comment text; the empty string when the comment is closed
 * directly after its opening stars. On end of input "eof.in.comment" is reported
 * and the text gathered so far is returned.
 */
private String scanDocComment() throws IOException {
count = 0;
// Skip the run of '*' that opens the comment; a '/' right after it closes
// the comment with no content.
if (ch == '*') {
do {
readCh();
} while (ch == '*');
if (ch == '/') {
readCh();
return "";
}
}
// Drop a single newline or space that directly follows the opening stars.
switch (ch) {
case '\n':
case ' ':
readCh();
break;
}
// seenstar: true once the current line's leading '*' run, or any ordinary
// character, has been seen on this line.
boolean seenstar = false;
// c: buffer length to fall back to. Spaces and tabs are appended without
// advancing c, so resetting count to c discards them again.
int c = count;
while (true) {
switch (ch) {
case EOF:
env.error(pos, "eof.in.comment");
return bufferString();
case '\n':
// Keep the line break and start a fresh line.
putc('\n');
readCh();
seenstar = false;
c = count;
break;
case ' ':
case '\t':
// Appended tentatively; c is deliberately left unchanged.
putc(ch);
readCh();
break;
case '*':
if (seenstar) {
// A '*' inside the line's content: it either starts the closing
// "*/" or is kept as an ordinary character.
readCh();
if (ch == '/') {
readCh();
count = c;
return bufferString();
}
putc('*');
} else {
// First '*' on this line: discard the whitespace gathered before it
// and skip the whole run of stars.
seenstar = true;
count = c;
do {
readCh();
} while (ch == '*');
switch (ch) {
case ' ':
// Skip one space after the leading stars.
readCh();
break;
case '/':
// The star run was the start of the closing "*/".
readCh();
count = c;
return bufferString();
}
}
break;
default:
// Ordinary character: keep it and advance the fall-back mark past it.
if (!seenstar) {
seenstar = true;
}
putc(ch);
readCh();
c = count;
break;
}
}
}
/**
 * Scans a decimal number whose first digit is the current character.
 * <p>
 * Sets token to INTVAL and stores the value in longValue and intValue. An
 * optional suffix selects intSize: b = 1, s = 2, i = 4, l = 8; without a suffix
 * intSize is 2. Reports "invalid.number" when the number is directly followed
 * by an identifier character or '.', otherwise "overflow" when the value did
 * not fit in a long.
 */
private void scanDecNumber() throws IOException {
    token = Token.INTVAL;
    intSize = 2; // default
    count = 0;
    long acc = ch - '0';
    boolean tooBig = false;
    putc(ch); // save character in buffer

    readCh();
    while (ch >= '0' && ch <= '9') {
        putc(ch);
        // Multiplying by ten must be reversible, otherwise it wrapped around.
        if ((acc * 10) / 10 != acc) {
            tooBig = true;
        }
        acc = (acc * 10) + (ch - '0');
        if (acc - 1 < -1) {
            tooBig = true;
        }
        readCh();
    }

    // Optional size suffix directly after the digits.
    int suffixSize;
    switch (ch) {
        case 'b':
            suffixSize = 1;
            break;
        case 's':
            suffixSize = 2;
            break;
        case 'i':
            suffixSize = 4;
            break;
        case 'l':
            suffixSize = 8;
            break;
        default:
            suffixSize = 0;
            break;
    }
    if (suffixSize != 0) {
        readCh();
        intSize = suffixSize;
    }

    longValue = acc;
    intValue = (int) acc;

    // The number has ended; a letter, digit or '.' must not follow it.
    boolean badTail = Character.isJavaIdentifierPart((char) ch) || ch == '.';
    if (badTail) {
        env.error(in.pos, "invalid.number", String.valueOf((char) ch));
        // Swallow the rest of the malformed token.
        do {
            readCh();
        } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
        return;
    }
    if (tooBig) {
        env.error(pos, "overflow");
    }
}
/**
 * Scans a hexadecimal number. The current character is the one that precedes
 * the first hex digit (xscan() calls this with 'x' or 'X' current).
 * <p>
 * Sets token to INTVAL and stores the value in longValue and intValue. intSize
 * becomes half the number of hex digits, rounded down; with no digits at all
 * it stays at 2. On a malformed tail ("invalid.number") or on overflow
 * ("overflow") intValue is reset to 0.
 * <p>
 * NOTE(review): an odd number of digits rounds down (a single digit gives
 * intSize 0) - confirm this is what the parser expects.
 */
private void scanHexNumber() throws IOException {
    token = Token.INTVAL;
    intSize = 2; // default
    count = 0;
    putc(ch); // save character in buffer
    long acc = 0;
    boolean tooBig = false;
    int digits = 0;

    while (true) {
        readCh();
        final int nibble;
        if (ch >= '0' && ch <= '9') {
            nibble = ch - '0';
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = 10 + (ch - 'a');
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = 10 + (ch - 'A');
        } else {
            break;
        }
        putc(ch);
        // Any bit in the top nibble would be shifted out below.
        if ((acc >>> 60) != 0) {
            tooBig = true;
        }
        acc = (acc << 4) + nibble;
        digits++;
        intSize = digits / 2;
    }

    longValue = acc;
    intValue = (int) acc;

    // The number has ended; a letter, digit or '.' must not follow it.
    boolean badTail = Character.isJavaIdentifierPart((char) ch) || ch == '.';
    if (badTail) {
        env.error(in.pos, "invalid.number", String.valueOf((char) ch));
        // Swallow the rest of the malformed token.
        do {
            readCh();
        } while (Character.isJavaIdentifierPart((char) ch) || ch == '.');
        intValue = 0;
    } else if (tooBig) {
        intValue = 0; // so we don't get second overflow in Parser
        env.error(pos, "overflow");
    }
}
/**
 * Scans an escape sequence; the current character is the backslash. On return
 * the current character is the first one after the escape sequence.
 * <p>
 * Accepted forms are one to three octal digits and the single characters
 * {@code @ : r n f b t \ " '}. Anything else, and an octal value above 0xFF,
 * is reported as "invalid.escape.char".
 *
 * @return the escaped character, or -1 when the escape is not recognized
 */
private int scanEscapeChar() throws IOException {
    final int start = in.pos;
    readCh();

    if (ch >= '0' && ch <= '7') {
        // Octal escape: up to two more octal digits may follow.
        int code = ch - '0';
        int remaining = 2;
        while (remaining > 0) {
            readCh();
            if (ch < '0' || ch > '7') {
                // Stopped on a non-octal character, which stays current.
                if (code > 0xFF) {
                    env.error(start, "invalid.escape.char");
                }
                return code;
            }
            code = (code << 3) + ch - '0';
            remaining--;
        }
        readCh();
        if (code > 0xFF) {
            env.error(start, "invalid.escape.char");
        }
        return code;
    }

    final int decoded;
    switch (ch) {
        case '@':
            decoded = '@';
            break;
        case ':':
            decoded = ':';
            break;
        case 'r':
            decoded = '\r';
            break;
        case 'n':
            decoded = '\n';
            break;
        case 'f':
            decoded = '\f';
            break;
        case 'b':
            decoded = '\b';
            break;
        case 't':
            decoded = '\t';
            break;
        case '\\':
            decoded = '\\';
            break;
        case '\"':
            decoded = '\"';
            break;
        case '\'':
            decoded = '\'';
            break;
        default:
            env.error(start, "invalid.escape.char");
            readCh();
            return -1;
    }
    readCh();
    return decoded;
}
/**
 * Scans a string literal; the current character is the opening double quote.
 * <p>
 * Sets token to STRINGVAL and stringValue to the decoded text. The escapes for
 * '@', ':' and backslash keep their backslash in the result. Reports
 * "eof.in.string" or "newline.in.string" when the literal is not closed on
 * its line.
 */
private void scanString() throws IOException {
    token = Token.STRINGVAL;
    count = 0;
    readCh();

    boolean finished = false;
    while (!finished) {
        if (ch == EOF) {
            env.error(pos, "eof.in.string");
            finished = true;
        } else if (ch == '\n') {
            readCh();
            env.error(pos, "newline.in.string");
            finished = true;
        } else if (ch == '"') {
            readCh();
            finished = true;
        } else if (ch == '\\') {
            int decoded = scanEscapeChar();
            if (decoded >= 0) {
                char literal = (char) decoded;
                boolean keepBackslash = literal == '@' || literal == ':' || literal == '\\';
                if (keepBackslash) {
                    putc('\\');
                }
                putc(literal);
            }
        } else {
            putc(ch);
            readCh();
        }
    }
    stringValue = bufferString();
}
/**
 * Scan a character array. The current character should be the opening ' of the array.
 * <p>
 * Sets token to LONGSTRINGVAL and longStringValue to the characters of the
 * array, encoded as described for java.io.DataOutput.writeUTF(). Reports
 * "eof.in.string" or "newline.in.string" when the array is not closed on its
 * line.
 * <p>
 * Two defects of the earlier version are corrected here:
 * <ul>
 * <li>after an escape sequence one further character was consumed, so the
 * character following every escape (possibly the closing quote) was lost;</li>
 * <li>the first byte of a three-byte encoding was built with 0xC0 instead of
 * 0xE0.</li>
 * </ul>
 */
private void scanCharArray() throws IOException {
    token = Token.LONGSTRINGVAL;
    ByteBuffer buf = new ByteBuffer();
    count = 0;
    readCh();
    loop:
    for (;;) {
        switch (ch) {
            case EOF:
                env.error(pos, "eof.in.string");
                break loop;
            case '\n':
                readCh();
                env.error(pos, "newline.in.string");
                break loop;
            case '\'':
                readCh();
                break loop;
            case '\\': {
                // scanEscapeChar() leaves ch on the first character after the
                // escape, so nothing more must be read here.
                int c = scanEscapeChar();
                if (c >= 0) {
                    writeModifiedUtf8(buf, c);
                }
                break;
            }
            default:
                writeModifiedUtf8(buf, ch);
                readCh();
                break;
        }
    }
    longStringValue = buf;
}

/**
 * Appends one character to buf using the one-, two- or three-byte layout
 * described for java.io.DataOutput.writeUTF().
 */
private static void writeModifiedUtf8(ByteBuffer buf, int c) {
    if ((c > 0) && (c <= 0x7F)) {
        buf.write(c);
    } else if ((c == 0) || ((c >= 0x80) && (c <= 0x7FF))) {
        // The null character is written in the two-byte form as well.
        buf.write(0xC0 | (0x1F & (c >> 6)));
        buf.write(0x80 | (0x3F & c));
    } else {
        buf.write(0xE0 | (0x0F & (c >> 12)));
        buf.write(0x80 | (0x3F & (c >> 6)));
        buf.write(0x80 | (0x3F & c));
    }
}
/**
 * Scans an identifier whose first character is the current character.
 * <p>
 * Identifier characters and the separators '/', '.' and '-' are collected into
 * stringValue. A name containing a separator is always an IDENT. Any other name
 * is looked up as a keyword; if it is not a keyword but names a known constant,
 * the token becomes INTVAL with that constant's value and intSize 1.
 */
private void scanIdentifier() throws IOException {
    count = 0;
    boolean sawSeparator = false;
    boolean more;
    do {
        putc(ch);
        readCh();
        boolean separator = (ch == '/') || (ch == '.') || (ch == '-');
        if (separator) {
            sawSeparator = true;
        }
        more = separator || Character.isJavaIdentifierPart((char) ch);
    } while (more);

    stringValue = bufferString();
    if (sawSeparator) {
        token = Token.IDENT;
        return;
    }

    token = keyword_token_ident(stringValue);
    if (token != Token.IDENT) {
        return;
    }

    intValue = constValue(stringValue);
    if (intValue == -1) {
        // Not a known constant: it stays a plain identifier.
        return;
    }

    // this is a constant
    if (debugCP) {
        ConstType ct = constType(stringValue);
        if (ct != null) {
            addConstDebug(ct);
        }
    }
    token = Token.INTVAL;
    intSize = 1;
    longValue = intValue;
}
/**
 * Skips characters until the current character equals sym. The matching
 * character is not consumed. Reports "eof.in.comment" when the input ends
 * first.
 *
 * @param sym the character to stop at
 */
protected void skipTill(int sym) throws IOException {
    while (ch != EOF) {
        if (ch == sym) {
            return;
        }
        readCh();
    }
    env.error(pos, "eof.in.comment");
}
/**
 * Scans the next token: skips white space and comments, then sets token (and
 * the value fields that belong to it) from the characters that follow.
 * <p>
 * On entry docComment is cleared and sign is reset to 1. A doc comment met
 * while skipping is kept in docComment. Characters that cannot start a token
 * are reported as "funny.char" and skipped.
 *
 * @return the value of pos on entry, i.e. the position of the token that was
 * current before this call; after a macro reference it is the position
 * recorded while that reference was scanned
 * @throws IOException if reading the input fails
 * @throws SyntaxError if an undeclared macro is referenced
 */
protected int xscan() throws IOException {
int retPos = pos;
prevPos = in.pos;
docComment = null;
sign = 1;
// Every pass of the loop either returns a token or skips something
// (white space, a comment, a sign, a bad character) and tries again.
for (;;) {
pos = in.pos;
switch (ch) {
case EOF:
token = Token.EOF;
return retPos;
case '\n':
case ' ':
case '\t':
case '\f':
// White space: skip.
readCh();
break;
case '/':
readCh();
switch (ch) {
case '/':
// Parse a // comment
do {
readCh();
} while ((ch != EOF) && (ch != '\n'));
break;
case '*':
// Block comment; a second '*' makes it a doc comment.
readCh();
if (ch == '*') {
docComment = scanDocComment();
} else {
skipComment();
}
break;
default:
// A lone '/'.
token = Token.DIV;
return retPos;
}
break;
case '"':
scanString();
return retPos;
case '\'':
scanCharArray();
return retPos;
case '-':
sign = -sign; // hack: no check that numbers only are allowed after
// falls through: the sign character itself is skipped like '+'
case '+':
readCh();
break;
case '0':
// A leading zero: either a hex number, a decimal number with leading
// zero, or the value 0 with an optional size suffix.
readCh();
token = Token.INTVAL;
longValue = intValue = 0;
switch (ch) {
case 'x':
case 'X':
scanHexNumber();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
scanDecNumber();
break;
case 'b':
readCh();
intSize = 1;
break;
case 's':
readCh();
intSize = 2;
break;
case 'i':
readCh();
intSize = 4;
break;
case 'l':
readCh();
intSize = 8;
break;
default:
intSize = 2;
}
return retPos;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
scanDecNumber();
return retPos;
// Single-character punctuation tokens.
case '{':
readCh();
token = Token.LBRACE;
return retPos;
case '}':
readCh();
token = Token.RBRACE;
return retPos;
case '(':
readCh();
token = Token.LPAREN;
return retPos;
case ')':
readCh();
token = Token.RPAREN;
return retPos;
case '[':
readCh();
token = Token.LSQBRACKET;
return retPos;
case ']':
readCh();
token = Token.RSQBRACKET;
return retPos;
case ',':
readCh();
token = Token.COMMA;
return retPos;
case ';':
readCh();
token = Token.SEMICOLON;
return retPos;
case ':':
readCh();
token = Token.COLON;
return retPos;
case '=':
// "==" is EQ, a single '=' is ASSIGN.
readCh();
if (ch == '=') {
readCh();
token = Token.EQ;
return retPos;
}
token = Token.ASSIGN;
return retPos;
case '\u001a':
// Our one concession to DOS.
// The character counts as end of input only when nothing follows it.
readCh();
if (ch == EOF) {
token = Token.EOF;
return retPos;
}
env.error(pos, "funny.char");
readCh();
break;
case '#':
// Skip the '#' and scan what follows as a decimal number.
readCh();
scanDecNumber();
return retPos;
case '&': {
// Macro reference: '&' followed by the macro name.
readCh();
retPos = pos;
if (!Character.isJavaIdentifierStart((char) ch)) {
env.error(pos, "identifier.expected");
}
scanIdentifier();
String macroId = stringValue;
String macro = (String) macros.get(macroId);
if (macro == null) {
env.error(pos, "macro.undecl", macroId);
throw new SyntaxError();
}
// Install the macro text and fetch the next character, then continue
// scanning for a token.
setMacro(macro);
readCh();
}
break;
default:
if (Character.isJavaIdentifierStart((char) ch)) {
scanIdentifier();
return retPos;
}
env.error(pos, "funny.char");
readCh();
break;
}
}
}
/**
 * Scans forward to the closing token that balances the current opening token,
 * honouring nested pairs. The current token must be the opening one. Reports
 * "unbalanced.paren" when the input ends first.
 *
 * @param open  the opening token, e.g. '{', '[' or '('
 * @param close the matching closing token
 */
protected void match(Token open, Token close) throws IOException {
    int nesting = 1;
    do {
        scan();
        if (token == open) {
            nesting++;
        } else if (token == close) {
            nesting--;
        } else if (token == Token.EOF) {
            env.error(pos, "unbalanced.paren");
            return;
        }
    } while (nesting != 0);
}
/**
 * Advances to the next token by delegating to xscan().
 *
 * @return the position of the previous token.
 */
protected int scan() throws IOException {
    return xscan();
}
/**
 * Advances to the next token by delegating to xscan(); currently identical
 * to scan().
 *
 * @return the position of the previous token.
 */
protected int scanMacro() throws IOException {
    return xscan();
}
}