import java.io.*;

/**
 * A small hand-written lexer that reads a file one byte at a time and hands
 * out one token per call to {@link #gtkn()}.
 *
 * <p>The constructor primes the first token, so {@link #getToken()} and
 * {@link #getTokenType()} are meaningful immediately after construction.
 * Ordinary comments and lines starting with {@code #} are skipped; comments
 * opened with {@code /**} are returned as {@link #DOC} tokens.
 *
 * <p>Each input byte is mapped to the character with the same numeric value
 * (0-255); no multi-byte decoding is performed.
 */
public class Lex {
    /** End of input (also the state after the file could not be opened). */
    public static final int EOF = 0;
    /** Double-quoted literal; the token text excludes the surrounding quotes. */
    public static final int STRING = 1;
    /** NOTE(review): not assigned in this class; presumably returned by Keyword.identify. */
    public static final int SYMBOL = 2;
    /** Single-quoted literal; the token text excludes the surrounding quotes. */
    public static final int CHAR_LITERAL = 3;
    /** A run of letters and digits that starts with a digit. */
    public static final int NUMBER = 4;
    /** A comment opened with slash-star-star; the token text includes the delimiters. */
    public static final int DOC = 5;
    public static final int EQUAL = 30;
    public static final int PERIOD = 31;
    public static final int SLASH = 32;
    public static final int LEFTBRACE = 33;
    public static final int RIGHTBRACE = 34;
    public static final int COMMA = 35;
    public static final int SEMICOLON = 36;
    public static final int ASTERISK = 37;
    public static final int LEFTPAREN = 38;
    public static final int RIGHTPAREN = 39;
    public static final int LEFTBRACKET = 40;
    public static final int RIGHTBRACKET = 41;
    public static final int OCTOTHORPE = 42;
    /** Two or more consecutive periods. */
    public static final int ELIPSIS = 43;
    /** Any single character that no other rule recognizes. */
    public static final int UNKNOWN = 1000;

    /** Input stream; null when the file could not be opened or after it has been closed. */
    private DataInputStream in;
    private int tokenType;
    private final StringBuilder token = new StringBuilder();
    /** The look-ahead character; only meaningful while eofHit is false. */
    private char character;
    private boolean eofHit;
    private boolean tokenIsSymbol;
    private final String fileName;
    /** Incremented every time a newline character is read. */
    private int lineNumber = 1;

    /**
     * Opens the named file and reads the first token.
     *
     * <p>If the file cannot be opened the exception is printed to standard
     * output and the lexer behaves as if it were already at end of file.
     *
     * @param fileName path of the file to tokenize
     */
    public Lex(String fileName) {
        this.fileName = fileName;
        try {
            FileInputStream fis = new FileInputStream(fileName);
            BufferedInputStream bis = new BufferedInputStream(fis);
            this.in = new DataInputStream(bis);
            character = ' ';
            eofHit = false;
            gtkn();
        } catch (FileNotFoundException e) {
            System.out.println(e);
            eofHit = true;
        }
    }

    /**
     * Opens the given file and reads the first token.
     *
     * @param file the file to tokenize; its absolute path is used
     */
    public Lex(File file) {
        this(file.getAbsolutePath());
    }

    /**
     * Advances to the next token and returns its type.
     *
     * <p>Whitespace, ordinary comments, line comments and lines starting with
     * {@code #} are skipped. This is done with a loop rather than by
     * recursion so that long runs of comments or pre-processor lines do not
     * deepen the call stack.
     *
     * @return the type of the new current token; {@link #EOF} at end of input
     */
    public int gtkn() {
        while (true) {
            tokenIsSymbol = false;
            token.setLength(0);
            if (eofHit) {
                return tokenType = EOF;
            }
            while (Character.isWhitespace(character)) {
                gc();
            }
            if (eofHit) {
                return tokenType = EOF;
            }

            // Identifier or keyword; periods are accepted inside the name.
            if (Character.isLetter(character) || character == '_') {
                tokenIsSymbol = true;
                while (Character.isLetterOrDigit(character)
                        || character == '_'
                        || character == '.') {
                    pgc();
                }
                return tokenType = Keyword.identify(token.toString());
            }

            if (Character.isDigit(character)) {
                while (Character.isLetterOrDigit(character)) {
                    pgc();
                }
                return tokenType = NUMBER;
            }

            switch (character) {
                case '"':
                    scanQuoted('"');
                    return tokenType = STRING;
                case '\'':
                    scanQuoted('\'');
                    return tokenType = CHAR_LITERAL;
                case '/':
                    pgc();
                    if (character == '*') {
                        pgc();
                        if (character == '*') {
                            pgc();
                            if (character == '/') {
                                // "/**/" is an empty ordinary comment, not
                                // the opening of a doc comment.
                                gc();
                                continue;
                            }
                            return getDoc();
                        }
                        skipPast('*', '/');
                        continue;
                    }
                    if (character == '/') {
                        skipPast('\n');
                        continue;
                    }
                    tokenType = SLASH;
                    return tokenType;
                case '*':
                    pgc();
                    return tokenType = ASTERISK;
                case ',':
                    pgc();
                    return tokenType = COMMA;
                case ';':
                    pgc();
                    return tokenType = SEMICOLON;
                case '=':
                    pgc();
                    return tokenType = EQUAL;
                case '.':
                    pgc();
                    if (character == '.') {
                        while (character == '.') {
                            pgc();
                        }
                        return tokenType = ELIPSIS;
                    }
                    return tokenType = PERIOD;
                case '{':
                    pgc();
                    return tokenType = LEFTBRACE;
                case '}':
                    pgc();
                    return tokenType = RIGHTBRACE;
                case '(':
                    pgc();
                    return tokenType = LEFTPAREN;
                case ')':
                    pgc();
                    return tokenType = RIGHTPAREN;
                case '[':
                    pgc();
                    return tokenType = LEFTBRACKET;
                case ']':
                    pgc();
                    return tokenType = RIGHTBRACKET;
                case '#':
                    /* Skip past the pre-processor command */
                    skipToEndOfLine();
                    continue;
                default:
                    tokenType = UNKNOWN;
                    pgc();
                    return tokenType;
            }
        }
    }

    /**
     * Reads the body of a quoted literal into the current token.
     *
     * <p>On entry the current character is the opening quote. Both quotes are
     * dropped. A backslash is kept together with the character that follows
     * it, except that two consecutive backslashes are stored as one. Scanning
     * also stops at end of file, so an unterminated literal yields whatever
     * was collected instead of looping forever.
     *
     * @param quote the character that terminates the literal
     */
    private void scanQuoted(char quote) {
        gc(); // drop the opening quote
        while (!eofHit && character != quote) {
            if (character == '\\') {
                pgc(); // keep the backslash itself
                if (character == '\\') {
                    gc(); // second backslash of the pair is dropped
                } else if (!eofHit) {
                    pgc(); // keep the escaped character verbatim
                }
            } else {
                pgc();
            }
        }
        gc(); // drop the closing quote (does nothing at end of file)
    }

    /**
     * Skips input until {@code c1} immediately followed by {@code c2} has
     * been consumed, or until end of file.
     */
    public void skipPast(char c1, char c2) {
        while (!eofHit) {
            skipPast(c1);
            if (character == c2) {
                gc();
                return;
            }
        }
    }

    /**
     * Skips input up to and including the first occurrence of {@code c},
     * starting with the current character, or until end of file.
     */
    public void skipPast(char c) {
        while (!eofHit) {
            if (character == c) {
                gc();
                return;
            }
            gc();
        }
    }

    /**
     * Read and skip every character until the end of line is found.
     * This is done by monitoring the line counter.
     */
    public void skipToEndOfLine() {
        int startLine = lineNumber;
        while ((startLine == lineNumber) && (!eofHit)) {
            gc();
        }
    }

    /**
     * Skip to either the end of file or until the token is of the
     * specified type.
     */
    public void skipTo(int type) {
        while ((!eofHit) && (tokenType != type)) {
            gtkn();
        }
    }

    /**
     * Skip to the end of file or until the token is of either of the
     * specified types.
     */
    public void skipTo(int type1, int type2) {
        while ((!eofHit) && (tokenType != type1) && (tokenType != type2)) {
            gtkn();
        }
    }

    /**
     * The current token is of a type that can contain stuff (an opening
     * brace, parenthesis or bracket). Skip forward until its partner is
     * found. Leave the found partner as the current token and return.
     * Returns immediately if the current token is not an opener.
     *
     * <p>NOTE(review): only the sum of the three depth counters is tested,
     * so a mismatched closer can end the scan early.
     *
     * @throws SyntaxException if end of file is reached first
     */
    public void skipNested() throws SyntaxException {
        int braceDepth = 0;
        int parenDepth = 0;
        int bracketDepth = 0;
        if (tokenType == LEFTBRACE) braceDepth = 1;
        if (tokenType == LEFTPAREN) parenDepth = 1;
        if (tokenType == LEFTBRACKET) bracketDepth = 1;
        while ((braceDepth + parenDepth + bracketDepth) != 0) {
            switch (gtkn()) {
                case LEFTBRACE:
                    braceDepth++;
                    break;
                case LEFTPAREN:
                    parenDepth++;
                    break;
                case LEFTBRACKET:
                    bracketDepth++;
                    break;
                case RIGHTBRACE:
                    braceDepth--;
                    break;
                case RIGHTPAREN:
                    parenDepth--;
                    break;
                case RIGHTBRACKET:
                    bracketDepth--;
                    break;
                case EOF:
                    expected("closing paren, brace, or bracket");
                    break;
            }
        }
    }

    /**
     * Skip to a token of the given type while ignoring all tokens inside
     * parentheses, brackets, or braces. Stops at end of file as well.
     *
     * <p>NOTE(review): only the sum of the three depth counters is tested,
     * so a mismatched closer can end a nested scan early.
     *
     * @param type the token type to stop on
     * @throws SyntaxException if end of file is reached inside a nested group
     */
    public void skipNestedTo(int type) throws SyntaxException {
        int braceDepth = 0;
        int parenDepth = 0;
        int bracketDepth = 0;
        while ((!eofHit) && (tokenType != type)) {
            if (tokenType == LEFTBRACE) braceDepth = 1;
            if (tokenType == LEFTPAREN) parenDepth = 1;
            if (tokenType == LEFTBRACKET) bracketDepth = 1;
            if ((braceDepth + parenDepth + bracketDepth) != 0) {
                while ((braceDepth + parenDepth + bracketDepth) != 0) {
                    switch (gtkn()) {
                        case LEFTBRACE:
                            braceDepth++;
                            break;
                        case LEFTPAREN:
                            parenDepth++;
                            break;
                        case LEFTBRACKET:
                            bracketDepth++;
                            break;
                        case RIGHTBRACE:
                            braceDepth--;
                            break;
                        case RIGHTPAREN:
                            parenDepth--;
                            break;
                        case RIGHTBRACKET:
                            bracketDepth--;
                            break;
                        case EOF:
                            expected("closing paren, brace, or bracket");
                            break;
                    }
                }
            } else {
                gtkn();
            }
        }
    }

    /**
     * The opening slash and two asterisks are already in the token and the
     * current character is the first one after them. Continue the scan
     * until the entire comment block has been transferred to the current
     * token.
     *
     * @return {@link #DOC}, or {@link #EOF} if the input ends before the
     *         comment is closed
     */
    private int getDoc() {
        while (!eofHit) {
            if (character == '*') {
                while (character == '*') {
                    pgc();
                }
                if (character == '/') {
                    pgc();
                    return tokenType = DOC;
                }
            } else {
                pgc();
            }
        }
        return tokenType = EOF;
    }

    /** Appends the current character to the token, then reads the next one. */
    private final void pgc() {
        token.append(character);
        gc();
    }

    /**
     * Reads the next character. At end of file the character becomes
     * {@code '\u0000'}, the EOF flag is set and the stream is closed.
     */
    private void gc() {
        if (eofHit) {
            character = '\u0000';
            return;
        }
        try {
            // readUnsignedByte keeps bytes >= 0x80 in the range 0x80-0xFF;
            // casting a signed byte to char would sign-extend them to 0xFFxx.
            character = (char) in.readUnsignedByte();
            if (character == '\n') {
                ++lineNumber;
            }
        } catch (EOFException e) {
            character = '\u0000';
            eofHit = true;
            closeInput();
        } catch (IOException e) {
            // NOTE(review): terminating the JVM from a lexer is drastic;
            // kept because callers may rely on it.
            System.out.println(e);
            System.exit(1);
        }
    }

    /** Closes the input stream if it is still open; safe to call repeatedly. */
    private void closeInput() {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ex) {
            System.out.println(ex);
        }
        in = null;
    }

    /** Returns the text of the current token. */
    public String getToken() {
        return token.toString();
    }

    /** Returns the type of the current token. */
    public int getTokenType() {
        return tokenType;
    }

    /** Returns true if the current token started with a letter or underscore. */
    public boolean isSymbol() {
        return tokenIsSymbol;
    }

    /**
     * Set to the end of file condition just as if the entire file had
     * already been read. The underlying stream is closed, since nothing
     * will be read from it afterwards.
     */
    public void setToEof() {
        eofHit = true;
        tokenType = EOF;
        closeInput();
    }

    /**
     * Throws a {@code SyntaxException} that records the current line and
     * file name, the expectation given, and what was found instead (the
     * current token text, or EOF if the end of file has been reached).
     *
     * @param expect description of what the caller expected to find
     * @throws SyntaxException always
     */
    public void expected(String expect) throws SyntaxException {
        SyntaxException e = new SyntaxException();
        e.setLine("Line " + lineNumber + " of " + fileName);
        e.setExpected("Expected " + expect);
        if (eofHit) {
            e.setFound("Found EOF");
        } else {
            e.setFound("Found " + token.toString());
        }
        throw e;
    }

    /** Returns the current token type and text, for debugging. */
    @Override
    public String toString() {
        return "type=" + tokenType + " token=\"" + token.toString() + "\"";
    }
}