From 829234bfb8d481413bf0466562fe87e8c396defd Mon Sep 17 00:00:00 2001 From: Graham Anderson Date: Thu, 16 Aug 2012 12:04:00 +0000 Subject: [PATCH 17/19] Go language support --- go.c | 670 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ parsers.h | 2 + source.mak | 2 + 3 files changed, 674 insertions(+) create mode 100644 go.c diff --git a/go.c b/go.c new file mode 100644 index 0000000..6bd3a36 --- /dev/null +++ b/go.c @@ -0,0 +1,670 @@ +/* +* INCLUDE FILES +*/ +#include "general.h" /* must always come first */ +#include + +#include "debug.h" +#include "entry.h" +#include "keyword.h" +#include "read.h" +#include "main.h" +#include "routines.h" +#include "vstring.h" +#include "options.h" + +/* + * MACROS + */ +#define isType(token,t) (boolean) ((token)->type == (t)) +#define isKeyword(token,k) (boolean) ((token)->keyword == (k)) + +/* + * DATA DECLARATIONS + */ + +typedef enum eException { ExceptionNone, ExceptionEOF } exception_t; + +typedef enum eKeywordId { + KEYWORD_NONE = -1, + KEYWORD_package, + KEYWORD_import, + KEYWORD_const, + KEYWORD_type, + KEYWORD_var, + KEYWORD_func, + KEYWORD_struct, + KEYWORD_interface, + KEYWORD_map, + KEYWORD_chan +} keywordId; + +/* Used to determine whether keyword is valid for the current language and + * what its ID is. + */ +typedef struct sKeywordDesc { + const char *name; + keywordId id; +} keywordDesc; + +typedef enum eTokenType { + TOKEN_NONE = -1, + TOKEN_CHARACTER, + // Don't need TOKEN_FORWARD_SLASH + TOKEN_FORWARD_SLASH, + TOKEN_KEYWORD, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_OPEN_PAREN, + TOKEN_CLOSE_PAREN, + TOKEN_OPEN_CURLY, + TOKEN_CLOSE_CURLY, + TOKEN_OPEN_SQUARE, + TOKEN_CLOSE_SQUARE, + TOKEN_SEMICOLON, + TOKEN_STAR, + TOKEN_LEFT_ARROW, + TOKEN_DOT, + TOKEN_COMMA +} tokenType; + +typedef struct sTokenInfo { + tokenType type; + keywordId keyword; + vString *string; /* the name of the token */ + unsigned long lineNumber; /* line number of tag */ + fpos_t filePosition; /* file position of line containing name */ +} tokenInfo; + +/* +* DATA DEFINITIONS +*/ + +static int Lang_go; +static jmp_buf Exception; +static vString *scope; + +typedef enum { + GOTAG_UNDEFINED = -1, + GOTAG_PACKAGE, + GOTAG_FUNCTION, + GOTAG_CONST, + GOTAG_TYPE, + GOTAG_VAR, +} goKind; + +static kindOption GoKinds[] = { + {TRUE, 'p', "package", "packages"}, + {TRUE, 'f', "func", "functions"}, + {TRUE, 'c', "const", "constants"}, + {TRUE, 't', "type", "types"}, + {TRUE, 'v', "var", "variables"} +}; + +static keywordDesc GoKeywordTable[] = { + {"package", KEYWORD_package}, + {"import", KEYWORD_import}, + {"const", KEYWORD_const}, + {"type", KEYWORD_type}, + {"var", KEYWORD_var}, + {"func", KEYWORD_func}, + {"struct", KEYWORD_struct}, + {"interface", KEYWORD_interface}, + {"map", KEYWORD_map}, + {"chan", KEYWORD_chan} +}; + +/* +* FUNCTION DEFINITIONS +*/ + +// XXX UTF-8 +static boolean isIdentChar (const int c) +{ + return (boolean) + (isalpha (c) || isdigit (c) || c == '$' || + c == '@' || c == '_' || c == '#' || c > 128); +} + +static void initialize (const langType language) +{ + size_t i; + const size_t count = + sizeof (GoKeywordTable) / sizeof (GoKeywordTable[0]); + Lang_go = language; + for (i = 0; i < count; ++i) + { + const keywordDesc *const p = &GoKeywordTable[i]; + addKeyword (p->name, language, (int) p->id); + } +} + +static tokenInfo *newToken (void) +{ + tokenInfo *const token = xMalloc (1, tokenInfo); + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + token->string = vStringNew (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + return token; +} + +static void deleteToken (tokenInfo * const token) +{ + if (token != NULL) + { + vStringDelete (token->string); + eFree (token); + } +} + +/* + * Parsing functions + */ + +static void parseString (vString *const string, const int delimiter) +{ + boolean end = FALSE; + while (!end) + { + int c = fileGetc (); + if (c == EOF) + end = TRUE; + else if (c == '\\' && delimiter != '`') + { + c = fileGetc (); /* This maybe a ' or ". */ + vStringPut (string, c); + } + else if (c == delimiter) + end = TRUE; + else + vStringPut (string, c); + } + vStringTerminate (string); +} + +static void parseIdentifier (vString *const string, const int firstChar) +{ + int c = firstChar; + //Assert (isIdentChar (c)); + do + { + vStringPut (string, c); + c = fileGetc (); + } while (isIdentChar (c)); + vStringTerminate (string); + fileUngetc (c); /* always unget, LF might add a semicolon */ +} + +static void readToken (tokenInfo *const token) +{ + int c; + static tokenType lastTokenType = TOKEN_NONE; + + token->type = TOKEN_NONE; + token->keyword = KEYWORD_NONE; + vStringClear (token->string); + +getNextChar: + do + { + c = fileGetc (); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || + lastTokenType == TOKEN_STRING || + lastTokenType == TOKEN_CLOSE_PAREN || + lastTokenType == TOKEN_CLOSE_CURLY || + lastTokenType == TOKEN_CLOSE_SQUARE)) + { + token->type = TOKEN_SEMICOLON; + goto done; + } + } + while (c == '\t' || c == ' ' || c == '\r' || c == '\n'); + + switch (c) + { + case EOF: + longjmp (Exception, (int)ExceptionEOF); + break; + + case '/': + { + boolean hasNewline = FALSE; + int d = fileGetc (); + switch (d) + { + case '/': + fileSkipToCharacter ('\n'); + /* Line comments start with the + * character sequence // and + * continue through the next + * newline. A line comment acts + * like a newline. */ + fileUngetc ('\n'); + goto getNextChar; + case '*': + do + { + int d; + do + { + d = fileGetc (); + if (d == '\n') + { + hasNewline = TRUE; + } + } while (d != EOF && d != '*'); + + c = fileGetc (); + if (c == '/') + break; + else + fileUngetc (c); + } while (c != EOF && c != '\0'); + + fileUngetc (hasNewline ? '\n' : ' '); + goto getNextChar; + default: + token->type = TOKEN_FORWARD_SLASH; + fileUngetc (d); + break; + } + } + break; + + case '"': + case '\'': + case '`': + token->type = TOKEN_STRING; + parseString (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + break; + + case '<': + { + int d = fileGetc (); + if (d == '-') + { + token->type = TOKEN_LEFT_ARROW; + break; + } + else + goto getNextChar; + } + + case '(': + token->type = TOKEN_OPEN_PAREN; + break; + + case ')': + token->type = TOKEN_CLOSE_PAREN; + break; + + case '{': + token->type = TOKEN_OPEN_CURLY; + break; + + case '}': + token->type = TOKEN_CLOSE_CURLY; + break; + + case '[': + token->type = TOKEN_OPEN_SQUARE; + break; + + case ']': + token->type = TOKEN_CLOSE_SQUARE; + break; + + case '*': + token->type = TOKEN_STAR; + break; + + case '.': + token->type = TOKEN_DOT; + break; + + case ',': + token->type = TOKEN_COMMA; + break; + + default: + parseIdentifier (token->string, c); + token->lineNumber = getSourceLineNumber (); + token->filePosition = getInputFilePosition (); + token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); + if (isKeyword (token, KEYWORD_NONE)) + token->type = TOKEN_IDENTIFIER; + else + token->type = TOKEN_KEYWORD; + break; + } + +done: + lastTokenType = token->type; +} + +static void skipToMatched (tokenInfo *const token) +{ + int nest_level = 0; + tokenType open_token; + tokenType close_token; + + switch (token->type) + { + case TOKEN_OPEN_PAREN: + open_token = TOKEN_OPEN_PAREN; + close_token = TOKEN_CLOSE_PAREN; + break; + case TOKEN_OPEN_CURLY: + open_token = TOKEN_OPEN_CURLY; + close_token = TOKEN_CLOSE_CURLY; + break; + case TOKEN_OPEN_SQUARE: + open_token = TOKEN_OPEN_SQUARE; + close_token = TOKEN_CLOSE_SQUARE; + break; + default: + return; + } + + /* + * This routine will skip to a matching closing token. + * It will also handle nested tokens like the (, ) below. + * ( name varchar(30), text binary(10) ) + */ + if (isType (token, open_token)) + { + nest_level++; + while (!(isType (token, close_token) && (nest_level == 0))) + { + readToken (token); + if (isType (token, open_token)) + { + nest_level++; + } + if (isType (token, close_token)) + { + if (nest_level > 0) + { + nest_level--; + } + } + } + readToken (token); + } +} + +static void skipType (tokenInfo *const token) +{ +again: + // Type = TypeName | TypeLit | "(" Type ")" . + if (isType (token, TOKEN_OPEN_PAREN)) + { + skipToMatched (token); + return; + } + + // TypeName = QualifiedIdent. + // QualifiedIdent = [ PackageName "." ] identifier . + // PackageName = identifier . + if (isType (token, TOKEN_IDENTIFIER)) + { + readToken (token); + if (isType (token, TOKEN_DOT)) + { + readToken (token); + Assert (isType (token, TOKEN_IDENTIFIER)); + readToken (token); + } + return; + } + + // StructType = "struct" "{" { FieldDecl ";" } "}" + // InterfaceType = "interface" "{" { MethodSpec ";" } "}" . + if (isKeyword (token, KEYWORD_struct) || isKeyword (token, KEYWORD_interface)) + { + readToken (token); + Assert (isType (token, TOKEN_OPEN_CURLY)); + skipToMatched (token); + return; + } + + // ArrayType = "[" ArrayLength "]" ElementType . + // SliceType = "[" "]" ElementType . + // ElementType = Type . + if (isType (token, TOKEN_OPEN_SQUARE)) + { + skipToMatched (token); + goto again; + } + + // PointerType = "*" BaseType . + // BaseType = Type . + // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType . + if (isType (token, TOKEN_STAR) || isKeyword (token, KEYWORD_chan) || isType (token, TOKEN_LEFT_ARROW)) + { + readToken (token); + goto again; + } + + // MapType = "map" "[" KeyType "]" ElementType . + // KeyType = Type . + if (isKeyword (token, KEYWORD_map)) + { + readToken (token); + Assert (isType (token, TOKEN_OPEN_SQUARE)); + skipToMatched (token); + goto again; + } + + // FunctionType = "func" Signature . + // Signature = Parameters [ Result ] . + // Result = Parameters | Type . + // Parameters = "(" [ ParameterList [ "," ] ] ")" . + if (isKeyword (token, KEYWORD_func)) + { + readToken (token); + Assert (isType (token, TOKEN_OPEN_PAREN)); + // Parameters + skipToMatched (token); + // Result is parameters or type or nothing. skipType treats anything + // surrounded by parentheses as a type, and does nothing if what + // follows is not a type. + goto again; + } +} + +// Skip to the next semicolon, skipping over matching brackets. +static void skipToTopLevelSemicolon (tokenInfo *const token) +{ + while (!isType (token, TOKEN_SEMICOLON)) + { + readToken (token); + skipToMatched (token); + } +} + +static void makeTag (tokenInfo *const token, const goKind kind) +{ + const char *const name = vStringValue (token->string); + + tagEntryInfo e; + initTagEntry (&e, name); + + if (!GoKinds [kind].enabled) + return; + + e.lineNumber = token->lineNumber; + e.filePosition = token->filePosition; + e.kindName = GoKinds [kind].name; + e.kind = GoKinds [kind].letter; + + makeTagEntry (&e); + + if (scope && Option.include.qualifiedTags) + { + vString *qualifiedName = vStringNew (); + vStringCopy (qualifiedName, scope); + vStringCatS (qualifiedName, "."); + vStringCat (qualifiedName, token->string); + e.name = vStringValue (qualifiedName); + makeTagEntry (&e); + vStringDelete (qualifiedName); + } +} + +static void parsePackage (tokenInfo *const token) +{ + tokenInfo *const name = newToken (); + + readToken (name); + Assert (isType (name, TOKEN_IDENTIFIER)); + makeTag (name, GOTAG_PACKAGE); + if (!scope && Option.include.qualifiedTags) + { + scope = vStringNew (); + vStringCopy (scope, name->string); + } + + deleteToken (name); +} + +static void parseFunctionOrMethod (tokenInfo *const token) +{ + // FunctionDecl = "func" identifier Signature [ Body ] . + // Body = Block. + // + // MethodDecl = "func" Receiver MethodName Signature [ Body ] . + // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" . + // BaseTypeName = identifier . + tokenInfo *const name = newToken (); + + // Skip over receiver. + readToken (name); + if (isType (name, TOKEN_OPEN_PAREN)) + skipToMatched (name); + + Assert (isType (name, TOKEN_IDENTIFIER)); + + // Skip over parameters. + readToken (token); + skipToMatched (token); + + // Skip over result. + skipType (token); + + // Skip over function body. + if (isType (token, TOKEN_OPEN_CURLY)) + skipToMatched (token); + + makeTag (name, GOTAG_FUNCTION); + + deleteToken (name); +} + +static void parseConstTypeVar (tokenInfo *const token, goKind kind) +{ + // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) . + // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] . + // IdentifierList = identifier { "," identifier } . + // ExpressionList = Expression { "," Expression } . + // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) . + // TypeSpec = identifier Type . + // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) . + // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) . + tokenInfo *const name = newToken (); + boolean usesParens = FALSE; + + readToken (name); + + if (isType (name, TOKEN_OPEN_PAREN)) + { + usesParens = TRUE; + readToken (name); + } + +again: + while (1) + { + makeTag (name, kind); + readToken (token); + if (!isType (token, TOKEN_COMMA) && !isType (token, TOKEN_CLOSE_PAREN)) + break; + readToken (name); + } + + skipType (token); + skipToTopLevelSemicolon (token); + + if (usesParens) + { + readToken (name); + if (!isType (name, TOKEN_CLOSE_PAREN)) + goto again; + } + + deleteToken (name); +} + +static void parseGoFile (tokenInfo *const token) +{ + do + { + readToken (token); + + if (isType (token, TOKEN_KEYWORD)) + { + switch (token->keyword) + { + case KEYWORD_package: + parsePackage (token); + break; + case KEYWORD_func: + parseFunctionOrMethod (token); + break; + case KEYWORD_const: + parseConstTypeVar (token, GOTAG_CONST); + break; + case KEYWORD_type: + parseConstTypeVar (token, GOTAG_TYPE); + break; + case KEYWORD_var: + parseConstTypeVar (token, GOTAG_VAR); + break; + default: + break; + } + } + } while (TRUE); +} + +static void findGoTags (void) +{ + tokenInfo *const token = newToken (); + exception_t exception; + + exception = (exception_t) (setjmp (Exception)); + while (exception == ExceptionNone) + parseGoFile (token); + + deleteToken (token); + vStringDelete (scope); + scope = NULL; +} + +extern parserDefinition *GoParser (void) +{ + static const char *const extensions[] = { "go", NULL }; + parserDefinition *def = parserNew ("Go"); + def->kinds = GoKinds; + def->kindCount = KIND_COUNT (GoKinds); + def->extensions = extensions; + def->parser = findGoTags; + def->initialize = initialize; + return def; +} diff --git a/parsers.h b/parsers.h index e4a50e1..b150506 100644 --- a/parsers.h +++ b/parsers.h @@ -32,6 +32,7 @@ ErlangParser, \ FlexParser, \ FortranParser, \ + GoParser, \ HtmlParser, \ JavaParser, \ JavaScriptParser, \ @@ -76,6 +77,7 @@ FlexParser, \ FortranParser, \ GenieParser, \ + GoParser, \ HtmlParser, \ JavaParser, \ JavaScriptParser, \ diff --git a/source.mak b/source.mak index 3e5f740..d4c4c26 100644 --- a/source.mak +++ b/source.mak @@ -24,6 +24,7 @@ SOURCES = \ flex.c \ fortran.c \ get.c \ + go.c \ html.c \ jscript.c \ keyword.c \ @@ -86,6 +87,7 @@ OBJECTS = \ flex.$(OBJEXT) \ fortran.$(OBJEXT) \ get.$(OBJEXT) \ + go.$(OBJEXT) \ html.$(OBJEXT) \ jscript.$(OBJEXT) \ keyword.$(OBJEXT) \ -- 2.11.0