// Syntax

grammar KGSQL;

/**
 * Knowledge Graph System Query Language ANTLR4 Grammar.  
 */

@header {
package kgsql.parser;
}

// Entry rule: exactly one command followed by end of input.
// The explicit EOF forces the parser to consume the whole token stream;
// without it, parsing stops after the first valid command and any trailing
// (possibly malformed) input is silently ignored instead of being reported.
root : command EOF;

// A command is a (possibly empty) prologue followed by exactly one of the
// five request forms.
command
  : prologue
    ( selectQuery
    | askQuery
    | constructQuery
    | insertRequest
    | deleteRequest
    )
  ;

// Zero or more prefix declarations.
prologue
  : prefixDecl*
  ;

// Prefix keyword, then a namespace token (e.g. "ex:" or ":"), then an
// angle-bracketed identifier.
prefixDecl
  : Prefix NameSpace Identifier
  ;

// Select keyword, one or more variables, then the where clause.
selectQuery
  : Select Variable+ whereClause
  ;

// Ask keyword followed by the where clause.
askQuery
  : Ask whereClause
  ;

// Construct keyword, a pattern block, then the where clause.
constructQuery
  : Construct patternBlock whereClause
  ;

// Insert keyword, a pattern block, then the where clause.
insertRequest
  : Insert patternBlock whereClause
  ;

// Delete keyword, one or more variables, then the where clause.
deleteRequest
  : Delete Variable+ whereClause
  ;

// Braced body; the Where keyword itself is optional. Inside the braces an
// optional leading pattern block is followed by any number of filters, each
// optionally followed by '.' and by another pattern block.
whereClause
  : Where? '{'
      patternBlock?
      ( filter '.'? patternBlock? )*
    '}'
  ;

// One subject group, optionally followed by '.' and (recursively) further
// subject groups; a trailing '.' is permitted.
patternBlock
  : patternsSameSubject ( '.' patternBlock? )?
  ;

// Filter keyword followed by a constraint.
filter
  : Filter constraint
  ;

// A subject (noun or linked list) with an optional predicate list.
patternsSameSubject
  : ( noun | linkedList ) predicateList?
  ;

// verb/objectList pairs separated by ';'. A ';' may stand alone, so empty
// entries and a trailing ';' are accepted.
predicateList
  : verb objectList
    ( ';' ( verb objectList )? )*
  ;

// One or more objects separated by ','.
objectList
  : object ( ',' object )*
  ;

// An object is either a noun or a linked list.
object
  : noun
  | linkedList
  ;

// Either a plain resource/variable, or a bracketed form holding one or two
// resources/variables followed by a type union. Both forms accept an
// optional multiplicity suffix.
noun
  : resourceOrVariable multiplicity?
  | '[' resourceOrVariable resourceOrVariable? typeUnion ']' multiplicity?
  ;

// Either a bare type union, or a bracketed resource/variable followed by a
// type union. Both forms accept an optional multiplicity suffix.
verb
  : typeUnion multiplicity?
  | '[' resourceOrVariable typeUnion ']' multiplicity?
  ;

// A single variable, or one or more prefixed names separated by '|'.
typeUnion
  : Variable
  | prefixedName ( '|' prefixedName )*
  ;

// Any atomic term: prefixed name, string literal, number, boolean keyword,
// or variable.
resourceOrVariable
  : prefixedName
  | typedLiteral
  | numericLiteral
  | True
  | False
  | Variable
  ;

// Parenthesised sequence of terms and nested lists (possibly empty). The
// bracketed form additionally carries a prefixed name after the list.
linkedList
  : '(' ( resourceOrVariable | linkedList )* ')'
  | '[' '(' ( resourceOrVariable | linkedList )* ')' prefixedName ']'
  ;

// Namespace token immediately followed by a local name token.
prefixedName
  : NameSpace LocalName
  ;

// Either a parenthesised expression or a call-like form: a local name
// followed by a parenthesised, optional argument list.
constraint
  : '(' expression ')'
  | LocalName '(' expressionList? ')'
  ;

// One or more expressions separated by ','.
expressionList
  : expression ( ',' expression )*
  ;

// Expression rules, listed from the loosest-binding operator to the tightest.

// Logical OR of one or more AND-expressions.
expression
  : conditionalAndExpression ( '||' conditionalAndExpression )*
  ;

// Logical AND of one or more relational expressions.
conditionalAndExpression
  : relationalExpression ( '&&' relationalExpression )*
  ;

// At most one comparison operator between two additive expressions
// (comparisons do not chain).
relationalExpression
  : additiveExpression
    ( ( '=' | '!=' | '<' | '>' | '<=' | '>=' ) additiveExpression )?
  ;

// Sums and differences. The signed numeric tokens are accepted as
// continuations because the lexer produces a single signed-number token for
// input such as "+2", so "1+2" arrives here as Nat followed by NatPositive.
additiveExpression
  : multiplicativeExpression
    ( ( '+' | '-' ) multiplicativeExpression
    | NatPositive
    | NatNegative
    | RealPositive
    | RealNegative
    )*
  ;

// Products and quotients.
multiplicativeExpression
  : unaryExpression ( ( '*' | '/' ) unaryExpression )*
  ;

// Optional single prefix operator applied to a primary expression.
unaryExpression
  : ( '+' | '-' | '!' )? primaryExpression
  ;

// Parenthesised expression, call-like form, atomic term, or nested ask query.
primaryExpression
  : '(' expression ')'
  | LocalName '(' expressionList? ')'
  | resourceOrVariable
  | askQuery
  ;

// A string literal with an optional language tag, written in one of three
// forms: bare, bracketed with a trailing prefixed name, or followed by '^^'
// and a prefixed name.
typedLiteral
  : Literal Lang?
  | '[' Literal Lang? prefixedName ']'
  | Literal Lang? '^^' prefixedName
  ;

// Any of the six numeric token kinds (unsigned or signed, natural or real).
numericLiteral
  : Nat
  | NatPositive
  | NatNegative
  | UnsignedReal
  | RealPositive
  | RealNegative
  ;

// Braced single value "{n}" or range "{n..m}".
multiplicity
  : '{' integer ( '..' integer )? '}'
  ;

// A natural number (optionally signed) or the '*' symbol.
integer
  : Nat
  | NatPositive
  | NatNegative
  | '*'
  ;

// Lexical Scanner Tokens

// In general, KGSQL is case-sensitive,
// but the following reserved words are case-insensitive:

// Each keyword is spelled with one two-letter character class per letter so
// that any mix of upper and lower case matches.
// The first six tokens include the whitespace that follows the keyword, so
// the keyword must be followed by at least one whitespace character.
Prefix    : [Pp][Rr][Ee][Ff][Ii][Xx] WS;
Select    : [Ss][Ee][Ll][Ee][Cc][Tt] WS;
Ask       : [Aa][Ss][Kk] WS;
Construct : [Cc][Oo][Nn][Ss][Tt][Rr][Uu][Cc][Tt] WS;
Insert    : [Ii][Nn][Ss][Ee][Rr][Tt] WS;
Delete    : [Dd][Ee][Ll][Ee][Tt][Ee] WS;
// The remaining four match the bare word only.
Where     : [Ww][Hh][Ee][Rr][Ee];
Filter    : [Ff][Ii][Ll][Tt][Ee][Rr];
True      : [Tt][Rr][Uu][Ee];
False     : [Ff][Aa][Ll][Ss][Ee];

// Identifiers
// The forbidden characters in an identifier are: spaces " < > \ ^ ` { | }

// '<' ... '>' enclosing any run (possibly empty) of characters outside the
// excluded set: U+0000..U+0020, '"', '<', '>', '\', '^', '`', '{', '|', '}'.
Identifier
  : '<' ~[\u0000-\u0020\u0022\u003C\u003E\u005C\u005E\u0060\u007B\u007C\u007D]* '>'
  ;

// Language Codes

// '@', a run of ASCII letters, then zero or more '-'-separated alphanumeric
// subtags (e.g. "@en", "@en-GB").
Lang
  : '@' [a-zA-Z]+ ( '-' [a-zA-Z0-9]+ )*
  ;

// Numbers

// Decimal: optional integer part, '.', and at least one fractional digit.
// The fractional digits use a greedy loop. A non-greedy `+?` here does not
// mean "optional": in an ANTLR4 lexer it ends the match after the first
// fractional digit, splitting input such as "1.25" into "1.2" and "5".
// A trailing '.' with no digits is deliberately not part of the number, so
// it stays available as a pattern separator.
fragment UnsignedDecimal : [0-9]* '.' [0-9]+;

// Exponent marker, optional sign, and at least one digit.
fragment Exponent : [eE] [+-]? [0-9]+;

// Decimal or plain digit run, followed by a mandatory exponent.
fragment UnsignedDouble : ( UnsignedDecimal | [0-9]+ ) Exponent;

// A sign directly attached to the digits is lexed as part of the number token.
Nat : [0-9]+;
NatPositive : '+' Nat;
NatNegative : '-' Nat;
UnsignedReal : UnsignedDecimal | UnsignedDouble;
RealPositive : '+' UnsignedReal;
RealNegative : '-' UnsignedReal;

// String Literals

// Backslash escape: \t \b \n \r \f \\ \' \"
// The backslash itself must be escapable: the string bodies below reject a
// raw backslash, so without "\\" a literal could never contain one.
fragment EscapedCharacter : '\\' [tbnrf\\'"];

// Single-quoted string: any character except ', \, LF and CR, or an escape.
fragment StringLiteral1 : '\'' ( (~[\u0027\u005C\u000A\u000D]) | EscapedCharacter )* '\'';

// Double-quoted string: any character except ", \, LF and CR, or an escape.
fragment StringLiteral2 : '"' ( (~[\u0022\u005C\u000A\u000D]) | EscapedCharacter )* '"';

Literal : StringLiteral1 | StringLiteral2;

// Names

// Base letters: ASCII letters plus the listed Unicode ranges.
fragment CharsBase
  : [A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]
  ;

// Base letters plus underscore.
fragment CharsU
  : CharsBase
  | '_'
  ;

// Variable name body: starts with CharsU or a digit; continues with CharsU,
// digits, U+00B7, U+0300..U+036F or U+203F..U+2040 (no '-' and no '.').
fragment VarName
  : ( CharsU | [0-9] )
    ( CharsU | [0-9\u00B7\u0300-\u036F\u203F-\u2040] )*
  ;

// Name continuation characters: as in VarName, plus '-'.
fragment Chars
  : CharsU
  | '-'
  | [0-9\u00B7\u0300-\u036F\u203F-\u2040]
  ;

// Prefix label: starts with a base letter; may contain '.' internally but
// must not end with one.
fragment PrefixName
  : CharsBase ( ( Chars | '.' )* Chars )?
  ;

// A bare ':' or a prefix label immediately followed by ':'.
NameSpace
  : ':'
  | PrefixName ':'
  ;

// Starts with CharsU or a digit; may contain '.' internally but must not end
// with one.
LocalName
  : ( CharsU | [0-9] ) ( ( Chars | '.' )* Chars )?
  ;

// '?' or '$' followed by a variable name.
Variable
  : [?$] VarName
  ;

// Whitespace

// One or more spaces, tabs, line feeds or carriage returns. Declared as a
// fragment so that the keyword rules can also reference it.
fragment WS
  : [ \t\n\r]+
  ;

// Whitespace not consumed by another token is dropped from the token stream.
S
  : WS -> skip
  ;