// LICENSE AT END OF FILE (MIT).

/*
** Weclome to Metadesk!
** 
** Metadesk is a data description language designed to look like a programming
** language, and this is the accompanying parser library. While you are free to
** use it however you see fit, here are a couple of the uses we have intended
** to support:
**  + quickly writing a C or C++ metaprogram from scratch
**  + building "low budget" domain specific languages, such as marked-up
**    webpage content, or asset metadata
**  + creating robust and flexible config systems for applications
** 
** If it's your first time with Metadesk, check out the "How to Build" section
** below, and consider looking at the examples included with the library. The
** examples_directory.txt will help you find your way from the intro examples
** through all the more advanced aspects of the library you might like to
** learn about.
**
** Direct issues, questions, suggestions, requests, etc to:
** https://github.com/Dion-Systems/metadesk
** 
**
** How To Build:
**
** The library is set up as a direct source-include library, so if you have a
** single unit build you can just #include "md.h" and "md.c". If you have a
** multiple unit build you can #include "md.h" where necessary and add "md.c"
** as a separate compilation unit (extra care has to be taken if you intend to
** use overrides in a multiple unit build).
**
** See `bin/compile_flags.txt` for the flags to build with.
**
** The tests and examples can be built with the bash scripts in bin. There are
** a few things to know to use these scripts:
**  1. First you should run `bld_init.sh` which will initialize your copy of
**     Metadesk's build system.
**  2. On Linux the shell scripts should work as written. On Windows you will
**     need to use a bash interpreter specifically. Generally the `bash.exe`
**     that comes with an install of git on Windows works well for this.
**     Add it to your path or setup a batch script that calls it and then
**     pass the bash scripts to the interpreter to build.
**  3. You should be able to run the scripts:
**      `build_tests.sh`
**      `build_examples.sh`
**      `run_tests.sh`
**      `run_examples.sh`
**      `type_metadata_example.sh`
*/

#ifndef MD_H
#define MD_H

#define VERSION_MAJ 1
#define VERSION_MIN 0

//~ Set default values for controls
#if !defined(DEFAULT_BASIC_TYPES)
# define DEFAULT_BASIC_TYPES 1
#endif
#if !defined(DEFAULT_MEMSET)
# define DEFAULT_MEMSET 1
#endif
#if !defined(DEFAULT_FILE_LOAD)
# define DEFAULT_FILE_LOAD 1
#endif
#if !defined(DEFAULT_FILE_ITER)
# define DEFAULT_FILE_ITER 1
#endif
#if !defined(DEFAULT_MEMORY)
# define DEFAULT_MEMORY 1
#endif
#if !defined(DEFAULT_ARENA)
# define DEFAULT_ARENA 1
#endif
#if !defined(DEFAULT_SCRATCH)
# define DEFAULT_SCRATCH 1
#endif
#if !defined(DEFAULT_SPRINTF)
# define DEFAULT_SPRINTF 1
#endif

#if !defined(DISABLE_PRINT_HELPERS)
# define DISABLE_PRINT_HELPERS 0
#endif


//~/////////////////////////////////////////////////////////////////////////////
////////////////////////////// Context Cracking ////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

#if defined(__clang__)

# define COMPILER_CLANG 1

# if defined(__APPLE__) && defined(__MACH__)
#  define OS_MAC 1
# elif defined(__gnu_linux__)
#  define OS_LINUX 1
# elif defined(_WIN32)
#  define OS_WINDOWS 1
# else
#  error This compiler/platform combo is not supported yet
# endif

# if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64)
#  define ARCH_X64 1
# elif defined(i386) || defined(__i386) || defined(__i386__)
#  define ARCH_X86 1
# elif defined(__aarch64__)
#  define ARCH_ARM64 1
# elif defined(__arm__)
#  define ARCH_ARM32 1
# else
#  error architecture not supported yet
# endif

#elif defined(_MSC_VER)

# define COMPILER_CL 1

# if defined(_WIN32)
#  define OS_WINDOWS 1
# else
#  error This compiler/platform combo is not supported yet
# endif

# if defined(_M_AMD64)
#  define ARCH_X64 1
# elif defined(_M_IX86)
#  define ARCH_X86 1
# elif defined(_M_ARM64)
#  define ARCH_ARM64 1
# elif defined(_M_ARM)
#  define ARCH_ARM32 1
# else
#  error architecture not supported yet
# endif

# if _MSC_VER >= 1920
#  define COMPILER_CL_YEAR 2019
# elif _MSC_VER >= 1910
#  define COMPILER_CL_YEAR 2017
# elif _MSC_VER >= 1900
#  define COMPILER_CL_YEAR 2015
# elif _MSC_VER >= 1800
#  define COMPILER_CL_YEAR 2013
# elif _MSC_VER >= 1700
#  define COMPILER_CL_YEAR 2012
# elif _MSC_VER >= 1600
#  define COMPILER_CL_YEAR 2010
# elif _MSC_VER >= 1500
#  define COMPILER_CL_YEAR 2008
# elif _MSC_VER >= 1400
#  define COMPILER_CL_YEAR 2005
# else
#  define COMPILER_CL_YEAR 0
# endif

#elif defined(__GNUC__) || defined(__GNUG__)

# define COMPILER_GCC 1

# if defined(__gnu_linux__)
#  define OS_LINUX 1
# else
#  error This compiler/platform combo is not supported yet
# endif

# if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64)
#  define ARCH_X64 1
# elif defined(i386) || defined(__i386) || defined(__i386__)
#  define ARCH_X86 1
# elif defined(__aarch64__)
#  define ARCH_ARM64 1
# elif defined(__arm__)
#  define ARCH_ARM32 1
# else
#  error architecture not supported yet
# endif

#else
# error This compiler is not supported yet
#endif

#if defined(ARCH_X64)
# define ARCH_64BIT 1
#elif defined(ARCH_X86)
# define ARCH_32BIT 1
#endif

#if defined(__cplusplus)
# define LANG_CPP 1

// We can't get this 100% correct thanks to Microsoft's compiler.
// So this check lets us pre-define CPP_VERSION if we have to.
# if !defined(CPP_VERSION)
#  if defined(COMPILER_CL)
// CL is annoying and didn't update __cplusplus over time
// If it is available _MSVC_LANG serves the same role
#   if defined(_MSVC_LANG)
#    if _MSVC_LANG <= 199711L
#     define CPP_VERSION 98
#    elif _MSVC_LANG <= 201103L
#     define CPP_VERSION 11
#    elif _MSVC_LANG <= 201402L
#     define CPP_VERSION 14
#    elif _MSVC_LANG <= 201703L
#     define CPP_VERSION 17
#    elif _MSVC_LANG <= 202002L
#     define CPP_VERSION 20
#    else
#     define CPP_VERSION 23
#    endif
// If we don't have _MSVC_LANG we can guess from the compiler version
#   else
#    if COMPILER_CL_YEAR <= 2010
#     define CPP_VERSION 98
#    elif COMPILER_CL_YEAR <= 2015
#     define CPP_VERSION 11
#    else
#     define CPP_VERSION 17
#    endif
#   endif
#  else
// Other compilers use __cplusplus correctly
#   if __cplusplus <= 199711L
#    define CPP_VERSION 98
#   elif __cplusplus <= 201103L
#    define CPP_VERSION 11
#   elif __cplusplus <= 201402L
#    define CPP_VERSION 14
#   elif __cplusplus <= 201703L
#    define CPP_VERSION 17
#   elif __cplusplus <= 202002L
#    define CPP_VERSION 20
#   else
#    define CPP_VERSION 23
#   endif
#  endif
# endif

#else
# define LANG_C 1
#endif

// zeroify

#if !defined(ARCH_32BIT)
# define ARCH_32BIT 0
#endif
#if !defined(ARCH_64BIT)
# define ARCH_64BIT 0
#endif
#if !defined(ARCH_X64)
# define ARCH_X64 0
#endif
#if !defined(ARCH_X86)
# define ARCH_X86 0
#endif
#if !defined(ARCH_ARM64)
# define ARCH_ARM64 0
#endif
#if !defined(ARCH_ARM32)
# define ARCH_ARM32 0
#endif
#if !defined(COMPILER_CL)
# define COMPILER_CL 0
#endif
#if !defined(COMPILER_GCC)
# define COMPILER_GCC 0
#endif
#if !defined(COMPILER_CLANG)
# define COMPILER_CLANG 0
#endif
#if !defined(OS_WINDOWS)
# define OS_WINDOWS 0
#endif
#if !defined(OS_LINUX)
# define OS_LINUX 0
#endif
#if !defined(OS_MAC)
# define OS_MAC 0
#endif
#if !defined(LANG_C)
# define LANG_C 0
#endif
#if !defined(LANG_CPP)
# define LANG_CPP 0
#endif
#if !defined(CPP_VERSION)
# define CPP_VERSION 0
#endif

#if LANG_CPP
# define ZERO_STRUCT {}
#else
# define ZERO_STRUCT {0}
#endif

#if LANG_C
# define C_LINKAGE_BEGIN
# define C_LINKAGE_END
#else
# define C_LINKAGE_BEGIN extern "C"{
# define C_LINKAGE_END }
#endif

#if COMPILER_CL
# define THREAD_LOCAL __declspec(thread)
#elif COMPILER_GCC || COMPILER_CLANG
# define THREAD_LOCAL __thread
#endif

//~/////////////////////////////////////////////////////////////////////////////
///////////////////////////// Helpers, Macros, Etc /////////////////////////////
////////////////////////////////////////////////////////////////////////////////

//~ Linkage Wrappers

#if !defined(FUNCTION)
# define FUNCTION
#endif

#if !defined(GLOBAL)
# define GLOBAL static
#endif

//~ Basic Utilities

#define Assert(c) if (!(c)) { *(volatile u64 *)0 = 0; }
#define StaticAssert(c,label) u8 static_assert_##label[(c)?(1):(-1)]
#define ArrayCount(a) (sizeof(a) / sizeof((a)[0]))

#define Min(a,b) (((a)<(b))?(a):(b))
#define Max(a,b) (((a)>(b))?(a):(b))
#define ClampBot(a,b) Max(a,b)
#define ClampTop(a,b) Min(a,b)

#define AlignPow2(x,b) (((x)+((b)-1))&(~((b)-1)))

//~ Linked List Macros

// terminator modes
#define CheckNull(p) ((p)==0)
#define SetNull(p) ((p)=0)
#define CheckNil(p) (NodeIsNil(p))
#define SetNil(p) ((p)=NilNode())

// implementations
#define QueuePush_NZ(f,l,n,next,zchk,zset) (zchk(f)?\
(f)=(l)=(n):\
((l)->next=(n),(l)=(n),zset((n)->next)))
#define QueuePop_NZ(f,l,next,zset) ((f)==(l)?\
(zset(f),zset(l)):\
((f)=(f)->next))
#define StackPush_N(f,n,next) ((n)->next=(f),(f)=(n))
#define StackPop_NZ(f,next,zchk) (zchk(f)?0:((f)=(f)->next))

#define DblPushBack_NPZ(f,l,n,next,prev,zchk,zset) \
(zchk(f)?\
((f)=(l)=(n),zset((n)->next),zset((n)->prev)):\
((n)->prev=(l),(l)->next=(n),(l)=(n),zset((n)->next)))
#define DblRemove_NPZ(f,l,n,next,prev,zchk,zset) (((f)==(n))?\
((f)=(f)->next, (zchk(f) ? (zset(l)) : zset((f)->prev))):\
((l)==(n))?\
((l)=(l)->prev, (zchk(l) ? (zset(f)) : zset((l)->next))):\
((zchk((n)->next) ? (0) : ((n)->next->prev=(n)->prev)),\
(zchk((n)->prev) ? (0) : ((n)->prev->next=(n)->next))))


// compositions
#define QueuePush(f,l,n) QueuePush_NZ(f,l,n,next,CheckNull,SetNull)
#define QueuePop(f,l)    QueuePop_NZ(f,l,next,SetNull)
#define StackPush(f,n)   StackPush_N(f,n,next)
#define StackPop(f)      StackPop_NZ(f,next,CheckNull)
#define DblPushBack(f,l,n)  DblPushBack_NPZ(f,l,n,next,prev,CheckNull,SetNull)
#define DblPushFront(f,l,n) DblPushBack_NPZ(l,f,n,prev,next,CheckNull,SetNull)
#define DblRemove(f,l,n)    DblRemove_NPZ(f,l,n,next,prev,CheckNull,SetNull)

#define NodeDblPushBack(f,l,n)  DblPushBack_NPZ(f,l,n,next,prev,CheckNil,SetNil)
#define NodeDblPushFront(f,l,n) DblPushBack_NPZ(l,f,n,prev,next,CheckNil,SetNil)
#define NodeDblRemove(f,l,n)    DblRemove_NPZ(f,l,n,next,prev,CheckNil,SetNil)


//~ Memory Operations

#define MemorySet(p,v,z)    (IMPL_Memset(p,v,z))
#define MemoryZero(p,z)     (IMPL_Memset(p,0,z))
#define MemoryZeroStruct(p) (IMPL_Memset(p,0,sizeof(*(p))))
#define MemoryCopy(d,s,z)   (IMPL_Memmove(d,s,z))

//~ sprintf
#if DEFAULT_SPRINTF
#define STB_SPRINTF_DECORATE(name) md_stbsp_##name
#define IMPL_Vsnprintf md_stbsp_vsnprintf
#include "md_stb_sprintf.h"
#endif

//~/////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// Types /////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

//~ Basic Types

#include <stdarg.h>

#if defined(DEFAULT_BASIC_TYPES)

#include <stdint.h>
typedef int8_t   i8;
typedef int16_t  i16;
typedef int32_t  i32;
typedef int64_t  i64;
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef float    f32;
typedef double   f64;

#endif

typedef i8  b8;
typedef i16 b16;
typedef i32 b32;
typedef i64 b64;

//~ Default Arena

#if DEFAULT_ARENA

typedef struct ArenaDefault ArenaDefault;
struct ArenaDefault
{
    ArenaDefault *prev;
    ArenaDefault *current;
    u64 base_pos;
    u64 pos;
    u64 cmt;
    u64 cap;
    u64 align;
};
#define IMPL_Arena ArenaDefault

#endif

//~ Abstract Arena

#if !defined(IMPL_Arena)
# error Missing implementation for IMPL_Arena
#endif

typedef IMPL_Arena Arena;

//~ Arena Helpers

typedef struct ArenaTemp ArenaTemp;
struct ArenaTemp
{
    Arena *arena;
    u64 pos;
};

//~ Basic Unicode string types.

typedef struct String8 String8;
struct String8
{
    u8 *str;
    u64 size;
};

typedef struct String16 String16;
struct String16
{
    u16 *str;
    u64 size;
};

typedef struct String32 String32;
struct String32
{
    u32 *str;
    u64 size;
};

typedef struct String8Node String8Node;
struct String8Node
{
    String8Node *next;
    String8 string;
};

typedef struct String8List String8List;
struct String8List
{
    u64 node_count;
    u64 total_size;
    String8Node *first;
    String8Node *last;
};

typedef struct StringJoin StringJoin;
struct StringJoin
{
    String8 pre;
    String8 mid;
    String8 post;
};

// NOTE(rjf): @maintenance These three flag types must not overlap.
typedef u32 MatchFlags;
typedef u32 StringMatchFlags;
typedef u32 NodeMatchFlags;
enum
{
    MatchFlag_FindLast = (1<<0),
};
enum
{
    StringMatchFlag_CaseInsensitive  = (1<<4),
    StringMatchFlag_RightSideSloppy  = (1<<5),
    StringMatchFlag_SlashInsensitive = (1<<6),
};
enum
{
    NodeMatchFlag_Tags             = (1<<16),
    NodeMatchFlag_TagArguments     = (1<<17),
    NodeMatchFlag_NodeFlags        = (1<<18),
};

typedef struct DecodedCodepoint DecodedCodepoint;
struct DecodedCodepoint
{
    u32 codepoint;
    u32 advance;
};

typedef enum IdentifierStyle
{
    IdentifierStyle_UpperCamelCase,
    IdentifierStyle_LowerCamelCase,
    IdentifierStyle_UpperCase,
    IdentifierStyle_LowerCase,
}
IdentifierStyle;

//~ Node types that are used to build all ASTs.

typedef enum NodeKind
{
    // NOTE(rjf): @maintenance Must be kept in sync with StringFromNodeKind.
    
    NodeKind_Nil,
    
    // NOTE(rjf): Generated by parser
    NodeKind_File,
    NodeKind_ErrorMarker,
    
    // NOTE(rjf): Parsed from user Metadesk code
    NodeKind_Main,
    NodeKind_Tag,
    
    // NOTE(rjf): User-created data structures
    NodeKind_List,
    NodeKind_Reference,
    
    NodeKind_COUNT,
}
NodeKind;

typedef u64 NodeFlags;
#define NodeFlag_AfterFromBefore(f) ((f) << 1)
enum
{
    // NOTE(rjf): @maintenance Must be kept in sync with StringListFromNodeFlags.
    
    // NOTE(rjf): @maintenance Because of NodeFlag_AfterFromBefore, it is
    // *required* that every single pair of "Before*" or "After*" flags be in
    // the correct order which is that the Before* flag comes first, and the
    // After* flag comes immediately after (After* being the more significant
    // bit).
    
    NodeFlag_HasParenLeft               = (1<<0),
    NodeFlag_HasParenRight              = (1<<1),
    NodeFlag_HasBracketLeft             = (1<<2),
    NodeFlag_HasBracketRight            = (1<<3),
    NodeFlag_HasBraceLeft               = (1<<4),
    NodeFlag_HasBraceRight              = (1<<5),
    
    NodeFlag_MaskSetDelimiters          = (0x3F<<0),
    
    NodeFlag_IsBeforeSemicolon          = (1<<6),
    NodeFlag_IsAfterSemicolon           = (1<<7),
    NodeFlag_IsBeforeComma              = (1<<8),
    NodeFlag_IsAfterComma               = (1<<9),
    
    NodeFlag_MaskSeperators             = (0xF<<6),
    
    NodeFlag_StringSingleQuote       = (1<<10),
    NodeFlag_StringDoubleQuote       = (1<<11),
    NodeFlag_StringTick              = (1<<12),
    NodeFlag_StringTriplet           = (1<<13),
    
    NodeFlag_MaskStringDelimiters    = (0xF<<10),
    
    NodeFlag_Numeric                 = (1<<14),
    NodeFlag_Identifier              = (1<<15),
    NodeFlag_StringLiteral           = (1<<16),
    NodeFlag_Symbol                  = (1<<17),
    
    NodeFlag_MaskLabelKind           = (0xF<<14),
};

typedef struct Node Node;
struct Node
{
    // Tree relationship data.
    Node *next;
    Node *prev;
    Node *parent;
    Node *first_child;
    Node *last_child;
    
    // Tag list.
    Node *first_tag;
    Node *last_tag;
    
    // Node info.
    NodeKind kind;
    NodeFlags flags;
    String8 string;
    String8 raw_string;
    
    // Source code location information.
    u64 offset;
    
    // Reference.
    Node *ref_target;
    
    // Comments.
    // @usage prev_comment/next_comment should be considered "hidden". Rely on
    // the functions PrevCommentFromNode/NextCommentFromNode to access
    // these. Directly access to these is likely to break in a future version.
    String8 prev_comment;
    String8 next_comment;
};

//~ Code Location Info.

typedef struct CodeLoc CodeLoc;
struct CodeLoc
{
    String8 filename;
    u32 line;
    u32 column;
};

//~ String-To-Ptr and Ptr-To-Ptr tables

typedef struct MapKey MapKey;
struct MapKey
{
    u64 hash;
    u64 size;
    void *ptr;
};

typedef struct MapSlot MapSlot;
struct MapSlot
{
    MapSlot *next;
    MapKey key;
    void *val;
};

typedef struct MapBucket MapBucket;
struct MapBucket
{
    MapSlot *first;
    MapSlot *last;
};

typedef struct Map Map;
struct Map
{
    MapBucket *buckets;
    u64 bucket_count;
};

//~ Tokens

typedef u32 TokenKind;
enum
{
    TokenKind_Identifier          = (1<<0),
    TokenKind_Numeric             = (1<<1),
    TokenKind_StringLiteral       = (1<<2),
    TokenKind_Symbol              = (1<<3),
    TokenKind_Reserved            = (1<<4),
    TokenKind_Comment             = (1<<5),
    TokenKind_Whitespace          = (1<<6),
    TokenKind_Newline             = (1<<7),
    TokenKind_BrokenComment       = (1<<8),
    TokenKind_BrokenStringLiteral = (1<<9),
    TokenKind_BadCharacter        = (1<<10),
};

typedef u32 MD_TokenGroups;
enum
{
    TokenGroup_Comment = TokenKind_Comment,
    TokenGroup_Whitespace = (TokenKind_Whitespace|
                                TokenKind_Newline),
    TokenGroup_Irregular = (TokenGroup_Comment|
                               TokenGroup_Whitespace),
    TokenGroup_Regular = ~TokenGroup_Irregular,
    TokenGroup_Label   = (TokenKind_Identifier|
                             TokenKind_Numeric|
                             TokenKind_StringLiteral|
                             TokenKind_Symbol),
    TokenGroup_Error   = (TokenKind_BrokenComment|
                             TokenKind_BrokenStringLiteral|
                             TokenKind_BadCharacter),
};

typedef struct Token Token;
struct Token
{
    TokenKind kind;
    NodeFlags node_flags;
    String8 string;
    String8 raw_string;
};

//~ Parsing State

typedef enum MessageKind
{
    // NOTE(rjf): @maintenance This enum needs to be sorted in order of
    // severity.
    MessageKind_Null,
    MessageKind_Note,
    MessageKind_Warning,
    MessageKind_Error,
    MessageKind_FatalError,
}
MessageKind;

typedef struct Message Message;
struct Message
{
    Message *next;
    Node *node;
    MessageKind kind;
    String8 string;
    void *user_ptr;
};

typedef struct MessageList MessageList;
struct MessageList
{
    MessageKind max_message_kind;
    // TODO(allen): rename
    u64 node_count;
    Message *first;
    Message *last;
};

typedef enum ParseSetRule
{
    ParseSetRule_EndOnDelimiter,
    ParseSetRule_Global,
} ParseSetRule;

typedef struct ParseResult ParseResult;
struct ParseResult
{
    Node *node;
    u64 string_advance;
    MessageList errors;
};

//~ Expression Parsing

typedef enum ExprOprKind
{
    ExprOprKind_Null,
    ExprOprKind_Prefix,
    ExprOprKind_Postfix,
    ExprOprKind_Binary,
    ExprOprKind_BinaryRightAssociative,
    ExprOprKind_COUNT,
} ExprOprKind;

typedef struct ExprOpr ExprOpr;
struct ExprOpr
{
    struct ExprOpr *next;
    u32 op_id;
    ExprOprKind kind;
    u32 precedence;
    String8 string;
    void *op_ptr;
};

typedef struct ExprOprList ExprOprList;
struct ExprOprList
{
    ExprOpr *first;
    ExprOpr *last;
    u64 count;
};

typedef struct ExprOprTable ExprOprTable;
struct ExprOprTable
{
    // TODO(mal): @upgrade_potential Hash?
    ExprOprList table[ExprOprKind_COUNT];
};

typedef struct Expr Expr;
struct Expr
{
    struct Expr *parent;
    union
    {
        struct Expr *left;
        struct Expr *unary_operand;
    };
    struct Expr *right;
    ExprOpr *op;
    Node *md_node;
};

typedef struct ExprParseResult ExprParseResult;
struct ExprParseResult
{
    Expr *expr;
    MessageList errors;
};

// TODO(allen): nil Expr

typedef struct ExprParseCtx ExprParseCtx;
struct ExprParseCtx
{
    ExprOprTable *op_table;
    
#define POSTFIX_SETLIKE_OP_COUNT 5   // (), [], {}, [), (]
    struct
    {
        ExprOpr *postfix_set_ops[POSTFIX_SETLIKE_OP_COUNT];
        NodeFlags postfix_set_flags[POSTFIX_SETLIKE_OP_COUNT];
    } accel;
#undef POSTFIX_SETLIKE_OP_COUNT
    
    MessageList errors;
};

typedef void (*BakeOperatorErrorHandler)(MessageKind kind, String8 s);

//~ String Generation Types

typedef u32 GenerateFlags;
enum
{
    GenerateFlag_Tags         = (1<<0),
    GenerateFlag_TagArguments = (1<<1),
    GenerateFlag_Children     = (1<<2),
    GenerateFlag_Comments     = (1<<3),
    GenerateFlag_NodeKind     = (1<<4),
    GenerateFlag_NodeFlags    = (1<<5),
    GenerateFlag_Location     = (1<<6),
    
    GenerateFlags_Tree = (GenerateFlag_Tags |
                             GenerateFlag_TagArguments |
                             GenerateFlag_Children),
    GenerateFlags_All  = 0xffffffff,
};

//~ Command line parsing helper types.

typedef struct CmdLineOption CmdLineOption;
struct CmdLineOption
{
    CmdLineOption *next;
    String8 name;
    String8List values;
};

typedef struct CmdLine CmdLine;
struct CmdLine
{
    String8List inputs;
    CmdLineOption *first_option;
    CmdLineOption *last_option;
};

//~ File system access types.

typedef u32 FileFlags;
enum
{
    FileFlag_Directory = (1<<0),
};

typedef struct FileInfo FileInfo;
struct FileInfo
{
    FileFlags flags;
    String8 filename;
    u64 file_size;
};

typedef struct FileIter FileIter;
struct FileIter
{
    // This is opaque state to store OS-specific file-system iteration data.
    u8 opaque[640];
};

//~/////////////////////////////////////////////////////////////////////////////
////////////////////////////////// Functions ///////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

//~ Arena

FUNCTION Arena*    ArenaAlloc(void);
FUNCTION void         ArenaRelease(Arena *arena);

FUNCTION void*        ArenaPush(Arena *arena, u64 size);
FUNCTION void         ArenaPutBack(Arena *arena, u64 size);
FUNCTION void         ArenaSetAlign(Arena *arena, u64 boundary);
FUNCTION void         ArenaPushAlign(Arena *arena, u64 boundary);
FUNCTION void         ArenaClear(Arena *arena);

#define PushArray(a,T,c) (T*)(ArenaPush((a), sizeof(T)*(c)))
#define PushArrayZero(a,T,c) (T*)(MemoryZero(PushArray(a,T,c), sizeof(T)*(c)))

FUNCTION ArenaTemp ArenaBeginTemp(Arena *arena);
FUNCTION void         ArenaEndTemp(ArenaTemp temp);

//~ Arena Scratch Pool

FUNCTION ArenaTemp GetScratch(Arena **conflicts, u64 count);

#define ReleaseScratch(scratch) ArenaEndTemp(scratch)

//~ Characters

FUNCTION b32 CharIsAlpha(u8 c);
FUNCTION b32 CharIsAlphaUpper(u8 c);
FUNCTION b32 CharIsAlphaLower(u8 c);
FUNCTION b32 CharIsDigit(u8 c);
FUNCTION b32 CharIsUnreservedSymbol(u8 c);
FUNCTION b32 CharIsReservedSymbol(u8 c);
FUNCTION b32 CharIsSpace(u8 c);
FUNCTION u8  CharToUpper(u8 c);
FUNCTION u8  CharToLower(u8 c);
FUNCTION u8  CharToForwardSlash(u8 c);

//~ Strings

FUNCTION u64         CalculateCStringLength(char *cstr);

FUNCTION String8     S8(u8 *str, u64 size);
#define S8CString(s)    S8((u8 *)(s), CalculateCStringLength(s))

#if LANG_C
# define S8Lit(s)        (String8){(u8 *)(s), sizeof(s)-1}
#elif LANG_CPP
# define S8Lit(s)        S8((u8*)(s), sizeof(s) - 1)
#endif
#define S8LitComp(s)     {(u8 *)(s), sizeof(s)-1}

#if CPP_VERSION >= 11
static inline String8
operator "" _md(const char *s, size_t size)
{
    String8 str = S8((u8 *)s, (u64)size);
    return str;
}
#endif

FUNCTION String8     S8Range(u8 *first, u8 *opl);

FUNCTION String8     S8Substring(String8 str, u64 min, u64 max);
FUNCTION String8     S8Skip(String8 str, u64 min);
FUNCTION String8     S8Chop(String8 str, u64 nmax);
FUNCTION String8     S8Prefix(String8 str, u64 size);
FUNCTION String8     S8Suffix(String8 str, u64 size);

FUNCTION b32         S8Match(String8 a, String8 b, MatchFlags flags);
FUNCTION u64         S8FindSubstring(String8 str, String8 substring,
                                              u64 start_pos, MatchFlags flags);

FUNCTION String8     S8Copy(Arena *arena, String8 string);
FUNCTION String8     S8FmtV(Arena *arena, char *fmt, va_list args);

FUNCTION String8     S8Fmt(Arena *arena, char *fmt, ...);

#define S8VArg(s) (int)(s).size, (s).str

FUNCTION void           S8ListPush(Arena *arena, String8List *list,
                                         String8 string);
FUNCTION void           S8ListPushFmt(Arena *arena, String8List *list,
                                            char *fmt, ...);

FUNCTION void           S8ListConcat(String8List *list, String8List *to_push);
FUNCTION String8List S8Split(Arena *arena, String8 string, int split_count,
                                      String8 *splits);
FUNCTION String8     S8ListJoin(Arena *arena, String8List list,
                                         StringJoin *join);

FUNCTION String8     S8Stylize(Arena *arena, String8 string,
                                        IdentifierStyle style, String8 separator);

//~ Unicode Conversions

FUNCTION DecodedCodepoint DecodeCodepointFromUtf8(u8 *str, u64 max);
FUNCTION DecodedCodepoint DecodeCodepointFromUtf16(u16 *str, u64 max);
FUNCTION u32         Utf8FromCodepoint(u8 *out, u32 codepoint);
FUNCTION u32         Utf16FromCodepoint(u16 *out, u32 codepoint);
FUNCTION String8     S8FromS16(Arena *arena, String16 str);
FUNCTION String16    S16FromS8(Arena *arena, String8 str);
FUNCTION String8     S8FromS32(Arena *arena, String32 str);
FUNCTION String32    S32FromS8(Arena *arena, String8 str);

//~ String Skipping/Chopping Helpers

// This is intended for removing extensions.
FUNCTION String8 PathChopLastPeriod(String8 string);

// This is intended for removing everything but the filename.
FUNCTION String8 PathSkipLastSlash(String8 string);

// This is intended for getting an extension from a filename.
FUNCTION String8 PathSkipLastPeriod(String8 string);

// This is intended for getting the folder string from a full path.
FUNCTION String8 PathChopLastSlash(String8 string);

FUNCTION String8 S8SkipWhitespace(String8 string);
FUNCTION String8 S8ChopWhitespace(String8 string);

//~ Numeric Strings

FUNCTION b32     StringIsU64(String8 string, u32 radix);
FUNCTION b32     StringIsCStyleInt(String8 string);

FUNCTION u64     U64FromString(String8 string, u32 radix);
FUNCTION i64     CStyleIntFromString(String8 string);
FUNCTION f64     F64FromString(String8 string);

FUNCTION String8 CStyleHexStringFromU64(Arena *arena, u64 x, b32 caps);

//~ Enum/Flag Strings

FUNCTION String8     StringFromNodeKind(NodeKind kind);
FUNCTION String8List StringListFromNodeFlags(Arena *arena, NodeFlags flags);

//~ Map Table Data Structure

FUNCTION u64 HashStr(String8 string);
FUNCTION u64 HashPtr(void *p);

FUNCTION Map      MapMakeBucketCount(Arena *arena, u64 bucket_count);
FUNCTION Map      MapMake(Arena *arena);
FUNCTION MapKey   MapKeyStr(String8 string);
FUNCTION MapKey   MapKeyPtr(void *ptr);
FUNCTION MapSlot* MapLookup(Map *map, MapKey key);
FUNCTION MapSlot* MapScan(MapSlot *first_slot, MapKey key);
FUNCTION MapSlot* MapInsert(Arena *arena, Map *map, MapKey key, void *val);
FUNCTION MapSlot* MapOverwrite(Arena *arena, Map *map, MapKey key,
                                        void *val);

//~ Parsing

FUNCTION Token       TokenFromString(String8 string);
FUNCTION u64         LexAdvanceFromSkips(String8 string, TokenKind skip_kinds);
FUNCTION ParseResult ParseResultZero(void);
FUNCTION ParseResult ParseNodeSet(Arena *arena, String8 string, u64 offset, Node *parent,
                                           ParseSetRule rule);
FUNCTION ParseResult ParseOneNode(Arena *arena, String8 string, u64 offset);
FUNCTION ParseResult ParseWholeString(Arena *arena, String8 filename, String8 contents);

FUNCTION ParseResult ParseWholeFile(Arena *arena, String8 filename);

//~ Messages (Errors/Warnings)

FUNCTION Node*   MakeErrorMarkerNode(Arena *arena, String8 parse_contents,
                                              u64 offset);

FUNCTION Message*MakeNodeError(Arena *arena, Node *node,
                                        MessageKind kind, String8 str);
FUNCTION Message*MakeDetachedError(Arena *arena, MessageKind kind,
                                            String8 str, void *ptr);
FUNCTION Message*MakeTokenError(Arena *arena, String8 parse_contents,
                                         Token token, MessageKind kind,
                                         String8 str);

FUNCTION void       MessageListPush(MessageList *list, Message *message);
FUNCTION void       MessageListConcat(MessageList *list, MessageList *to_push);

//~ Location Conversion

FUNCTION CodeLoc CodeLocFromFileOffset(String8 filename, u8 *base, u64 offset);
FUNCTION CodeLoc CodeLocFromNode(Node *node);

//~ Tree/List Building

FUNCTION b32   NodeIsNil(Node *node);
FUNCTION Node *NilNode(void);
FUNCTION Node *MakeNode(Arena *arena, NodeKind kind, String8 string,
                                 String8 raw_string, u64 offset);
FUNCTION void     PushChild(Node *parent, Node *new_child);
FUNCTION void     PushTag(Node *node, Node *tag);

FUNCTION Node *MakeList(Arena *arena);
FUNCTION void     ListConcatInPlace(Node *list, Node *to_push);
FUNCTION Node *PushNewReference(Arena *arena, Node *list, Node *target);

//~ Introspection Helpers

// These calls are for getting info from nodes, and introspecting
// on trees that are returned to you by the parser.

FUNCTION Node *  FirstNodeWithString(Node *first, String8 string, MatchFlags flags);
FUNCTION Node *  NodeAtIndex(Node *first, int n);
FUNCTION Node *  FirstNodeWithFlags(Node *first, NodeFlags flags);
FUNCTION int        IndexFromNode(Node *node);
FUNCTION Node *  RootFromNode(Node *node);
FUNCTION Node *  MD_ChildFromString(Node *node, String8 child_string, MatchFlags flags);
FUNCTION Node *  TagFromString(Node *node, String8 tag_string, MatchFlags flags);
FUNCTION Node *  ChildFromIndex(Node *node, int n);
FUNCTION Node *  TagFromIndex(Node *node, int n);
FUNCTION Node *  TagArgFromIndex(Node *node, String8 tag_string, MatchFlags flags, int n);
FUNCTION Node *  TagArgFromString(Node *node, String8 tag_string, MatchFlags tag_str_flags, String8 arg_string, MatchFlags arg_str_flags);
FUNCTION b32     NodeHasChild(Node *node, String8 string, MatchFlags flags);
FUNCTION b32     NodeHasTag(Node *node, String8 string, MatchFlags flags);
FUNCTION i64     ChildCountFromNode(Node *node);
FUNCTION i64     TagCountFromNode(Node *node);
FUNCTION Node *  ResolveNodeFromReference(Node *node);
FUNCTION Node*   NodeNextWithLimit(Node *node, Node *opl);

FUNCTION String8 PrevCommentFromNode(Node *node);
FUNCTION String8 NextCommentFromNode(Node *node);

// NOTE(rjf): For-Loop Helpers
#define EachNode(it, first) Node *it = (first); !NodeIsNil(it); it = it->next

//~ Error/Warning Helpers

FUNCTION String8 StringFromMessageKind(MessageKind kind);

#define FmtCodeLoc "%.*s:%i:%i:"
#define CodeLocVArg(loc) S8VArg((loc).filename), (loc).line, (loc).column

FUNCTION String8 MD_FormatMessage(Arena *arena, CodeLoc loc, MessageKind kind,
                                        String8 string);

#if !DISABLE_PRINT_HELPERS
#include <stdio.h>
FUNCTION void PrintMessage(FILE *file, CodeLoc loc, MessageKind kind,
                                 String8 string);
FUNCTION void PrintMessageFmt(FILE *file, CodeLoc code_loc, MessageKind kind,
                                    char *fmt, ...);

#define PrintGenNoteCComment(f) fprintf((f), "// generated by %s:%d\n", __FILE__, __LINE__)
#endif

//~ Tree Comparison/Verification

FUNCTION b32 NodeMatch(Node *a, Node *b, MatchFlags flags);
FUNCTION b32 NodeDeepMatch(Node *a, Node *b, MatchFlags flags);

//~ Expression Parsing

FUNCTION void               ExprOprPush(Arena *arena, ExprOprList *list,
                                              ExprOprKind kind, u64 precedence,
                                              String8 op_string,
                                              u32 op_id, void *op_ptr);

FUNCTION ExprOprTable    ExprBakeOprTableFromList(Arena *arena,
                                                           ExprOprList *list);
FUNCTION ExprOpr*        ExprOprFromKindString(ExprOprTable *table,
                                                        ExprOprKind kind, String8 s);

FUNCTION ExprParseResult ExprParse(Arena *arena, ExprOprTable *op_table,
                                            Node *first, Node *one_past_last);

FUNCTION Expr* Expr_NewLeaf(Arena *arena, Node *node);
FUNCTION Expr* Expr_NewOpr(Arena *arena, ExprOpr *op, Node *op_node,
                                    Expr *left, Expr *right);

FUNCTION ExprParseCtx ExprParse_MakeContext(ExprOprTable *table);

FUNCTION Expr* ExprParse_TopLevel(Arena *arena, ExprParseCtx *ctx,
                                           Node *first, Node *opl);
FUNCTION b32   ExprParse_OprConsume(ExprParseCtx *ctx,
                                             Node **iter, Node *opl,
                                             ExprOprKind kind,
                                             u32 min_precedence,
                                             ExprOpr **op_out);
FUNCTION Expr* ExprParse_Atom(Arena *arena, ExprParseCtx *ctx,
                                       Node **iter, Node *first, Node *opl);
FUNCTION Expr* ExprParse_MinPrecedence(Arena *arena, ExprParseCtx *ctx,
                                                Node **iter, Node *first, Node *opl,
                                                u32 min_precedence);


//~ String Generation

FUNCTION void DebugDumpFromNode(Arena *arena, String8List *out, Node *node,
                                      int indent, String8 indent_string,
                                      GenerateFlags flags);
FUNCTION void ReconstructionFromNode(Arena *arena, String8List *out, Node *node,
                                           int indent, String8 indent_string);

//~ Command Line Argument Helper

FUNCTION String8List StringListFromArgCV(Arena *arena, int argument_count,
                                                  char **arguments);
FUNCTION CmdLine MakeCmdLineFromOptions(Arena *arena, String8List options);
FUNCTION String8List CmdLineValuesFromString(CmdLine cmdln, String8 name);
FUNCTION b32 CmdLineB32FromString(CmdLine cmdln, String8 name);
FUNCTION i64 CmdLineI64FromString(CmdLine cmdln, String8 name);

//~ File System

FUNCTION String8  LoadEntireFile(Arena *arena, String8 filename);
FUNCTION b32      FileIterBegin(FileIter *it, String8 path);
FUNCTION FileInfo FileIterNext(Arena *arena, FileIter *it);
FUNCTION void        FileIterEnd(FileIter *it);

#endif // MD_H

/*
Copyright 2021 Dion Systems LLC

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/