Some kind of tokenizing, accidentally started creating a taxonomy instead of solving the problem

main
parent 14d33756cd
commit 015487162b

@ -94,7 +94,6 @@ typedef struct TextChunkList
TextChunk text; TextChunk text;
} TextChunkList; } TextChunkList;
typedef enum typedef enum
{ {
ARG_CHARACTER, ARG_CHARACTER,
@ -286,11 +285,16 @@ typedef enum HealthStatus {
HEALTH_verge_of_death, HEALTH_verge_of_death,
} HealthStatus; } HealthStatus;
// Whatever this health is, it can be perceived by others, i.e. it's public
typedef struct Health { typedef struct Health {
HealthStatus status; HealthStatus status;
float drunkenness; // 1.0 is max drunkenness float drunkenness; // 1.0 is max drunkenness
} Health; } Health;
// these are items and events that are available during the game, but 'rendered' to different structs
// when sent to the AI as text so that they're more stable. I.e., if you change the name of an item or an index,
// old memories still work, old reactions to items in a room still work, etc.
typedef enum ItemKind { typedef enum ItemKind {
ITEM_none, ITEM_none,
ITEM_whiskey, ITEM_whiskey,
@ -307,36 +311,84 @@ typedef enum EventKind {
EVENT_stopped_talking, EVENT_stopped_talking,
} EventKind; } EventKind;
// these are the structs as presented to the AI, without a dependence on game data.
typedef struct ItemInSituation {
TextChunk name;
TextChunk description; // might include some state, e.g 'the beer was drank 5 times'
ItemKind actual_item_kind; // used to map back to gameplay items, sometimes might be invalid item if that's not the purpose of the item in a situation
} ItemInSituation;
typedef struct Response { typedef struct Response {
TextChunk text; TextChunk text; // for speech or memory
ActionKind action;
NpcKind target; // both of these indices correspond to what was provided in the CharacterSituation
int memory_slot; int action_index;
int target_index;
int memory_index;
} Response; } Response;
typedef BUFF(Response, 5) FullResponse; // what the AI is allowed to output typedef BUFF(Response, 5) FullResponse; // what the AI is allowed to output
typedef struct CharacterPerception { typedef struct CharacterPerception {
Health health; Health health;
TextChunk name;
ItemInSituation held_item;
} CharacterPerception; } CharacterPerception;
typedef struct ScenePerception {
BUFF(CharacterPerception, 10) characters;
BUFF(ItemKind, 10) items_on_floor; // available to be picked up or navigated to
} ScenePerception;
typedef struct CharacterStatus { typedef struct CharacterStatus {
TextChunk goal;
u64 in_room;
Item held_item; Item held_item;
Health health; Health health;
} CharacterStatus; } CharacterStatus;
typedef enum TargetKind {
TARGET_invalid,
TARGET_path,
TARGET_person,
TARGET_item,
} TargetKind;
typedef struct Target {
TextChunk name;
TextChunk description;
TargetKind kind;
} Target;
// the situation for somebody // the situation for somebody
typedef struct CharacterSituation { typedef struct CharacterSituation {
TextChunk goal; // kind of like the most important memory, self described character's goal right now
TextChunk memories[4]; // explicit numbered memories TextChunk memories[4]; // explicit numbered memories
BUFF(TextChunk, 5) events; // events that this character has observed BUFF(TextChunk, 5) events; // events that this character has observed in the plain english form
BUFF(Target, 10) targets;
CharacterStatus my_status;
CharacterStatus status; CharacterStatus status;
} CharacterSituation; } CharacterSituation;
/*
The decision here is that training samples must remain stable as the game is changed: i.e., if the characters
in the situations are edited/changed, the training samples KEEP the old characters. This is so custom characters
don't become invalid when the game is updated. I'm making the same decision with the items: instead of storing
an across-update-stable 'item ID', I want to store the name of the item. All items can be used and have the same 'API',
so no updating is necessary there.
I.E: the situation is freestanding and doesn't refer to any other data. Not NpcKind, not ItemKind, not anything.
Even the list of available actions and their arguments are stored in the situation. It's basically like pure JSON data.
Also, you can't deserialize training samples easily because they are exact text, and such a thing should NEVER happen.
Training sample into gamestate = bad time. For things like recording gamestate for replays, there's a real man serialization
codepath into binary.
*/
typedef struct TrainingSample { typedef struct TrainingSample {
CharacterSituation situation; CharacterSituation situation;
Response response; FullResponse response;
} TrainingSample; } TrainingSample;
typedef struct Npc { typedef struct Npc {

@ -29,41 +29,33 @@
#include <windows.h> // for sleep. #include <windows.h> // for sleep.
void error_impl(Arena *arena, String8List *errors, String8 message) typedef struct Error {
struct Error *next, *prev;
String8 message;
int line;
} Error;
typedef struct ErrorList {
int count;
Error *first, *last;
} ErrorList;
void ErrorPush(Arena *arena, ErrorList *list, Error message)
{ {
S8ListPush(arena, errors, message); Error *new_err = PushArrayZero(arena, Error, 1);
// this is a function so you can breakpoint here and discover when errors occur *new_err = message;
}
#define error(fmt_str, ...) error_impl(arena, errors, S8Fmt(arena, fmt_str, __VA_ARGS__)) QueuePush(list->first, list->last, new_err);
list->count += 1;
// Allows you to not need to quote children of a parent.
String8 all_children_as_string(Arena *arena, Node *parent)
{
String8List children = {0};
for (Node *cur = parent->first_child; !NodeIsNil(cur); cur = cur->next)
{
S8ListPush(arena, &children, cur->string);
}
return S8ListJoin(arena, children, &(StringJoin){.mid = S8Lit(" ")});
} }
Node *get_child_should_exist(Arena *arena, String8List *errors, Node *parent, String8 child_name) void error_impl(Arena *arena, ErrorList *errors, int line_in_toparse, String8 message)
{ {
if (errors->node_count > 0) ErrorPush(arena, errors, (Error){.line = line_in_toparse, .message = message});
return NilNode(); // this is a function so you can breakpoint here and discover when errors occur
Node *child = MD_ChildFromString(parent, child_name, StringMatchFlag_CaseInsensitive);
if (NodeIsNil(child))
{
error("Couldn't find child with name '%.*s' on node '%.*s'", S8VArg(child_name), S8VArg(parent->string));
}
return child;
} }
typedef struct { #define error(line_in_toparse, fmt_str, ...) error_impl(arena, errors, line_in_toparse, S8Fmt(arena, fmt_str, __VA_ARGS__))
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
@ -71,41 +63,72 @@ int main(int argc, char **argv)
(void)argv; (void)argv;
Arena *arena = ArenaAlloc(); Arena *arena = ArenaAlloc();
ParseResult result = ParseWholeFile(arena, S8Lit("playground.mdesk")); Sleep(200); // have to wait for console to pop up, super annoying
if (result.errors.node_count > 0)
{ // Prose is the name for this file format where you describe the souls of characters
printf("Failed to parse file:\n");
for (Message *cur = result.errors.first; cur->next; cur = cur->next) // tokenize
typedef struct ProseToken
{ {
printf("%.*s\n", S8VArg(cur->string)); struct ProseToken *next, *prev;
} String8 field;
} int field_number; // this is -1 if no field_number, e.g if 'Field Text #0:' isn't specified and had no '#', then this would be -1
else String8 value; // may be an empty string, if it's trying to be like, an object
int indentation;
int line;
} ProseToken;
ErrorList errors_lit = {0};
ErrorList *errors = &errors_lit;
Npc out = {0};
// all arena allocations done from here on are temporary, as it just copies data into Npc
// parse 'playground.txt' into 'out'
{ {
Node *node = result.node; // read the file
String8List errors_list = {0}; String8 to_parse = LoadEntireFile(arena, S8Lit("playground.txt"));
String8List *errors = &errors_list;
Npc out; // tokenize to_parse
chunk_from_s8(&out.name, all_children_as_string(arena, get_child_should_exist(arena, errors, node, S8Lit("name")))); ProseToken *tokenized_first = 0;
ProseToken *tokenized_last = 0;
if (errors->node_count == 0) {
String8List as_lines = S8Split(arena, to_parse, 1, &S8Lit("\n"));
int line = 1; // lines start at 1
for (String8Node *cur = as_lines.first; cur; cur = cur->next)
{ {
// unit testing asserts int indentation = 0;
assert(S8Match(TextChunkString8(out.name), S8Lit("Roger Penrose"), 0)); while(indentation < cur->string.size && cur->string.str[indentation] == '\t') indentation += 1;
String8 no_funny_business = S8SkipWhitespace(S8ChopWhitespace(cur->string));
if(no_funny_business.size == 0) continue;
String8List along_colon = S8Split(arena, no_funny_business, 1, &S8Lit(":"));
if(along_colon.node_count != 2 && along_colon.node_count != 1) {
error(line, "Requires exactly one ':' on the line to delimit the field and value. Got %d", along_colon.node_count - 1);
} else {
ProseToken *token_out = PushArrayZero(arena, ProseToken, 1);
token_out->field_number = -1;
if(along_colon.node_count == 2)
token_out->value = along_colon.last->string;
token_out->line = line;
token_out->indentation = indentation;
DblPushBack(tokenized_first, tokenized_last, token_out);
}
line += 1;
} }
else }
}
if (errors->count > 0)
{ {
printf("Corrupt character soul:\n"); printf("Failed with errors:\n");
for (String8Node *cur = errors->first; cur->next; cur = cur->next) for (Error *cur = errors->first; cur; cur = cur->next)
{ {
printf("%.*s\n", S8VArg(cur->string)); printf("On line %d of input: %.*s\n", cur->line, S8VArg(cur->message));
} }
assert(false); assert(false);
} }
}
printf("Success.\n"); printf("Success.\n");
__debugbreak(); __debugbreak();
} }

@ -6,6 +6,7 @@ Soul:
Memory #1: Silly Memory #1: Silly
Perception: Perception:
Some testing thing:
In Room: Farm In Room: Farm
Characters Around Me: Characters Around Me:
John: John:
@ -29,3 +30,4 @@ Soul:
Speech: Why are you like that dude? Speech: Why are you like that dude?
Throw: Throw:
To: John To: John

Loading…
Cancel
Save