Some kind of tokenizing, accidentally started creating a taxonomy instead of solving the problem

main
Cameron Murphy Reikes 7 months ago
parent 14d33756cd
commit 015487162b

@ -94,7 +94,6 @@ typedef struct TextChunkList
TextChunk text;
} TextChunkList;
typedef enum
{
ARG_CHARACTER,
@ -286,11 +285,16 @@ typedef enum HealthStatus {
HEALTH_verge_of_death,
} HealthStatus;
// Whatever this health is, it can be perceived by others, i.e. it's public
typedef struct Health {
HealthStatus status; // one of the HEALTH_* values of the HealthStatus enum
float drunkenness; // 1.0 is max drunkenness
} Health;
// these are items and events that are available during the game, but 'rendered' to different structs
// when sent to the AI as text so that they're more stable. I.E, if you change the name of an item or an index,
// old memories still work, old reactions to items in a room still work, etc.
typedef enum ItemKind {
ITEM_none,
ITEM_whiskey,
@ -307,36 +311,84 @@ typedef enum EventKind {
EVENT_stopped_talking,
} EventKind;
// these are the structs as presented to the AI, without a dependence on game data.
// An item as it is described to the AI: free text plus a back-reference to the gameplay item kind.
typedef struct ItemInSituation {
TextChunk name;
TextChunk description; // might include some state, e.g. 'the beer was drunk 5 times'
ItemKind actual_item_kind; // used to map back to gameplay items; may be an invalid item when mapping back isn't the purpose of the item in a situation
} ItemInSituation;
typedef struct Response {
TextChunk text;
ActionKind action;
NpcKind target;
int memory_slot;
TextChunk text; // for speech or memory
// both of these indices correspond to what was provided in the CharacterSituation
int action_index;
int target_index;
int memory_index;
} Response;
typedef BUFF(Response, 5) FullResponse; // what the AI is allowed to output
// Per-character entry of ScenePerception.characters: health, name and held item of one character.
typedef struct CharacterPerception {
Health health; // status + drunkenness
TextChunk name;
ItemInSituation held_item; // the held item in its AI-facing form (name/description/kind)
} CharacterPerception;
// A scene as perceived: a list of characters and a list of items on the floor.
typedef struct ScenePerception {
BUFF(CharacterPerception, 10) characters; // BUFF macro is not visible here; presumably a buffer with capacity 10 — confirm
BUFF(ItemKind, 10) items_on_floor; // available to be picked up or navigated to
// NOTE(review): floor items are stored as ItemKind, while held items use ItemInSituation — confirm this is intended
} ScenePerception;
// A character's own status: goal, room, held item and health.
typedef struct CharacterStatus {
TextChunk goal;
u64 in_room; // NOTE(review): presumably identifies the room the character is in — confirm what this u64 refers to
Item held_item; // uses the Item type (not declared in this excerpt), unlike CharacterPerception which uses ItemInSituation
Health health;
} CharacterStatus;
// Tag stored in Target.kind.
// TARGET_invalid is the zero value, so a zero-initialized Target has an invalid kind.
typedef enum TargetKind {
    TARGET_invalid = 0,
    TARGET_path    = 1,
    TARGET_person  = 2,
    TARGET_item    = 3,
} TargetKind;
// A target described as text plus a kind tag. CharacterSituation holds a list of these in its `targets` member.
typedef struct Target {
TextChunk name;
TextChunk description;
TargetKind kind; // TARGET_path, TARGET_person or TARGET_item; TARGET_invalid is the zero value
} Target;
// the situation for somebody
typedef struct CharacterSituation {
TextChunk goal; // kind of like the most important memory, self described character's goal right now
TextChunk memories[4]; // explicit numbered memories
BUFF(TextChunk, 5) events; // events that this character has observed
BUFF(TextChunk, 5) events; // events that this character has observed in the plain english form
BUFF(Target, 10) targets;
CharacterStatus my_status;
CharacterStatus status;
} CharacterSituation;
/*
Training samples must remain stable as the game is changed, is the decision here: i.e, if the characters
in the situations are edited/changed, the training samples KEEP the old characters. This is so custom characters
don't become invalid when the game is updated. I'm making the same decision with the items, instead of storing
an across-update-stable 'item ID', I want to store the name of the item. All items can be used and have the same 'API'
so no updating is necessary there.
I.E: the situation is freestanding and doesn't refer to any other data. Not NpcKind, not ItemKind, not anything.
Even the list of available actions and their arguments are stored in the situation. It's basically like pure JSON data.
Also, you can't deserialize training samples easily because they're exact text, and such a thing should NEVER happen.
Training sample into gamestate = bad time. For things like recording gamestate for replays, there's a real man serialization
codepath into binary.
*/
typedef struct TrainingSample {
CharacterSituation situation;
Response response;
FullResponse response;
} TrainingSample;
typedef struct Npc {

@ -29,41 +29,33 @@
#include <windows.h> // for sleep.
void error_impl(Arena *arena, String8List *errors, String8 message)
typedef struct Error {
struct Error *next, *prev;
String8 message;
int line;
} Error;
typedef struct ErrorList {
int count;
Error *first, *last;
} ErrorList;
void ErrorPush(Arena *arena, ErrorList *list, Error message)
{
S8ListPush(arena, errors, message);
// this is a function so you can breakpoint here and discover when errors occur
}
#define error(fmt_str, ...) error_impl(arena, errors, S8Fmt(arena, fmt_str, __VA_ARGS__))
// Allows you to not need to quote children of a parent.
String8 all_children_as_string(Arena *arena, Node *parent)
{
String8List children = {0};
for (Node *cur = parent->first_child; !NodeIsNil(cur); cur = cur->next)
{
S8ListPush(arena, &children, cur->string);
}
return S8ListJoin(arena, children, &(StringJoin){.mid = S8Lit(" ")});
Error *new_err = PushArrayZero(arena, Error, 1);
*new_err = message;
QueuePush(list->first, list->last, new_err);
list->count += 1;
}
Node *get_child_should_exist(Arena *arena, String8List *errors, Node *parent, String8 child_name)
void error_impl(Arena *arena, ErrorList *errors, int line_in_toparse, String8 message)
{
if (errors->node_count > 0)
return NilNode();
Node *child = MD_ChildFromString(parent, child_name, StringMatchFlag_CaseInsensitive);
if (NodeIsNil(child))
{
error("Couldn't find child with name '%.*s' on node '%.*s'", S8VArg(child_name), S8VArg(parent->string));
}
return child;
ErrorPush(arena, errors, (Error){.line = line_in_toparse, .message = message});
// this is a function so you can breakpoint here and discover when errors occur
}
typedef struct {
}
#define error(line_in_toparse, fmt_str, ...) error_impl(arena, errors, line_in_toparse, S8Fmt(arena, fmt_str, __VA_ARGS__))
int main(int argc, char **argv)
{
@ -71,41 +63,72 @@ int main(int argc, char **argv)
(void)argv;
Arena *arena = ArenaAlloc();
ParseResult result = ParseWholeFile(arena, S8Lit("playground.mdesk"));
if (result.errors.node_count > 0)
Sleep(200); // have to wait for console to pop up, super annoying
// Prose is the name for this file format where you describe the souls of characters
// tokenize
typedef struct ProseToken
{
printf("Failed to parse file:\n");
for (Message *cur = result.errors.first; cur->next; cur = cur->next)
{
printf("%.*s\n", S8VArg(cur->string));
}
}
else
struct ProseToken *next, *prev;
String8 field;
int field_number; // this is -1 if no field_number, e.g if 'Field Text #0:' isn't specified and had no '#', then this would be -1
String8 value; // may be an empty string, if it's trying to be like, an object
int indentation;
int line;
} ProseToken;
ErrorList errors_lit = {0};
ErrorList *errors = &errors_lit;
Npc out = {0};
// all arena allocations done from here are temporary. As it just copies data into Npc
// parse 'playground.txt' into 'out'
{
Node *node = result.node;
String8List errors_list = {0};
String8List *errors = &errors_list;
Npc out;
chunk_from_s8(&out.name, all_children_as_string(arena, get_child_should_exist(arena, errors, node, S8Lit("name"))));
// read the file
String8 to_parse = LoadEntireFile(arena, S8Lit("playground.txt"));
if (errors->node_count == 0)
// tokenize to_parse
ProseToken *tokenized_first = 0;
ProseToken *tokenized_last = 0;
{
// unit testing asserts
assert(S8Match(TextChunkString8(out.name), S8Lit("Roger Penrose"), 0));
}
else
{
printf("Corrupt character soul:\n");
for (String8Node *cur = errors->first; cur->next; cur = cur->next)
String8List as_lines = S8Split(arena, to_parse, 1, &S8Lit("\n"));
int line = 1; // lines start at 1
for (String8Node *cur = as_lines.first; cur; cur = cur->next)
{
printf("%.*s\n", S8VArg(cur->string));
// Tokenize one line of the input (`cur->string`) into a ProseToken.
// Count the leading tab characters of the raw line before any trimming.
int indentation = 0;
while(indentation < cur->string.size && cur->string.str[indentation] == '\t') indentation += 1;
String8 no_funny_business = S8SkipWhitespace(S8ChopWhitespace(cur->string));
// Lines that are empty after trimming produce no token, but they must still reach
// `line += 1` below. Skipping it with `continue` made every later token and error
// report a line number that was too small.
if(no_funny_business.size != 0) {
    String8List along_colon = S8Split(arena, no_funny_business, 1, &S8Lit(":"));
    if(along_colon.node_count != 2 && along_colon.node_count != 1) {
        // Cast so the argument matches %d; node_count is presumably a wider unsigned type — confirm.
        error(line, "Requires exactly one ':' on the line to delimit the field and value. Got %d", (int)(along_colon.node_count - 1));
    } else {
        ProseToken *token_out = PushArrayZero(arena, ProseToken, 1);
        // The first piece is the field name in both the 1-piece and 2-piece case.
        // Previously `field` was never assigned, so every token had an empty field.
        token_out->field = along_colon.first->string;
        // No '#<number>' parsing is done here yet, so the field number stays at the "absent" value.
        token_out->field_number = -1;
        if(along_colon.node_count == 2) {
            token_out->value = along_colon.last->string;
        }
        token_out->line = line;
        token_out->indentation = indentation;
        DblPushBack(tokenized_first, tokenized_last, token_out);
    }
}
line += 1;
}
assert(false);
}
}
if (errors->count > 0)
{
printf("Failed with errors:\n");
for (Error *cur = errors->first; cur; cur = cur->next)
{
printf("On line %d of input: %.*s\n", cur->line, S8VArg(cur->message));
}
assert(false);
}
printf("Success.\n");
__debugbreak();
}

@ -1,31 +1,33 @@
Name: Roger Penrose
Description: He is an illusive testing character, who confounds even the most brilliant philosophers and doctors to this date.
Soul:
Situation #0:
Memory #0: I'm being
Memory #1: Silly
Situation #0:
Memory #0: I'm being
Memory #1: Silly
Perception:
In Room: Farm
Characters Around Me:
John:
Health Status: decent
Drunkenness: 0.0
Holding: nothing
Items Around Me:
Whiskey
Perception:
Some testing thing:
In Room: Farm
Characters Around Me:
John:
Health Status: decent
Drunkenness: 0.0
Holding: nothing
Items Around Me:
Whiskey
My Status:
Health Status: decent
Drunkenness: 0.5
Holding: whiskey
My Status:
Health Status: decent
Drunkenness: 0.5
Holding: whiskey
Previous Event #0: John said to me, "Why are you like that bro?"
Previous Event #1: I said to John, "Just because"
Previous Event #0: John said to me, "Why are you like that bro?"
Previous Event #1: I said to John, "Just because"
Response:
Say_To:
To: John
Speech: Why are you like that dude?
Throw:
To: John
Response:
Say_To:
To: John
Speech: Why are you like that dude?
Throw:
To: John
Loading…
Cancel
Save