Compare commits

...

4 Commits

1
.gitignore vendored

@ -1 +1,2 @@
.*
ffdb_test

122
test.c

@ -5,30 +5,136 @@
#include <sys/stat.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define TEST_DB "/tmp/ffdb-test"
/*
 * Print a one-line verdict for a finished test and hand the verdict back.
 *
 * result:    true when the test passed, false when it failed.
 * test_name: label printed after the PASS/FAIL tag.
 *
 * Returns result unchanged, so callers can `return test_result( ... )`.
 */
static bool test_result( bool result, const char* test_name )
{
	if( !result ) {
		printf( "[ FAIL ] %s\n", test_name );
		return false;
	}

	printf( "[ PASS ] %s\n", test_name );
	return true;
}
/*
 * Check one condition inside a test function.
 *
 * Prints the stringified condition together with "passed" or "failed at
 * FILE:LINE". On failure it clears the local `bool result` that the
 * enclosing function must have in scope; a passing check never sets it
 * back to true.
 *
 * The doubled parentheses around cond allow an assignment to be used as the
 * condition, e.g. TEST( value = lookup() ).
 *
 * The body is wrapped in do { } while( 0 ) so that `TEST( x );` is exactly
 * one statement and stays correct under an unbraced if/else.
 */
#define TEST( cond ) \
	do { \
		if( ((cond)) ) { \
			printf( "\t[TEST] %s passed\n", #cond ); \
		} else { \
			printf( "\t[TEST] %s failed at " __FILE__ ":%d\n", #cond, __LINE__ ); \
			result = false; \
		} \
	} while( 0 )
/*
 * Give a test a clean slate: recursively delete TEST_DB and recreate it as
 * an empty directory with mode 0755.
 *
 * Failures are reported on stderr but are not fatal; the test that follows
 * will then fail on its own checks.
 */
static void reset(void)
{
	if( 0 != system( "rm -Rf " TEST_DB ) ) {
		fprintf( stderr, "reset: could not remove " TEST_DB "\n" );
	}
	if( 0 != mkdir( TEST_DB, 0755 ) ) {
		perror( "reset: mkdir " TEST_DB );
	}
}
static bool test_1_simple_set()
{
bool result = true;
reset();
result &= ffdb_trie_set( TEST_DB, "https://example.com/actor", "T" );
result &= ffdb_trie_set( TEST_DB, "https://apogee.polaris-1.work/owner/actor", "T" );
if( !result ) {
printf( "[FAILED] ffdb test 1: simple set\n" );
return test_result( result, "ffdb test 1: simple set" );
}
/*
 * Test 2: writing a key that already exists must not disturb it.
 *
 * Stores "633", then its sibling "632", then "633" again with the same
 * value, and finally checks that "633" still reads back as "T".
 *
 * Returns true when every check passed.
 */
static bool test_2_check_overwrite()
{
	bool result = true;
	reset();

	TEST( ffdb_trie_set( TEST_DB, "633", "T" ) );
	TEST( ffdb_trie_set( TEST_DB, "632", "T" ) );
	TEST( ffdb_trie_set( TEST_DB, "633", "T" ) );

	char* value = NULL;
	TEST( value = ffdb_trie_get( TEST_DB, "633" ) );
	// A failed lookup leaves value NULL; guard so the comparison is recorded
	// as a failed check instead of crashing inside strcmp.
	TEST( value && 0 == strcmp(value,"T") );
	free(value);

	return test_result( result, "ffdb test 2: overwrite test" );
}
/*
 * Test 3: a lookup should fold a small child file back into the root file.
 *
 * Hand-writes a two-file trie: a root file whose edge "1" has count 2, and
 * a child file "%ROOT|1" holding "1" -> "T" and "2" -> "F". It then reads
 * keys "11" and "12" and checks that afterwards the child file no longer
 * exists while the root file still does.
 *
 * Returns true when every check passed; returns false straight away if a
 * fixture file cannot be created.
 */
static bool test_3_consolidation_on_get()
{
	const char* test_name = "ffdb test 3: consolidation on get";

	reset();
	bool result = true;

	FILE* f = fopen( TEST_DB "/%ROOT|", "w" );
	TEST( f != NULL );
	if( !f ) {
		return test_result( false, test_name );
	}
	fprintf( f, "{\"1\":2}\n" );
	fclose(f);

	f = fopen( TEST_DB "/%ROOT|1", "w" );
	TEST( f != NULL );
	if( !f ) {
		return test_result( false, test_name );
	}
	fprintf( f, "{\"1\":\"T\",\"2\":\"F\"}\n" );
	fclose(f);

	// A failed lookup returns NULL; guard so it is recorded as a failed
	// check instead of crashing inside strcmp.
	char* value = ffdb_trie_get( TEST_DB, "11" );
	TEST( value && 0 == strcmp(value,"T") );
	free(value);

	value = ffdb_trie_get( TEST_DB, "12" );
	TEST( value && 0 == strcmp(value,"F") );
	free(value);

	// The child file must be gone, the root file must remain.
	struct stat s = {0};
	TEST( 0 != stat( TEST_DB "/%ROOT|1", &s ) );
	TEST( 0 == stat( TEST_DB "/%ROOT|", &s ) );

	return test_result( result, test_name );
}
/*
 * Test 4: replay a fixed sequence of sets on the sibling keys "992".."998",
 * many of them with a NULL value (presumably deletions -- confirm against
 * ffdb_trie_set), and then check that the never-stored key "9966" is not
 * found, both before and after one more NULL set on "998".
 *
 * Returns true when every check passed.
 */
bool test_4_apogee_stub_corruption()
{
	reset();
	bool result = true;

	TEST( ffdb_trie_set( TEST_DB, "992", "T" ) );
	for( int i = 0; i < 3; ++i ) {
		TEST( ffdb_trie_set( TEST_DB, "992", NULL ) );
	}
	TEST( ffdb_trie_set( TEST_DB, "993", NULL ) );
	TEST( ffdb_trie_set( TEST_DB, "994", NULL ) );
	TEST( ffdb_trie_set( TEST_DB, "995", "T" ) );
	for( int i = 0; i < 5; ++i ) {
		TEST( ffdb_trie_set( TEST_DB, "995", NULL ) );
	}
	for( int i = 0; i < 2; ++i ) {
		TEST( ffdb_trie_set( TEST_DB, "994", NULL ) );
	}
	for( int i = 0; i < 2; ++i ) {
		TEST( ffdb_trie_set( TEST_DB, "996", "T" ) );
	}
	TEST( ffdb_trie_set( TEST_DB, "997", NULL ) );
	TEST( ffdb_trie_set( TEST_DB, "998", "T" ) );

	// An early `return result;` used to sit here, which skipped the lookups
	// below and the PASS/FAIL report. The test now runs to the end.
	char* value = NULL;
	TEST( !( value = ffdb_trie_get( TEST_DB, "9966" ) ) );
	free(value);

	TEST( ffdb_trie_set( TEST_DB, "998", NULL ) );
	TEST( !( value = ffdb_trie_get( TEST_DB, "9966" ) ) );
	free(value);

	return test_result( result, "ffdb test 4: apogee stub corruption" );
}
bool ffdb_test()
{
system( "rm -Rvf " TEST_DB );
mkdir( TEST_DB, 0755 );
return true &&
test_1_simple_set()
return true
&& test_1_simple_set()
&& test_2_check_overwrite()
&& test_3_consolidation_on_get()
&& test_4_apogee_stub_corruption()
;
}
#ifdef STANDALONE_TEST
/*
 * Entry point for the standalone test binary: runs ffdb_test() and maps its
 * boolean verdict onto the process exit status.
 */
int main()
{
	if( ffdb_test() ) {
		return EXIT_SUCCESS;
	}
	return EXIT_FAILURE;
}
#endif

@ -0,0 +1,15 @@
#!/bin/bash
# Build the standalone ffdb test binary from the sources in this directory
# plus the sibling json, collections, sha256, util and reflect modules, then
# run it. Any arguments given to this script are passed through to gcc.

# Stop on a failed build instead of running a stale ./ffdb_test.
set -e

gcc \
	*.c \
	$(find ../json -name '*.c') \
	$(find ../collections/ -name '*.c' ) \
	../sha256/sha256.c \
	../util/format.c ../reflect/reflect.c \
	-DSTANDALONE_TEST \
	"$@" \
	-I ../ \
	-o ffdb_test

exec ./ffdb_test

528
trie.c

@ -10,8 +10,9 @@
#include <unistd.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <signal.h>
//#define DEBUG
#define DEBUG
#ifdef DEBUG
#define DEBUG_printf printf
@ -45,16 +46,34 @@ struct trie_entry
char* filename;
char* prefix;
struct trie_entry* file_root;
struct trie_entry* root;
bool dirty;
};
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed );
static void trie_entry_free( struct trie_entry* e );
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e );
static void fixup_consolidated_trie( struct trie_entry* e );
static void trie_entry_free_composite( struct trie_entry* e )
{
if( e->dirty ) {
trie_entry_save_to_file( e->filename, e );
}
for( int i = 0; i < e->edges.count; ++i ) {
free(e->edges.items[i].label);
free(e->edges.items[i].value);
if( e->edges.items[i].child_trie ) {
trie_entry_free( e->edges.items[i].child_trie );
}
e->edges.items[i].child_trie = NULL;
}
free(e->edges.items);
free(e->filename);
free(e->prefix);
free(e->edges.items); e->edges.items = NULL;
free(e->filename); e->filename = NULL;
free(e->prefix); e->prefix = NULL;
}
static void trie_entry_free( struct trie_entry* e )
{
@ -63,32 +82,31 @@ static void trie_entry_free( struct trie_entry* e )
free(e);
}
static bool trie_entry_load( FILE* f, struct trie_entry* e )
static bool trie_entry_load_inside( struct json_pull_parser* jpp, struct trie_entry* e )
{
struct json_pull_parser jpp = {
.f = f,
.curr_state = jpp_initial_state,
};
if( !jpp.f ) { return false; }
assert( e->prefix );
assert( e->filename );
assert( e->file_root );
assert( e->root );
int save;
if( !json_pull_parser_begin_object(&jpp,&save) ) { goto failed; }
if( !json_pull_parser_begin_object(jpp,&save) ) { goto failed; }
e->edges.count = 0;
char* edge_label = NULL;
while(( edge_label = json_pull_parser_read_object_key(&jpp) )) {
while(( edge_label = json_pull_parser_read_object_key(jpp) )) {
e->edges.count += 1;
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
struct edge* ed = &e->edges.items[ e->edges.count-1 ];
memset(ed,0,sizeof(*ed));
ed->label = edge_label;
if( json_pull_parser_read_int( &jpp, &ed->count ) ) {
if( json_pull_parser_read_int( jpp, &ed->count ) ) {
continue;
}
char* value = json_pull_parser_read_string(&jpp);
char* value = json_pull_parser_read_string(jpp);
if( value ) {
ed->value = value;
ed->count = 1;
@ -98,44 +116,95 @@ static bool trie_entry_load( FILE* f, struct trie_entry* e )
struct trie_entry* child = NULL;
child = malloc(sizeof(*child));
memset(child,0,sizeof(*child));
if( trie_entry_load( f, child ) ) {
child->prefix = aformat( "%s%s", e->prefix, ed->label );
child->filename = strdup(e->filename);
child->file_root = e->file_root;
child->root = e->root;
if( trie_entry_load_inside( jpp, child ) ) {
ed->child_trie = child;
ed->count = 0;
for( int i = 0; i < child->edges.count; ++i ) {
ed->count += child->edges.items[i].count;
}
continue;
} else {
DEBUG_printf( "failed!!!\n" );
}
free(child);
goto failed;
}
if( !json_pull_parser_end_object(&jpp,&save) ) { goto failed; }
if( !json_pull_parser_end_object(jpp,&save) ) {
DEBUG_printf( "expecting end of object, but didn't get one\n" );
goto failed;
}
return true;
failed:
return false;
}
/*
 * Parse a trie node from an open stream into e.
 *
 * Sets up a fresh JSON pull parser over f and delegates the parsing to
 * trie_entry_load_inside(). e->prefix must already be set.
 *
 * f: stream to read from; NULL is rejected.
 * e: entry to populate.
 *
 * Returns true on success. Returns false if f is NULL or parsing fails; on
 * a parse failure, whatever fgetc() still yields from f is echoed to stdout
 * as a diagnostic.
 */
static bool trie_entry_load( FILE* f, struct trie_entry* e )
{
	if( f == NULL ) {
		return false;
	}

	struct json_pull_parser jpp = {
		.f = f,
		.curr_state = jpp_initial_state,
	};

	const bool parsed = trie_entry_load_inside( &jpp, e );
	assert( e->prefix );
	if( parsed ) {
		return true;
	}

	printf( "parse error. Remaining data:\n" );
	for( int ch = fgetc(f); ch != EOF; ch = fgetc(f) ) {
		fputc( ch, stdout );
	}
	return false;
}
static bool trie_entry_load_from_file( const char* filename, struct trie_entry* e )
{
DEBUG_printf( "Loading trie at %s\n", filename );
assert( !strstr(filename,"(null)") );
e->filename = strdup(filename);
e->file_root = e;
FILE* f = fopen( filename, "r" );
if( !f ) { return false; }
if( !f ) {
printf( "Failed to open file %s\n", filename );
if( strstr(filename,"%ROOT|")[6] ) {
//__builtin_trap();
fflush(stdout);
raise(SIGTRAP);
}
return false;
}
bool result = trie_entry_load( f, e );
fclose(f);
DEBUG_printf( "e = { .edges = [...], .filename = %s, .prefix = %s }\n",
e->filename, e->prefix
);
return result;
}
static void trie_entry_save( FILE* f, struct trie_entry* e )
static void trie_entry_save_inside( FILE* f, struct trie_entry* e, int indent )
{
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->count == 1 && !ed->value ) {
if( ed->count == 1 && !ed->value && !ed->child_trie ) {
fflush(stdout);
raise(SIGTRAP);
// Somehow the count on an edge was lower than the actual number of
// children. Don't save with invalid edge (implicit += 1 to edge count)
return;
@ -154,7 +223,6 @@ static void trie_entry_save( FILE* f, struct trie_entry* e )
}
}
if( lowest != i ) {
struct edge tmp;
memcpy( &tmp, &e->edges.items[i], sizeof(tmp) );
@ -164,27 +232,124 @@ static void trie_entry_save( FILE* f, struct trie_entry* e )
}
fprintf( f, "{" );
indent += 1;
for( int i = 0; i < e->edges.count; ++i ) {
if( i != 0 ) {
fprintf( f, ",\n\t" );
fprintf( f, ",\n" );
} else {
fprintf( f, "\n\t" );
fprintf( f, "\n" );
}
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
struct edge* ed = &e->edges.items[i];
json_write_string( f, ed->label );
fprintf( f, ": " );
if( ed->count > 1 ) {
if( ed->child_trie ) {
trie_entry_save_inside( f, ed->child_trie, indent );
} else if( ed->count > 1 ) {
fprintf( f, "%d", ed->count );
} else {
assert( ed->value );
json_write_string( f, ed->value );
}
}
fprintf( f, "\n}\n" );
indent -= 1;
fprintf( f, "\n" );
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
fprintf( f, "}" );
}
/*
 * Write e to f as a top-level object.
 *
 * Thin wrapper around trie_entry_save_inside() that starts the output at
 * indentation level zero.
 */
static void trie_entry_save( FILE* f, struct trie_entry* e )
{
	const int top_level_indent = 0;
	trie_entry_save_inside( f, e, top_level_indent );
}
/*
 * Measure how many bytes e occupies when serialized.
 *
 * The entry is written with trie_entry_save() into a throw-away in-memory
 * stream and the number of bytes produced is returned; nothing is written
 * to disk by this function itself.
 *
 * Returns the serialized size in bytes, or 0 if the in-memory stream could
 * not be created.
 */
static size_t trie_entry_measure( struct trie_entry* e )
{
	char* data = NULL;
	size_t result = 0;

	FILE* f = open_memstream( &data, &result );
	if( !f ) {
		// No scratch stream available: report "nothing measured" rather
		// than handing a NULL stream to the serializer.
		return 0;
	}

	trie_entry_save( f, e );
	// result is only guaranteed to be up to date once the stream has been
	// flushed or closed, so it is read after fclose().
	fclose(f);
	free(data);

	DEBUG_printf( "trie_entry_measure( e=%s ) = %zu\n", e->filename, result );
	return result;
}
/*
 * Count every inline child node below e.
 *
 * Each edge that carries a child_trie contributes one for the child itself
 * plus the child's own inline descendants. Edges without a child_trie
 * contribute nothing.
 *
 * Returns the number of inline descendants; e itself is not counted.
 */
static int trie_entry_count_child_entries( struct trie_entry* e )
{
	assert(e);

	int total = 0;
	for( int idx = 0; idx < e->edges.count; ++idx ) {
		struct trie_entry* child = e->edges.items[idx].child_trie;
		if( !child ) {
			continue;
		}
		total += 1 + trie_entry_count_child_entries( child );
	}
	return total;
}
/*
 * Walk e's edges depth-first and break one inline child out into its own
 * file, selected by the countdown in *break_at.
 *
 * Every edge that carries a child_trie decrements *break_at. The child on
 * which the counter reaches zero gets a filename built from
 * e->root->filename plus its own prefix, becomes its own file_root, has its
 * descendants re-pointed by fixup_consolidated_trie(), and is saved; after
 * that the file containing e (e->file_root) is saved as well. Children
 * visited before that point are searched recursively.
 *
 * e:        node to search; e->file_root must have a prefix and a filename.
 * break_at: in/out countdown shared across the recursion. It is left at
 *           zero once a child has been broken out, which makes every
 *           enclosing loop return.
 *
 * NOTE(review): the counter is decremented before it is tested, so a
 * starting value of 0 goes negative and no child is ever selected. Confirm
 * this is intended for a caller that passes rand() % count.
 * NOTE(review): the previous broken_e->filename is overwritten without
 * being freed, and ed->child_trie stays attached to the edge after the
 * break. Confirm the intended ownership before changing either.
 */
static void trie_entry_break_at( struct trie_entry* e, int* break_at )
{
assert(e);
assert(break_at);
assert(e->file_root);
assert(e->file_root->prefix);
assert(e->file_root->filename);
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->child_trie ) {
// Count this inline child; it is the one to break when the countdown hits zero
*break_at -= 1;
if( !*break_at ) {
printf( "\tbreaking node with prefix=%s\n", ed->child_trie->prefix );
printf( "\t\te->root = %p\n", e->root );
printf( "\t\te->root->filename = %s\n", e->root->filename );
//printf( "\t\te->file_root->prefix = %s\n", e->file_root->prefix );
struct trie_entry* broken_e = ed->child_trie;
// New file name = root file name followed by the child's full prefix
char* new_filename = aformat( "%s%s", e->root->filename, broken_e->prefix );
printf( "\t\tnew_filename = %s\n", new_filename );
printf( "\t\ted->count = %d\n", ed->count );
broken_e->filename = new_filename;
// The child now heads its own file; point its descendants at it
broken_e->file_root = broken_e;
fixup_consolidated_trie( broken_e );
// Save the new file first, then the file the child was broken out of
trie_entry_save_to_file( broken_e->filename, broken_e );
trie_entry_save_to_file( e->file_root->filename, e->file_root );
} else {
// Not this one: keep counting inside the child's own subtree
trie_entry_break_at( ed->child_trie, break_at );
}
}
// A zero countdown means a child was broken out (here or deeper): stop
if( !*break_at ) {
return;
}
}
}
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e )
{
DEBUG_printf( "Saving trie to %s\n", filename );
assert(e);
size_t s = trie_entry_measure( e );
if( s > 4096 ) {
printf( "split trie_entry, s=%ld, e=%p, e->file_root=%p\n", s, e, e->file_root );
int count = trie_entry_count_child_entries( e );
printf( "\tchild tries = %d\n", count );
int break_at = rand() % count;
printf( "\tsplitting at child # %d\n", break_at );
trie_entry_break_at( e, &break_at );
}
/*
if( 0 == strcmp(filename,"data/accounts/000m/000k/004/timeline" ) ) {
__builtin_trap();
}
//*/
char tmp_filename[512];
snprintf( tmp_filename, sizeof(tmp_filename), "%s.tmp", filename );
FILE* f = fopen(tmp_filename,"w");
@ -194,6 +359,7 @@ static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e
fclose(f);
rename( tmp_filename, filename );
DEBUG_printf( "Saved trie to %s\n", filename );
return true;
}
@ -279,8 +445,10 @@ static int trie_entry_add_new_edge( struct trie_entry* e, const char* key, const
ed2->label = strdup(key);
ed2->value = strdup(value);
ed2->count = 1;
ed2->child_trie = NULL;
trie_entry_save_to_file( e->file_root->filename, e->file_root );
trie_entry_save_to_file( e->filename, e );
return trie_entry_set_result_added_new;
}
static int trie_entry_delete_existing( struct trie_entry* e, struct edge* ed, const char* key, int parent_count )
@ -294,7 +462,7 @@ static int trie_entry_delete_existing( struct trie_entry* e, struct edge* ed, co
free(tmp.label);
}
e->edges.count -= 1;
trie_entry_save_to_file( e->filename, e );
trie_entry_save_to_file( e->file_root->filename, e->file_root );
return trie_entry_set_result_deleted_existing;
}
static int trie_entry_update_existing( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int parent_count )
@ -305,61 +473,73 @@ static int trie_entry_update_existing( struct trie_entry* e, struct edge* ed, co
return trie_entry_delete_existing( e, ed, key, parent_count );
}
ed->value = strdup(value);
trie_entry_save_to_file( e->filename, e );
/*
if( this_count > parent_count ) {
DEBUG_printf( "size mismatch detected. TODO: correct\n" );
}
*/
trie_entry_save_to_file( e->file_root->filename, e->file_root );
return trie_entry_set_result_updated_existing;
}
static int trie_entry_split_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
assert( e->prefix );
DEBUG_printf( "split_existing_edge\n" );
if( !value ) {
return trie_entry_set_result_no_change;
}
{
char* new_prefix = strndup( key, prefix_len );
char* new_filename = malloc(strlen(e->filename)+prefix_len+1);
strcpy(new_filename,e->filename);
strcat(new_filename,new_prefix);
// Create the new node
struct trie_entry* new_e = malloc(sizeof(struct trie_entry));
memset(new_e,0,sizeof(*new_e));
new_e->filename = new_filename;
new_e->prefix = aformat( "%s%s", e->prefix, new_prefix );
new_e->edges.items = malloc( sizeof(struct edge) * 2 );
memset( new_e->edges.items, 0, sizeof(struct edge) * 2 );
new_e->edges.items[0].label = strdup(&ed->label[prefix_len]);
if( ed->count == 1 ) {
new_e->edges.items[0].value = ed->value;
}
new_e->edges.items[0].count = ed->count;
char* new_prefix = strndup( key, prefix_len );
char* new_filename = malloc(strlen(e->filename)+prefix_len+1);
strcpy(new_filename,e->filename);
strcat(new_filename,new_prefix);
new_e->edges.items[1].label = strdup(&key[prefix_len]);
new_e->edges.items[1].value = strdup(value);
new_e->edges.items[1].count = 1;
new_e->edges.count = 2;
// Create the new node
struct trie_entry* new_e = malloc(sizeof(struct trie_entry));
memset(new_e,0,sizeof(*new_e));
new_e->filename = new_filename;
new_e->prefix = aformat( "%s%s", e->prefix, new_prefix );
// Save to file
trie_entry_save_to_file( new_e->filename, new_e );
new_e->edges.items = malloc( sizeof(struct edge) * 2 );
memset( new_e->edges.items, 0, sizeof(struct edge) * 2 );
// Update the existing edge
free(ed->label);
ed->label = new_prefix;
ed->value = NULL;
ed->count = new_e->edges.items[0].count + new_e->edges.items[1].count;
new_e->edges.items[0].label = strdup(&ed->label[prefix_len]);
if( ed->count == 1 ) {
new_e->edges.items[0].value = ed->value;
}
new_e->edges.items[0].count = ed->count;
new_e->edges.items[0].child_trie = ed->child_trie;
new_e->edges.items[1].label = strdup(&key[prefix_len]);
new_e->edges.items[1].value = strdup(value);
new_e->edges.items[1].count = 1;
new_e->edges.count = 2;
// Update the existing edge
free(ed->label);
ed->label = new_prefix;
ed->value = NULL;
ed->count = new_e->edges.items[0].count + new_e->edges.items[1].count;
size_t s = trie_entry_measure(e->file_root);
if( s > 4096 ) {
DEBUG_printf( "Forcing file split (s=%lu)\n", s );
DEBUG_printf( "new_e = %p\n", new_e );
DEBUG_printf( "e = %p\n", e );
new_e->dirty = true;
trie_entry_free(new_e);
trie_entry_save_to_file( e->filename, e );
ed->child_trie = NULL;
} else {
ed->child_trie = new_e;
new_e->file_root = e->file_root;
new_e->root = e->root;
}
e->file_root->dirty = true;
return trie_entry_set_result_added_new;
}
static int trie_entry_traverse_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
assert( e->prefix );
DEBUG_printf( "traverse_existing_edge\n" );
if( ed->count == 1 ) {
DEBUG_printf( "single value %s\n", ed->label );
@ -374,34 +554,53 @@ static int trie_entry_traverse_existing_edge( struct trie_entry* e, struct edge*
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
DEBUG_printf( "Traversing down %s (filename=%s) key=%s, remaining=%s, count=%d\n", ed->label, filename, key, &key[prefix_len], ed->count );
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
trie_entry_load_from_file( filename, &branch );
int result = trie_entry_set( &branch, &key[prefix_len], value, trie_entry_calculate_size( &branch ) );
int result;
struct trie_entry* branch;
bool needs_freed = false;
if( ed->child_trie ) {
branch = ed->child_trie;
branch->filename = aformat("%s%s", e->filename, ed->label );
branch->prefix = aformat( "%s%s", e->prefix, ed->label );
} else {
branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
}
result = trie_entry_set( branch, &key[prefix_len], value, trie_entry_calculate_size( branch ) );
DEBUG_printf( "result=%d\n", result );
// update count
if( result == trie_entry_set_result_added_new ) {
DEBUG_printf( "Updated edge count +1\n" );
ed->count += 1;
trie_entry_save_to_file( e->filename, e );
} else if( result == trie_entry_set_result_deleted_existing ) {
DEBUG_printf( "Updated edge count -1\n" );
ed->count -= 1;
if( branch.edges.count == 1 ) {
char* new_label = aformat( "%s%s", ed->label, branch.edges.items[0].label );
if( branch->edges.count == 1 ) {
char* new_label = aformat( "%s%s", ed->label, branch->edges.items[0].label );
free(ed->label); ed->label = new_label;
free(ed->value);
if( branch.edges.items[0].value ) {
ed->value = strdup(branch.edges.items[0].value);
if( branch->edges.items[0].value ) {
ed->value = strdup(branch->edges.items[0].value);
} else {
ed->value = NULL;
}
remove( branch.filename );
if( ed->child_trie ) {
ed->child_trie->dirty = false;
trie_entry_free(ed->child_trie);
ed->child_trie = NULL;
} else {
trie_entry_save_to_file( e->file_root->filename, e->file_root );
printf( "deleting (2) %s\n", branch->filename );
remove( branch->filename );
}
}
trie_entry_save_to_file( e->filename, e );
}
trie_entry_free_composite( &branch );
if( needs_freed ) {
trie_entry_free(branch);
}
DEBUG_printf( "Traverse result = %d\n", result );
/*
@ -422,17 +621,20 @@ static int trie_entry_set( struct trie_entry* e, const char* key, const char* va
int key_length = strlen(key);
#ifdef DEBUG
/*
int this_count = trie_entry_calculate_size( e );
DEBUG_printf( "root this_count = %d, parent_count = %d\n", this_count, parent_count );
*/
#endif
DEBUG_printf( "e->prefix = \"%s\"\n", e->prefix );
DEBUG_printf( "key: %s\n", key );
for( int i = 0; i < e->edges.count; ++i ) {
ed = &e->edges.items[i];
prefix_len = prefix_match( key, ed->label );
DEBUG_printf( "label[%d]: %s, prefix_len=%d\n", i, ed->label, prefix_len );
DEBUG_fflush(stdout);
//DEBUG_printf( "label[%d]: %s, prefix_len=%d\n", i, ed->label, prefix_len );
//DEBUG_fflush(stdout);
if( prefix_len == strlen( ed->label ) ) {
if( prefix_len == 0 ) {
if( key_length == 0 ) {
@ -453,6 +655,9 @@ static struct trie_entry* load_root_node( const char* filename )
{
struct trie_entry* root = malloc(sizeof(*root));
memset(root,0,sizeof(*root));
root->prefix = strdup("");
root->file_root = root;
root->root = root;
char buffer[512];
snprintf( buffer, sizeof(buffer), "%s/%%ROOT|", filename );
@ -464,6 +669,7 @@ static struct trie_entry* load_root_node( const char* filename )
bool ffdb_trie_set( const char* filename, const char* key, const char* value )
{
DEBUG_printf( "ffdb_trie_set( filename = '%s', key = '%s', value = '%s' )\n", filename, key, value );
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
bool result = false;
@ -485,9 +691,6 @@ bool ffdb_trie_set( const char* filename, const char* key, const char* value )
printf( "Failed to set %s to %s\n", key, value );
goto failed;
}
if( res != trie_entry_set_result_updated_existing ) {
trie_entry_save_to_file( filename, root );
}
result = true;
cleanup:
@ -496,13 +699,13 @@ cleanup:
flock( fd, LOCK_UN );
close(fd);
printf( "\n" );
return result;
failed:
result = false;
goto cleanup;
}
static char* lookup_key( struct trie_entry* e, const char* key )
{
int key_length = strlen(key);
@ -531,16 +734,24 @@ static char* lookup_key( struct trie_entry* e, const char* key )
const char* remaining = &key[prefix_len];
//printf( "match=%s, remaining=%s\n", ed->label, remaining );
if( ed->count > 1 ) {
if( ed->child_trie ) {
return lookup_key( ed->child_trie, remaining );
} else if( ed->count > 1 ) {
//branch->prefix = aformat( "%s%s", e->prefix, ed->label );
//DEBUG_printf( "e->filename=%s\n", e->filename );
//DEBUG_printf( "branch.prefix=%s\n", branch.prefix );
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
snprintf( filename, sizeof(filename), "%s%s%s", e->root->filename, e->prefix, ed->label );
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
bool needs_freed = false;
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
trie_entry_load_from_file( filename, &branch );
char* result = lookup_key( &branch, remaining );
trie_entry_free_composite(&branch);
char* result = lookup_key( branch, remaining );
if( needs_freed ) {
trie_entry_free(branch);
}
return result;
} else {
if( 0 == strcmp( ed->label, key ) ) {
@ -557,6 +768,7 @@ static char* lookup_key( struct trie_entry* e, const char* key )
char* ffdb_trie_get( const char* filename, const char* key )
{
printf( "ffdb_trie_get( %s, %s )\n", filename, key );
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
char* result = NULL;
@ -569,6 +781,7 @@ char* ffdb_trie_get( const char* filename, const char* key )
cleanup:
free(key_escaped);
trie_entry_free(root);
printf( "\n" );
return result;
failed:
free(result);
@ -617,8 +830,81 @@ struct string_array {
int count;
};
/*
 * Re-point every inline descendant of e at the file that contains e.
 *
 * For each edge with a child_trie, the child's file_root is set to
 * e->file_root and its filename is replaced by a fresh copy of
 * e->file_root->filename (the old string is freed); the same is then done
 * for the child's own descendants. e itself is left untouched.
 */
static void fixup_consolidated_trie( struct trie_entry* e )
{
	for( int idx = 0; idx < e->edges.count; ++idx ) {
		struct trie_entry* child = e->edges.items[idx].child_trie;
		if( !child ) {
			continue;
		}

		child->file_root = e->file_root;
		free( child->filename );
		child->filename = strdup( e->file_root->filename );

		fixup_consolidated_trie( child );
	}
}
/*
 * Load the child trie stored in `filename` for edge `ed` of node `e`, and
 * merge it into e's containing file when that file is still small.
 *
 * The file containing e (e->file_root) is measured first. If it serializes
 * to fewer than 4096 bytes and the child file loads successfully, the child
 * is attached to ed->child_trie, ed->count is recomputed from the child's
 * edges, the containing file is saved and the child's own file is removed.
 * Otherwise the child is returned as a free-standing entry and no file is
 * changed by this function.
 *
 * filename:    path of the child file to load.
 * e:           parent node; needs file_root (with a filename), root and
 *              prefix set.
 * ed:          edge of e that leads to the child.
 * needs_freed: out; set to true when the caller owns the returned entry and
 *              must release it with trie_entry_free(), false when the entry
 *              is now owned by ed.
 *
 * Returns the loaded child entry, never NULL. If loading failed, the entry
 * holds whatever was parsed before the failure.
 */
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed )
{
	assert( e->file_root );
	assert( e->file_root->filename );
	assert( e->root );
	assert( e->prefix );

	size_t s = trie_entry_measure( e->file_root );
	DEBUG_printf( "size=%zu, filename=%s\n", s, e->file_root->filename );

	const bool merge_candidate = s < 4096;
	if( merge_candidate ) {
		DEBUG_printf( "trie entry %s is %zu bytes, and has file child (%s), candidate for merge\n", e->file_root->filename, s, filename );
	}

	// Load items from the branch
	struct trie_entry* branch = calloc( 1, sizeof(*branch) );
	assert( branch );
	branch->prefix = aformat( "%s%s", e->prefix, ed->label );
	branch->root = e->root;
	const bool loaded = trie_entry_load_from_file( filename, branch );

	if( !merge_candidate || !loaded ) {
		// Either the parent file is already large, or the child could not
		// be read. Never merge or delete a child file that failed to load:
		// that would replace its contents with an empty node.
		*needs_freed = true;
		return branch;
	}

	// Adopt the child into the parent's file.
	ed->child_trie = branch;
	branch->file_root = e->file_root;
	fixup_consolidated_trie( branch );

	ed->count = 0;
	for( int i = 0; i < branch->edges.count; ++i ) {
		ed->count += branch->edges.items[i].count;
	}

	// Persist the merged parent before removing the now redundant child file.
	trie_entry_save_to_file( e->file_root->filename, e->file_root );
	DEBUG_printf( "deleting (1) %s\n", filename );
	remove(filename);

	*needs_freed = false;
	return branch;
}
static void load_items( struct trie_entry* e, int offset, int limit, struct string_array* keys, struct string_array* values )
{
assert( e );
assert( e->prefix );
assert( e->edges.count == 0 || e->edges.items );
assert( keys || values );
if( limit <= 0 ) { return; }
DEBUG_printf( "Looking at edges in '%s'\n", e->prefix );
@ -636,25 +922,42 @@ static void load_items( struct trie_entry* e, int offset, int limit, struct stri
if( branch_count > limit ) {
branch_count = limit;
}
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
// Load items from the branch
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
trie_entry_load_from_file( filename, &branch );
branch.prefix = aformat( "%s%s", e->prefix, ed->label );
load_items( &branch, offset, branch_count, keys, values );
trie_entry_free_composite( &branch );
offset = 0;
limit -= branch_count;
if( ed->child_trie ) {
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
DEBUG_printf( "Traversing down child trie, effective filename=%s\n", filename );
ed->child_trie->filename = strdup(filename);
ed->child_trie->prefix = aformat( "%s%s", e->prefix, ed->label );
load_items( ed->child_trie, offset, branch_count, keys, values );
offset = 0;
limit -= branch_count;
} else {
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
bool needs_freed = false;
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
load_items( branch, offset, branch_count, keys, values );
if( needs_freed ) {
trie_entry_free(branch);
}
offset = 0;
limit -= branch_count;
}
} else if( limit > 0 ) {
// leaf - include
DEBUG_printf( "Loading from '%s', offset=%d\n", e->prefix, offset );
char* key = aformat( "%s%s", e->prefix, ed->label );
DEBUG_printf( "Loading item %s=%s from '%s', offset=%d\n", key, ed->value, e->prefix, offset );
if( keys ) {
char* str = aformat( "%s%s", e->prefix, ed->label );
array_append( keys, sizeof(str), &str );
array_append( keys, sizeof(key), &key );
} else {
free(key);
}
if( values ) {
char* str = strdup( ed->value );
@ -662,24 +965,34 @@ static void load_items( struct trie_entry* e, int offset, int limit, struct stri
}
limit -= 1;
}
if( limit == 0 ) {
break;
}
}
DEBUG_printf( "Out of edges in '%s'\n", e->prefix );
}
int ffbd_trie_index_for_key( const char* filename, const char* key )
int ffdb_trie_index_for_key( const char* filename, const char* key )
{
return -1;
}
void ffdb_trie_list( const char* filename, int offset, int limit, void* keys_ptr, void* values_ptr )
void ffdb_trie_list( const char* filename, int offset, int limit, void* keys_ptr, void* values_ptr )
{
DEBUG_printf( "ffdb_trie_list( %s, %d, %d, %p, %p )\n", filename, offset, limit, keys_ptr, values_ptr );
struct trie_entry* root = load_root_node( filename );
root->prefix = strdup("");
load_items( root, offset, limit, keys_ptr, values_ptr );
printf( "---- root->filename = %s\n", root->filename );
trie_entry_free(root);
printf( "---- root released\n\n" );
}
bool ffdb_trie_get_index( const char* filename, int offset, char** key, char** value )
{
DEBUG_printf( "ffdb_trie_get_index( %s, %d, %p, %p )\n", filename, offset, key, value );
bool result = false;
/// Convenience function to get single item at offset
@ -709,5 +1022,6 @@ bool ffdb_trie_get_index( const char* filename, int offset, char** key, char**
free(keys.items);
free(values.items);
printf( "\n" );
return result;
}

Loading…
Cancel
Save