You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ffdb/trie.c

1028 lines
26 KiB
C

#include "trie.h"
#include "json/json.h"
#include "collections/array.h"
#include "util/format.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <signal.h>
#define DEBUG
#ifdef DEBUG
#define DEBUG_printf printf
#define DEBUG_fflush fflush
#else
#define DEBUG_printf(...)
#define DEBUG_fflush(...)
#endif
enum {
tt_value = 1,
tt_inline = 2,
tt_extern = 3,
};
struct trie_entry;
struct edge
{
char* label;
char* value;
int count;
struct trie_entry* child_trie;
};
struct trie_entry
{
struct {
struct edge* items;
int count;
} edges;
char* filename;
char* prefix;
struct trie_entry* file_root;
struct trie_entry* root;
bool dirty;
};
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed );
static void trie_entry_free( struct trie_entry* e );
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e );
static void fixup_consolidated_trie( struct trie_entry* e );
static void trie_entry_free_composite( struct trie_entry* e )
{
if( e->dirty ) {
trie_entry_save_to_file( e->filename, e );
}
for( int i = 0; i < e->edges.count; ++i ) {
free(e->edges.items[i].label);
free(e->edges.items[i].value);
if( e->edges.items[i].child_trie ) {
trie_entry_free( e->edges.items[i].child_trie );
}
e->edges.items[i].child_trie = NULL;
}
free(e->edges.items); e->edges.items = NULL;
free(e->filename); e->filename = NULL;
free(e->prefix); e->prefix = NULL;
}
static void trie_entry_free( struct trie_entry* e )
{
if( !e ) { return; }
trie_entry_free_composite(e);
free(e);
}
static bool trie_entry_load_inside( struct json_pull_parser* jpp, struct trie_entry* e )
{
assert( e->prefix );
assert( e->filename );
assert( e->file_root );
assert( e->root );
int save;
if( !json_pull_parser_begin_object(jpp,&save) ) { goto failed; }
e->edges.count = 0;
char* edge_label = NULL;
while(( edge_label = json_pull_parser_read_object_key(jpp) )) {
e->edges.count += 1;
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
struct edge* ed = &e->edges.items[ e->edges.count-1 ];
memset(ed,0,sizeof(*ed));
ed->label = edge_label;
if( json_pull_parser_read_int( jpp, &ed->count ) ) {
continue;
}
char* value = json_pull_parser_read_string(jpp);
if( value ) {
ed->value = value;
ed->count = 1;
continue;
}
struct trie_entry* child = NULL;
child = malloc(sizeof(*child));
memset(child,0,sizeof(*child));
child->prefix = aformat( "%s%s", e->prefix, ed->label );
child->filename = strdup(e->filename);
child->file_root = e->file_root;
child->root = e->root;
if( trie_entry_load_inside( jpp, child ) ) {
ed->child_trie = child;
ed->count = 0;
for( int i = 0; i < child->edges.count; ++i ) {
ed->count += child->edges.items[i].count;
}
continue;
} else {
DEBUG_printf( "failed!!!\n" );
}
free(child);
goto failed;
}
if( !json_pull_parser_end_object(jpp,&save) ) {
DEBUG_printf( "expecting end of object, but didn't get one\n" );
goto failed;
}
return true;
failed:
return false;
}
static bool trie_entry_load( FILE* f, struct trie_entry* e )
{
if( !f ) { return false; }
struct json_pull_parser jpp = {
.f = f,
.curr_state = jpp_initial_state,
};
bool result = trie_entry_load_inside( &jpp, e );
assert( e->prefix );
//e->prefix = strdup("");
if( !result ) {
printf( "parse error. Remaining data:\n" );
int c;
while( (c=fgetc(f)) != EOF ) {
fputc( c, stdout );
}
}
return result;
}
static bool trie_entry_load_from_file( const char* filename, struct trie_entry* e )
{
DEBUG_printf( "Loading trie at %s\n", filename );
assert( !strstr(filename,"(null)") );
e->filename = strdup(filename);
e->file_root = e;
FILE* f = fopen( filename, "r" );
if( !f ) {
printf( "Failed to open file %s\n", filename );
if( strstr(filename,"%ROOT|")[6] ) {
//__builtin_trap();
fflush(stdout);
raise(SIGTRAP);
}
return false;
}
bool result = trie_entry_load( f, e );
fclose(f);
DEBUG_printf( "e = { .edges = [...], .filename = %s, .prefix = %s }\n",
e->filename, e->prefix
);
return result;
}
static void trie_entry_save_inside( FILE* f, struct trie_entry* e, int indent )
{
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->count == 1 && !ed->value && !ed->child_trie ) {
fflush(stdout);
raise(SIGTRAP);
// Somehow the count on an edge was lower than the actual number of
// children. Don't save with invalid edge (implicit += 1 to edge count)
return;
}
}
// Selection Sort by edge
for( int i = 0; i < e->edges.count ; ++i ) {
int lowest = i;
const char* lowest_label = e->edges.items[i].label;
for( int j = i + 1; j < e->edges.count; ++j ) {
if( strcmp( lowest_label, e->edges.items[j].label ) < 0 ) {
lowest = j;
lowest_label = e->edges.items[j].label;
}
}
if( lowest != i ) {
struct edge tmp;
memcpy( &tmp, &e->edges.items[i], sizeof(tmp) );
memcpy( &e->edges.items[i], &e->edges.items[lowest], sizeof(tmp) );
memcpy( &e->edges.items[lowest], &tmp, sizeof(tmp) );
}
}
fprintf( f, "{" );
indent += 1;
for( int i = 0; i < e->edges.count; ++i ) {
if( i != 0 ) {
fprintf( f, ",\n" );
} else {
fprintf( f, "\n" );
}
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
struct edge* ed = &e->edges.items[i];
json_write_string( f, ed->label );
fprintf( f, ": " );
if( ed->child_trie ) {
trie_entry_save_inside( f, ed->child_trie, indent );
} else if( ed->count > 1 ) {
fprintf( f, "%d", ed->count );
} else {
assert( ed->value );
json_write_string( f, ed->value );
}
}
indent -= 1;
fprintf( f, "\n" );
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
fprintf( f, "}" );
}
static void trie_entry_save( FILE* f, struct trie_entry* e )
{
trie_entry_save_inside( f, e, 0 );
}
static size_t trie_entry_measure( struct trie_entry* e )
{
char* data = NULL;
size_t result = 0;
FILE* f = open_memstream( &data, &result );
trie_entry_save( f, e );
fclose(f);
free(data);
DEBUG_printf( "trie_entry_measure( e=%s ) = %ld\n", e->filename, result );
return result;
}
static int trie_entry_count_child_entries( struct trie_entry* e )
{
assert(e);
int result = 0;
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->child_trie ) {
result += trie_entry_count_child_entries( ed->child_trie );
result += 1;
}
}
//DEBUG_printf( "node %p has %d child entries\n", e, result );
return result;
}
static void trie_entry_break_at( struct trie_entry* e, int* break_at )
{
assert(e);
assert(break_at);
assert(e->file_root);
assert(e->file_root->prefix);
assert(e->file_root->filename);
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->child_trie ) {
*break_at -= 1;
if( !*break_at ) {
printf( "\tbreaking node with prefix=%s\n", ed->child_trie->prefix );
printf( "\t\te->root = %p\n", e->root );
printf( "\t\te->root->filename = %s\n", e->root->filename );
//printf( "\t\te->file_root->prefix = %s\n", e->file_root->prefix );
struct trie_entry* broken_e = ed->child_trie;
char* new_filename = aformat( "%s%s", e->root->filename, broken_e->prefix );
printf( "\t\tnew_filename = %s\n", new_filename );
printf( "\t\ted->count = %d\n", ed->count );
broken_e->filename = new_filename;
broken_e->file_root = broken_e;
fixup_consolidated_trie( broken_e );
trie_entry_save_to_file( broken_e->filename, broken_e );
trie_entry_save_to_file( e->file_root->filename, e->file_root );
} else {
trie_entry_break_at( ed->child_trie, break_at );
}
}
if( !*break_at ) {
return;
}
}
}
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e )
{
DEBUG_printf( "Saving trie to %s\n", filename );
assert(e);
size_t s = trie_entry_measure( e );
if( s > 4096 ) {
printf( "split trie_entry, s=%ld, e=%p, e->file_root=%p\n", s, e, e->file_root );
int count = trie_entry_count_child_entries( e );
printf( "\tchild tries = %d\n", count );
int break_at = rand() % count;
printf( "\tsplitting at child # %d\n", break_at );
trie_entry_break_at( e, &break_at );
}
/*
if( 0 == strcmp(filename,"data/accounts/000m/000k/004/timeline" ) ) {
__builtin_trap();
}
//*/
char tmp_filename[512];
snprintf( tmp_filename, sizeof(tmp_filename), "%s.tmp", filename );
FILE* f = fopen(tmp_filename,"w");
if( !f ) { return false; }
trie_entry_save( f, e );
fclose(f);
rename( tmp_filename, filename );
DEBUG_printf( "Saved trie to %s\n", filename );
return true;
}
// Escape '/' as "%|" and '%' as "%%"
static char* escape( const char* str )
{
int size = 0;
for( const char* i = str; *i; ++i ) {
switch( *i ) {
case '/': size += 2; break;
case '%': size += 2; break;
default: size += 1; break;
}
}
char* result = malloc(size+1);
char* o = result;
for( const char* i = str; *i; ++i ) {
switch( *i ) {
case '/':
o[0] = '%';
o[1] = '|';
o += 2;
break;
case '%':
o[0] = '%';
o[1] = '%';
o += 2;
break;
default:
*o = *i;
++o;
}
}
*o = '\0';
return result;
}
static int prefix_match( const char* a, const char* b )
{
int res = 0;
while( *a && *b && *a == *b ) {
a += 1;
b += 1;
res += 1;
}
return res;
}
static int trie_entry_calculate_size( struct trie_entry* e )
{
int this_count = 0;
struct edge* ed = NULL;
for( int i = 0; i < e->edges.count; ++i ) {
ed = &e->edges.items[i];
this_count += ed->count;
}
return this_count;
}
enum {
trie_entry_set_result_failed_to_add = 0,
trie_entry_set_result_updated_existing = 1,
trie_entry_set_result_added_new = 2,
trie_entry_set_result_deleted_existing = 3,
trie_entry_set_result_no_change = 4,
};
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count );
static int trie_entry_add_new_edge( struct trie_entry* e, const char* key, const char* value, int parent_count )
{
if( !value ) {
DEBUG_printf( "value is null, not adding\n" );
return trie_entry_set_result_no_change;
}
DEBUG_printf( "add_new_edge %s\n", key );
e->edges.count += 1;
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
struct edge* ed2 = &e->edges.items[e->edges.count-1];
ed2->label = strdup(key);
ed2->value = strdup(value);
ed2->count = 1;
ed2->child_trie = NULL;
trie_entry_save_to_file( e->file_root->filename, e->file_root );
return trie_entry_set_result_added_new;
}
static int trie_entry_delete_existing( struct trie_entry* e, struct edge* ed, const char* key, int parent_count )
{
int i = ed - e->edges.items;
if( i != e->edges.count - 1 ) {
struct edge tmp;
memcpy( &tmp, ed, sizeof(*ed) );
memcpy( ed, &e->edges.items[ e->edges.count - 1 ], sizeof(*ed) );
free(tmp.label);
}
e->edges.count -= 1;
trie_entry_save_to_file( e->file_root->filename, e->file_root );
return trie_entry_set_result_deleted_existing;
}
static int trie_entry_update_existing( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int parent_count )
{
DEBUG_printf( "updating existing value\n" );
free(ed->value);
if( value == NULL ) {
return trie_entry_delete_existing( e, ed, key, parent_count );
}
ed->value = strdup(value);
trie_entry_save_to_file( e->file_root->filename, e->file_root );
return trie_entry_set_result_updated_existing;
}
static int trie_entry_split_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
assert( e->prefix );
DEBUG_printf( "split_existing_edge\n" );
if( !value ) {
return trie_entry_set_result_no_change;
}
char* new_prefix = strndup( key, prefix_len );
char* new_filename = malloc(strlen(e->filename)+prefix_len+1);
strcpy(new_filename,e->filename);
strcat(new_filename,new_prefix);
// Create the new node
struct trie_entry* new_e = malloc(sizeof(struct trie_entry));
memset(new_e,0,sizeof(*new_e));
new_e->filename = new_filename;
new_e->prefix = aformat( "%s%s", e->prefix, new_prefix );
new_e->edges.items = malloc( sizeof(struct edge) * 2 );
memset( new_e->edges.items, 0, sizeof(struct edge) * 2 );
new_e->edges.items[0].label = strdup(&ed->label[prefix_len]);
if( ed->count == 1 ) {
new_e->edges.items[0].value = ed->value;
}
new_e->edges.items[0].count = ed->count;
new_e->edges.items[0].child_trie = ed->child_trie;
new_e->edges.items[1].label = strdup(&key[prefix_len]);
new_e->edges.items[1].value = strdup(value);
new_e->edges.items[1].count = 1;
new_e->edges.count = 2;
// Update the existing edge
free(ed->label);
ed->label = new_prefix;
ed->value = NULL;
ed->count = new_e->edges.items[0].count + new_e->edges.items[1].count;
size_t s = trie_entry_measure(e->file_root);
if( s > 4096 ) {
DEBUG_printf( "Forcing file split (s=%lu)\n", s );
DEBUG_printf( "new_e = %p\n", new_e );
DEBUG_printf( "e = %p\n", e );
new_e->dirty = true;
trie_entry_free(new_e);
ed->child_trie = NULL;
} else {
ed->child_trie = new_e;
new_e->file_root = e->file_root;
new_e->root = e->root;
}
e->file_root->dirty = true;
return trie_entry_set_result_added_new;
}
static int trie_entry_traverse_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
assert( e->prefix );
DEBUG_printf( "traverse_existing_edge\n" );
if( ed->count == 1 ) {
DEBUG_printf( "single value %s\n", ed->label );
if( 0 == strcmp( ed->label, key ) ) {
return trie_entry_update_existing( e, ed, key, value, parent_count );
} else {
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
}
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
DEBUG_printf( "Traversing down %s (filename=%s) key=%s, remaining=%s, count=%d\n", ed->label, filename, key, &key[prefix_len], ed->count );
int result;
struct trie_entry* branch;
bool needs_freed = false;
if( ed->child_trie ) {
branch = ed->child_trie;
branch->filename = aformat("%s%s", e->filename, ed->label );
branch->prefix = aformat( "%s%s", e->prefix, ed->label );
} else {
branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
}
result = trie_entry_set( branch, &key[prefix_len], value, trie_entry_calculate_size( branch ) );
DEBUG_printf( "result=%d\n", result );
// update count
if( result == trie_entry_set_result_added_new ) {
DEBUG_printf( "Updated edge count +1\n" );
ed->count += 1;
} else if( result == trie_entry_set_result_deleted_existing ) {
DEBUG_printf( "Updated edge count -1\n" );
ed->count -= 1;
if( branch->edges.count == 1 ) {
char* new_label = aformat( "%s%s", ed->label, branch->edges.items[0].label );
free(ed->label); ed->label = new_label;
free(ed->value);
if( branch->edges.items[0].value ) {
ed->value = strdup(branch->edges.items[0].value);
} else {
ed->value = NULL;
}
if( ed->child_trie ) {
ed->child_trie->dirty = false;
trie_entry_free(ed->child_trie);
ed->child_trie = NULL;
} else {
trie_entry_save_to_file( e->file_root->filename, e->file_root );
printf( "deleting (2) %s\n", branch->filename );
remove( branch->filename );
}
}
}
if( needs_freed ) {
trie_entry_free(branch);
}
DEBUG_printf( "Traverse result = %d\n", result );
/*
if( parent_count > this_count ) {
if( result == trie_entry_set_result_updated_existing ) {
DEBUG_printf( "Fixing up parent count parent_count = %d, this_count = %d\n", parent_count, this_count );
return trie_entry_set_result_deleted_existing;
}
}
*/
return result;
}
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count )
{
int prefix_len = 0;
struct edge* ed;
int key_length = strlen(key);
#ifdef DEBUG
/*
int this_count = trie_entry_calculate_size( e );
DEBUG_printf( "root this_count = %d, parent_count = %d\n", this_count, parent_count );
*/
#endif
DEBUG_printf( "e->prefix = \"%s\"\n", e->prefix );
DEBUG_printf( "key: %s\n", key );
for( int i = 0; i < e->edges.count; ++i ) {
ed = &e->edges.items[i];
prefix_len = prefix_match( key, ed->label );
//DEBUG_printf( "label[%d]: %s, prefix_len=%d\n", i, ed->label, prefix_len );
//DEBUG_fflush(stdout);
if( prefix_len == strlen( ed->label ) ) {
if( prefix_len == 0 ) {
if( key_length == 0 ) {
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
} else {
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
} else if( prefix_len != 0 ) {
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
}
return trie_entry_add_new_edge( e, key, value, parent_count );
}
static struct trie_entry* load_root_node( const char* filename )
{
struct trie_entry* root = malloc(sizeof(*root));
memset(root,0,sizeof(*root));
root->prefix = strdup("");
root->file_root = root;
root->root = root;
char buffer[512];
snprintf( buffer, sizeof(buffer), "%s/%%ROOT|", filename );
trie_entry_load_from_file( buffer, root );
return root;
}
bool ffdb_trie_set( const char* filename, const char* key, const char* value )
{
DEBUG_printf( "ffdb_trie_set( filename = '%s', key = '%s', value = '%s' )\n", filename, key, value );
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
bool result = false;
mkdir( filename, 0755 );
char buffer[512];
snprintf( buffer,sizeof(buffer), "%s/.lock", filename );
int fd = open( buffer, O_CREAT );
flock( fd, LOCK_EX );
root = load_root_node(filename);
int this_count = 0;
for( int i = 0; i < root->edges.count; ++i ) {
this_count += root->edges.items[i].count;
}
int res = trie_entry_set( root, key_escaped, value, this_count );
if( !res ) {
printf( "Failed to set %s to %s\n", key, value );
goto failed;
}
result = true;
cleanup:
free(key_escaped);
trie_entry_free(root);
flock( fd, LOCK_UN );
close(fd);
printf( "\n" );
return result;
failed:
result = false;
goto cleanup;
}
static char* lookup_key( struct trie_entry* e, const char* key )
{
int key_length = strlen(key);
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
int prefix_len = prefix_match( ed->label, key );
bool match = false;
//int label_length = strlen(ed->label);
if( prefix_len == strlen(ed->label ) ) {
if( prefix_len == 0 ) {
// terminal requires exact match
if( key_length == 0 ) {
match = true;
} else {
match = false;
}
} else {
match = true;
}
}
if( match ) {
const char* remaining = &key[prefix_len];
//printf( "match=%s, remaining=%s\n", ed->label, remaining );
if( ed->child_trie ) {
return lookup_key( ed->child_trie, remaining );
} else if( ed->count > 1 ) {
//branch->prefix = aformat( "%s%s", e->prefix, ed->label );
//DEBUG_printf( "e->filename=%s\n", e->filename );
//DEBUG_printf( "branch.prefix=%s\n", branch.prefix );
char filename[512];
snprintf( filename, sizeof(filename), "%s%s%s", e->root->filename, e->prefix, ed->label );
bool needs_freed = false;
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
char* result = lookup_key( branch, remaining );
if( needs_freed ) {
trie_entry_free(branch);
}
return result;
} else {
if( 0 == strcmp( ed->label, key ) ) {
//printf( "ed->label=%s, result=%s\n", ed->label, ed->value );
return strdup(ed->value);
} else {
return NULL;
}
}
}
}
return NULL;
}
char* ffdb_trie_get( const char* filename, const char* key )
{
printf( "ffdb_trie_get( %s, %s )\n", filename, key );
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
char* result = NULL;
root = load_root_node(filename);
if( !root ) { goto failed; }
result = lookup_key( root, key_escaped );
cleanup:
free(key_escaped);
trie_entry_free(root);
printf( "\n" );
return result;
failed:
free(result);
result = NULL;
goto cleanup;
}
bool ffdb_trie_remove( const char* filename, const char* key )
{
return ffdb_trie_set( filename, key, NULL );
}
int ffdb_trie_count( const char* filename )
{
struct trie_entry* root = NULL;
int result = 0;
root = load_root_node(filename);
if( !root ) { goto failed; }
for( int i = 0; i < root->edges.count; ++i ) {
result += root->edges.items[i].count;
}
cleanup:
trie_entry_free(root);
return result;
failed:
result = -1;
goto cleanup;
}
void ffdb_trie_clean( const char* filename )
{
struct trie_entry* root = NULL;
//int result = 0;
root = load_root_node(filename);
if( !root ) { goto failed; }
cleanup:
trie_entry_free(root);
return;
failed:
goto cleanup;
}
struct string_array {
char** items;
int count;
};
static void fixup_consolidated_trie( struct trie_entry* e )
{
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->child_trie ) {
ed->child_trie->file_root = e->file_root;
free(ed->child_trie->filename);
ed->child_trie->filename = strdup(e->file_root->filename);
fixup_consolidated_trie(ed->child_trie);
}
}
}
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed )
{
assert( e->file_root );
assert( e->file_root->filename );
assert( e->root );
assert( e->prefix );
size_t s = trie_entry_measure( e->file_root );
DEBUG_printf( "size=%lu, filename=%s\n", s, e->file_root->filename );
if( s < 4096 ) {
DEBUG_printf( "trie entry %s is %ld bytes, and has file child (%s), candidate for merge\n", e->file_root->filename, s, filename );
ed->child_trie = malloc(sizeof(struct trie_entry));
memset(ed->child_trie,0,sizeof(struct trie_entry));
ed->child_trie->prefix = aformat( "%s%s", e->prefix, ed->label );
ed->child_trie->root = e->root;
trie_entry_load_from_file( filename, ed->child_trie );
ed->child_trie->file_root = e->file_root;
fixup_consolidated_trie( ed->child_trie );
ed->count = 0;
for( int i = 0; i < ed->child_trie->edges.count; ++i ) {
ed->count += ed->child_trie->edges.items[i].count;
}
trie_entry_save_to_file( e->file_root->filename, e->file_root );
DEBUG_printf( "deleting (1) %s\n", filename );
remove(filename);
*needs_freed = false;
assert( ed->child_trie );
return ed->child_trie;
} else {
//DEBUG_printf( "Loading trie at %s\n", filename );
// Load items from the branch
struct trie_entry* branch;
branch = malloc(sizeof(*branch));
memset(branch,0,sizeof(*branch));
branch->prefix = aformat( "%s%s", e->prefix, ed->label );
branch->root = e->root;
trie_entry_load_from_file( filename, branch );
assert( branch );
*needs_freed = true;
return branch;
}
}
static void load_items( struct trie_entry* e, int offset, int limit, struct string_array* keys, struct string_array* values )
{
assert( e );
assert( e->prefix );
assert( e->edges.count == 0 || e->edges.items );
assert( keys || values );
if( limit <= 0 ) { return; }
DEBUG_printf( "Looking at edges in '%s'\n", e->prefix );
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
DEBUG_printf( "ed = { .label = '%s', .value = '%s', .count = %d }\n", ed->label, ed->value, ed->count );
if( ed->count <= offset ) {
DEBUG_printf( "Skipping all for '%s' offset=%d\n", e->prefix, offset );
// branch - skip all
offset -= ed->count;
} else if( ed->count > 1 ) {
// branch - skip part, include part
int branch_count = ed->count - offset;
DEBUG_printf( "Skipping part of '%s', offset=%d, branch_count=%d, limit=%d\n", e->prefix, offset, branch_count, limit );
if( branch_count > limit ) {
branch_count = limit;
}
if( ed->child_trie ) {
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
DEBUG_printf( "Traversing down child trie, effective filename=%s\n", filename );
ed->child_trie->filename = strdup(filename);
ed->child_trie->prefix = aformat( "%s%s", e->prefix, ed->label );
load_items( ed->child_trie, offset, branch_count, keys, values );
offset = 0;
limit -= branch_count;
} else {
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
bool needs_freed = false;
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
load_items( branch, offset, branch_count, keys, values );
if( needs_freed ) {
trie_entry_free(branch);
}
offset = 0;
limit -= branch_count;
}
} else if( limit > 0 ) {
// leaf - include
char* key = aformat( "%s%s", e->prefix, ed->label );
DEBUG_printf( "Loading item %s=%s from '%s', offset=%d\n", key, ed->value, e->prefix, offset );
if( keys ) {
array_append( keys, sizeof(key), &key );
} else {
free(key);
}
if( values ) {
char* str = strdup( ed->value );
array_append( values, sizeof(str), &str );
}
limit -= 1;
}
if( limit == 0 ) {
break;
}
}
DEBUG_printf( "Out of edges in '%s'\n", e->prefix );
}
int ffdb_trie_index_for_key( const char* filename, const char* key )
{
return -1;
}
void ffdb_trie_list( const char* filename, int offset, int limit, void* keys_ptr, void* values_ptr )
{
DEBUG_printf( "ffdb_trie_list( %s, %d, %d, %p, %p )\n", filename, offset, limit, keys_ptr, values_ptr );
struct trie_entry* root = load_root_node( filename );
root->prefix = strdup("");
load_items( root, offset, limit, keys_ptr, values_ptr );
printf( "---- root->filename = %s\n", root->filename );
trie_entry_free(root);
printf( "---- root released\n\n" );
}
bool ffdb_trie_get_index( const char* filename, int offset, char** key, char** value )
{
DEBUG_printf( "ffdb_trie_get_index( %s, %d, %p, %p )\n", filename, offset, key, value );
bool result = false;
/// Convenience function to get single item at offset
struct string_array keys;
struct string_array values;
memset(&keys,0,sizeof(keys));
memset(&values,0,sizeof(values));
ffdb_trie_list( filename, offset, 1, &keys, value ? &values : NULL );
if( keys.count == 0 ) {
result = false;
} else {
result = true;
if( key ) {
*key = keys.items[0];
} else {
free( keys.items[0] );
}
if( value ) {
*value = values.items[0];
} else {
free( values.items[0] );
}
}
free(keys.items);
free(values.items);
printf( "\n" );
return result;
}