You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1028 lines
26 KiB
C
1028 lines
26 KiB
C
#include "trie.h"
|
|
|
|
#include "json/json.h"
|
|
#include "collections/array.h"
|
|
#include "util/format.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/file.h>
|
|
#include <signal.h>
|
|
|
|
#define DEBUG
|
|
|
|
#ifdef DEBUG
|
|
#define DEBUG_printf printf
|
|
#define DEBUG_fflush fflush
|
|
#else
|
|
#define DEBUG_printf(...)
|
|
#define DEBUG_fflush(...)
|
|
#endif
|
|
|
|
enum {
|
|
tt_value = 1,
|
|
tt_inline = 2,
|
|
tt_extern = 3,
|
|
};
|
|
|
|
struct trie_entry;
|
|
struct edge
|
|
{
|
|
char* label;
|
|
char* value;
|
|
int count;
|
|
struct trie_entry* child_trie;
|
|
};
|
|
|
|
struct trie_entry
|
|
{
|
|
struct {
|
|
struct edge* items;
|
|
int count;
|
|
} edges;
|
|
|
|
char* filename;
|
|
char* prefix;
|
|
struct trie_entry* file_root;
|
|
struct trie_entry* root;
|
|
|
|
bool dirty;
|
|
};
|
|
|
|
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed );
|
|
static void trie_entry_free( struct trie_entry* e );
|
|
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e );
|
|
static void fixup_consolidated_trie( struct trie_entry* e );
|
|
|
|
static void trie_entry_free_composite( struct trie_entry* e )
|
|
{
|
|
if( e->dirty ) {
|
|
trie_entry_save_to_file( e->filename, e );
|
|
}
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
free(e->edges.items[i].label);
|
|
free(e->edges.items[i].value);
|
|
if( e->edges.items[i].child_trie ) {
|
|
trie_entry_free( e->edges.items[i].child_trie );
|
|
}
|
|
|
|
e->edges.items[i].child_trie = NULL;
|
|
}
|
|
free(e->edges.items); e->edges.items = NULL;
|
|
free(e->filename); e->filename = NULL;
|
|
free(e->prefix); e->prefix = NULL;
|
|
}
|
|
static void trie_entry_free( struct trie_entry* e )
|
|
{
|
|
if( !e ) { return; }
|
|
trie_entry_free_composite(e);
|
|
free(e);
|
|
}
|
|
|
|
static bool trie_entry_load_inside( struct json_pull_parser* jpp, struct trie_entry* e )
|
|
{
|
|
assert( e->prefix );
|
|
assert( e->filename );
|
|
assert( e->file_root );
|
|
assert( e->root );
|
|
|
|
int save;
|
|
if( !json_pull_parser_begin_object(jpp,&save) ) { goto failed; }
|
|
|
|
e->edges.count = 0;
|
|
|
|
char* edge_label = NULL;
|
|
while(( edge_label = json_pull_parser_read_object_key(jpp) )) {
|
|
e->edges.count += 1;
|
|
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
|
|
struct edge* ed = &e->edges.items[ e->edges.count-1 ];
|
|
memset(ed,0,sizeof(*ed));
|
|
|
|
ed->label = edge_label;
|
|
if( json_pull_parser_read_int( jpp, &ed->count ) ) {
|
|
continue;
|
|
}
|
|
|
|
char* value = json_pull_parser_read_string(jpp);
|
|
if( value ) {
|
|
ed->value = value;
|
|
ed->count = 1;
|
|
continue;
|
|
}
|
|
|
|
struct trie_entry* child = NULL;
|
|
child = malloc(sizeof(*child));
|
|
memset(child,0,sizeof(*child));
|
|
child->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
child->filename = strdup(e->filename);
|
|
child->file_root = e->file_root;
|
|
child->root = e->root;
|
|
if( trie_entry_load_inside( jpp, child ) ) {
|
|
ed->child_trie = child;
|
|
ed->count = 0;
|
|
for( int i = 0; i < child->edges.count; ++i ) {
|
|
ed->count += child->edges.items[i].count;
|
|
}
|
|
continue;
|
|
} else {
|
|
DEBUG_printf( "failed!!!\n" );
|
|
}
|
|
|
|
free(child);
|
|
goto failed;
|
|
}
|
|
|
|
if( !json_pull_parser_end_object(jpp,&save) ) {
|
|
DEBUG_printf( "expecting end of object, but didn't get one\n" );
|
|
goto failed;
|
|
}
|
|
|
|
return true;
|
|
failed:
|
|
return false;
|
|
}
|
|
static bool trie_entry_load( FILE* f, struct trie_entry* e )
|
|
{
|
|
if( !f ) { return false; }
|
|
|
|
struct json_pull_parser jpp = {
|
|
.f = f,
|
|
.curr_state = jpp_initial_state,
|
|
};
|
|
|
|
bool result = trie_entry_load_inside( &jpp, e );
|
|
|
|
assert( e->prefix );
|
|
//e->prefix = strdup("");
|
|
|
|
if( !result ) {
|
|
printf( "parse error. Remaining data:\n" );
|
|
int c;
|
|
while( (c=fgetc(f)) != EOF ) {
|
|
fputc( c, stdout );
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
static bool trie_entry_load_from_file( const char* filename, struct trie_entry* e )
|
|
{
|
|
DEBUG_printf( "Loading trie at %s\n", filename );
|
|
|
|
assert( !strstr(filename,"(null)") );
|
|
|
|
e->filename = strdup(filename);
|
|
e->file_root = e;
|
|
|
|
FILE* f = fopen( filename, "r" );
|
|
if( !f ) {
|
|
printf( "Failed to open file %s\n", filename );
|
|
if( strstr(filename,"%ROOT|")[6] ) {
|
|
//__builtin_trap();
|
|
fflush(stdout);
|
|
raise(SIGTRAP);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool result = trie_entry_load( f, e );
|
|
fclose(f);
|
|
|
|
DEBUG_printf( "e = { .edges = [...], .filename = %s, .prefix = %s }\n",
|
|
e->filename, e->prefix
|
|
);
|
|
|
|
return result;
|
|
}
|
|
|
|
static void trie_entry_save_inside( FILE* f, struct trie_entry* e, int indent )
|
|
{
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
if( ed->count == 1 && !ed->value && !ed->child_trie ) {
|
|
fflush(stdout);
|
|
raise(SIGTRAP);
|
|
// Somehow the count on an edge was lower than the actual number of
|
|
// children. Don't save with invalid edge (implicit += 1 to edge count)
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Selection Sort by edge
|
|
for( int i = 0; i < e->edges.count ; ++i ) {
|
|
int lowest = i;
|
|
const char* lowest_label = e->edges.items[i].label;
|
|
|
|
for( int j = i + 1; j < e->edges.count; ++j ) {
|
|
if( strcmp( lowest_label, e->edges.items[j].label ) < 0 ) {
|
|
lowest = j;
|
|
lowest_label = e->edges.items[j].label;
|
|
}
|
|
}
|
|
|
|
if( lowest != i ) {
|
|
struct edge tmp;
|
|
memcpy( &tmp, &e->edges.items[i], sizeof(tmp) );
|
|
memcpy( &e->edges.items[i], &e->edges.items[lowest], sizeof(tmp) );
|
|
memcpy( &e->edges.items[lowest], &tmp, sizeof(tmp) );
|
|
}
|
|
}
|
|
|
|
fprintf( f, "{" );
|
|
indent += 1;
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
if( i != 0 ) {
|
|
fprintf( f, ",\n" );
|
|
} else {
|
|
fprintf( f, "\n" );
|
|
}
|
|
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
|
|
|
|
struct edge* ed = &e->edges.items[i];
|
|
json_write_string( f, ed->label );
|
|
fprintf( f, ": " );
|
|
if( ed->child_trie ) {
|
|
trie_entry_save_inside( f, ed->child_trie, indent );
|
|
} else if( ed->count > 1 ) {
|
|
fprintf( f, "%d", ed->count );
|
|
} else {
|
|
assert( ed->value );
|
|
json_write_string( f, ed->value );
|
|
}
|
|
}
|
|
indent -= 1;
|
|
fprintf( f, "\n" );
|
|
for( int i = 0; i < indent; ++i ) { fprintf( f, "\t" ); }
|
|
fprintf( f, "}" );
|
|
}
|
|
static void trie_entry_save( FILE* f, struct trie_entry* e )
|
|
{
|
|
trie_entry_save_inside( f, e, 0 );
|
|
}
|
|
static size_t trie_entry_measure( struct trie_entry* e )
|
|
{
|
|
char* data = NULL;
|
|
size_t result = 0;
|
|
FILE* f = open_memstream( &data, &result );
|
|
trie_entry_save( f, e );
|
|
fclose(f);
|
|
free(data);
|
|
DEBUG_printf( "trie_entry_measure( e=%s ) = %ld\n", e->filename, result );
|
|
return result;
|
|
}
|
|
static int trie_entry_count_child_entries( struct trie_entry* e )
|
|
{
|
|
assert(e);
|
|
|
|
int result = 0;
|
|
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
if( ed->child_trie ) {
|
|
result += trie_entry_count_child_entries( ed->child_trie );
|
|
result += 1;
|
|
}
|
|
}
|
|
|
|
//DEBUG_printf( "node %p has %d child entries\n", e, result );
|
|
return result;
|
|
}
|
|
static void trie_entry_break_at( struct trie_entry* e, int* break_at )
|
|
{
|
|
assert(e);
|
|
assert(break_at);
|
|
assert(e->file_root);
|
|
assert(e->file_root->prefix);
|
|
assert(e->file_root->filename);
|
|
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
if( ed->child_trie ) {
|
|
*break_at -= 1;
|
|
if( !*break_at ) {
|
|
printf( "\tbreaking node with prefix=%s\n", ed->child_trie->prefix );
|
|
printf( "\t\te->root = %p\n", e->root );
|
|
printf( "\t\te->root->filename = %s\n", e->root->filename );
|
|
//printf( "\t\te->file_root->prefix = %s\n", e->file_root->prefix );
|
|
|
|
struct trie_entry* broken_e = ed->child_trie;
|
|
|
|
char* new_filename = aformat( "%s%s", e->root->filename, broken_e->prefix );
|
|
printf( "\t\tnew_filename = %s\n", new_filename );
|
|
printf( "\t\ted->count = %d\n", ed->count );
|
|
|
|
broken_e->filename = new_filename;
|
|
broken_e->file_root = broken_e;
|
|
fixup_consolidated_trie( broken_e );
|
|
trie_entry_save_to_file( broken_e->filename, broken_e );
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
} else {
|
|
trie_entry_break_at( ed->child_trie, break_at );
|
|
}
|
|
}
|
|
|
|
if( !*break_at ) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e )
|
|
{
|
|
DEBUG_printf( "Saving trie to %s\n", filename );
|
|
|
|
assert(e);
|
|
|
|
size_t s = trie_entry_measure( e );
|
|
if( s > 4096 ) {
|
|
printf( "split trie_entry, s=%ld, e=%p, e->file_root=%p\n", s, e, e->file_root );
|
|
int count = trie_entry_count_child_entries( e );
|
|
printf( "\tchild tries = %d\n", count );
|
|
int break_at = rand() % count;
|
|
printf( "\tsplitting at child # %d\n", break_at );
|
|
trie_entry_break_at( e, &break_at );
|
|
}
|
|
|
|
/*
|
|
if( 0 == strcmp(filename,"data/accounts/000m/000k/004/timeline" ) ) {
|
|
__builtin_trap();
|
|
}
|
|
//*/
|
|
char tmp_filename[512];
|
|
snprintf( tmp_filename, sizeof(tmp_filename), "%s.tmp", filename );
|
|
FILE* f = fopen(tmp_filename,"w");
|
|
if( !f ) { return false; }
|
|
|
|
trie_entry_save( f, e );
|
|
|
|
fclose(f);
|
|
rename( tmp_filename, filename );
|
|
DEBUG_printf( "Saved trie to %s\n", filename );
|
|
return true;
|
|
}
|
|
|
|
// Escape '/' as "%|" and '%' as "%%"
|
|
static char* escape( const char* str )
|
|
{
|
|
int size = 0;
|
|
for( const char* i = str; *i; ++i ) {
|
|
switch( *i ) {
|
|
case '/': size += 2; break;
|
|
case '%': size += 2; break;
|
|
default: size += 1; break;
|
|
}
|
|
}
|
|
|
|
char* result = malloc(size+1);
|
|
char* o = result;
|
|
for( const char* i = str; *i; ++i ) {
|
|
switch( *i ) {
|
|
case '/':
|
|
o[0] = '%';
|
|
o[1] = '|';
|
|
o += 2;
|
|
break;
|
|
case '%':
|
|
o[0] = '%';
|
|
o[1] = '%';
|
|
o += 2;
|
|
break;
|
|
default:
|
|
*o = *i;
|
|
++o;
|
|
}
|
|
}
|
|
*o = '\0';
|
|
|
|
return result;
|
|
}
|
|
|
|
static int prefix_match( const char* a, const char* b )
|
|
{
|
|
int res = 0;
|
|
while( *a && *b && *a == *b ) {
|
|
a += 1;
|
|
b += 1;
|
|
res += 1;
|
|
}
|
|
return res;
|
|
}
|
|
static int trie_entry_calculate_size( struct trie_entry* e )
|
|
{
|
|
int this_count = 0;
|
|
struct edge* ed = NULL;
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
ed = &e->edges.items[i];
|
|
this_count += ed->count;
|
|
}
|
|
|
|
return this_count;
|
|
}
|
|
|
|
enum {
|
|
trie_entry_set_result_failed_to_add = 0,
|
|
trie_entry_set_result_updated_existing = 1,
|
|
trie_entry_set_result_added_new = 2,
|
|
trie_entry_set_result_deleted_existing = 3,
|
|
trie_entry_set_result_no_change = 4,
|
|
};
|
|
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count );
|
|
static int trie_entry_add_new_edge( struct trie_entry* e, const char* key, const char* value, int parent_count )
|
|
{
|
|
if( !value ) {
|
|
DEBUG_printf( "value is null, not adding\n" );
|
|
return trie_entry_set_result_no_change;
|
|
}
|
|
|
|
DEBUG_printf( "add_new_edge %s\n", key );
|
|
|
|
e->edges.count += 1;
|
|
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
|
|
|
|
struct edge* ed2 = &e->edges.items[e->edges.count-1];
|
|
ed2->label = strdup(key);
|
|
ed2->value = strdup(value);
|
|
ed2->count = 1;
|
|
ed2->child_trie = NULL;
|
|
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
|
|
return trie_entry_set_result_added_new;
|
|
}
|
|
static int trie_entry_delete_existing( struct trie_entry* e, struct edge* ed, const char* key, int parent_count )
|
|
{
|
|
int i = ed - e->edges.items;
|
|
if( i != e->edges.count - 1 ) {
|
|
struct edge tmp;
|
|
memcpy( &tmp, ed, sizeof(*ed) );
|
|
memcpy( ed, &e->edges.items[ e->edges.count - 1 ], sizeof(*ed) );
|
|
|
|
free(tmp.label);
|
|
}
|
|
e->edges.count -= 1;
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
return trie_entry_set_result_deleted_existing;
|
|
}
|
|
static int trie_entry_update_existing( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int parent_count )
|
|
{
|
|
DEBUG_printf( "updating existing value\n" );
|
|
free(ed->value);
|
|
if( value == NULL ) {
|
|
return trie_entry_delete_existing( e, ed, key, parent_count );
|
|
}
|
|
ed->value = strdup(value);
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
|
|
return trie_entry_set_result_updated_existing;
|
|
}
|
|
static int trie_entry_split_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
|
|
{
|
|
assert( e->prefix );
|
|
|
|
DEBUG_printf( "split_existing_edge\n" );
|
|
if( !value ) {
|
|
return trie_entry_set_result_no_change;
|
|
}
|
|
|
|
char* new_prefix = strndup( key, prefix_len );
|
|
char* new_filename = malloc(strlen(e->filename)+prefix_len+1);
|
|
strcpy(new_filename,e->filename);
|
|
strcat(new_filename,new_prefix);
|
|
|
|
// Create the new node
|
|
struct trie_entry* new_e = malloc(sizeof(struct trie_entry));
|
|
memset(new_e,0,sizeof(*new_e));
|
|
new_e->filename = new_filename;
|
|
new_e->prefix = aformat( "%s%s", e->prefix, new_prefix );
|
|
|
|
new_e->edges.items = malloc( sizeof(struct edge) * 2 );
|
|
memset( new_e->edges.items, 0, sizeof(struct edge) * 2 );
|
|
|
|
new_e->edges.items[0].label = strdup(&ed->label[prefix_len]);
|
|
if( ed->count == 1 ) {
|
|
new_e->edges.items[0].value = ed->value;
|
|
}
|
|
new_e->edges.items[0].count = ed->count;
|
|
new_e->edges.items[0].child_trie = ed->child_trie;
|
|
|
|
new_e->edges.items[1].label = strdup(&key[prefix_len]);
|
|
new_e->edges.items[1].value = strdup(value);
|
|
new_e->edges.items[1].count = 1;
|
|
new_e->edges.count = 2;
|
|
|
|
// Update the existing edge
|
|
free(ed->label);
|
|
ed->label = new_prefix;
|
|
ed->value = NULL;
|
|
ed->count = new_e->edges.items[0].count + new_e->edges.items[1].count;
|
|
|
|
size_t s = trie_entry_measure(e->file_root);
|
|
if( s > 4096 ) {
|
|
DEBUG_printf( "Forcing file split (s=%lu)\n", s );
|
|
DEBUG_printf( "new_e = %p\n", new_e );
|
|
DEBUG_printf( "e = %p\n", e );
|
|
new_e->dirty = true;
|
|
trie_entry_free(new_e);
|
|
ed->child_trie = NULL;
|
|
} else {
|
|
ed->child_trie = new_e;
|
|
new_e->file_root = e->file_root;
|
|
new_e->root = e->root;
|
|
}
|
|
|
|
e->file_root->dirty = true;
|
|
|
|
return trie_entry_set_result_added_new;
|
|
}
|
|
static int trie_entry_traverse_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
|
|
{
|
|
assert( e->prefix );
|
|
|
|
DEBUG_printf( "traverse_existing_edge\n" );
|
|
if( ed->count == 1 ) {
|
|
DEBUG_printf( "single value %s\n", ed->label );
|
|
if( 0 == strcmp( ed->label, key ) ) {
|
|
return trie_entry_update_existing( e, ed, key, value, parent_count );
|
|
} else {
|
|
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
|
|
}
|
|
}
|
|
|
|
char filename[512];
|
|
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
|
|
DEBUG_printf( "Traversing down %s (filename=%s) key=%s, remaining=%s, count=%d\n", ed->label, filename, key, &key[prefix_len], ed->count );
|
|
|
|
int result;
|
|
|
|
struct trie_entry* branch;
|
|
bool needs_freed = false;
|
|
if( ed->child_trie ) {
|
|
branch = ed->child_trie;
|
|
branch->filename = aformat("%s%s", e->filename, ed->label );
|
|
branch->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
} else {
|
|
branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
|
|
}
|
|
result = trie_entry_set( branch, &key[prefix_len], value, trie_entry_calculate_size( branch ) );
|
|
|
|
DEBUG_printf( "result=%d\n", result );
|
|
|
|
// update count
|
|
if( result == trie_entry_set_result_added_new ) {
|
|
DEBUG_printf( "Updated edge count +1\n" );
|
|
ed->count += 1;
|
|
} else if( result == trie_entry_set_result_deleted_existing ) {
|
|
DEBUG_printf( "Updated edge count -1\n" );
|
|
ed->count -= 1;
|
|
if( branch->edges.count == 1 ) {
|
|
char* new_label = aformat( "%s%s", ed->label, branch->edges.items[0].label );
|
|
free(ed->label); ed->label = new_label;
|
|
free(ed->value);
|
|
if( branch->edges.items[0].value ) {
|
|
ed->value = strdup(branch->edges.items[0].value);
|
|
} else {
|
|
ed->value = NULL;
|
|
}
|
|
|
|
if( ed->child_trie ) {
|
|
ed->child_trie->dirty = false;
|
|
trie_entry_free(ed->child_trie);
|
|
ed->child_trie = NULL;
|
|
} else {
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
printf( "deleting (2) %s\n", branch->filename );
|
|
remove( branch->filename );
|
|
}
|
|
}
|
|
}
|
|
|
|
if( needs_freed ) {
|
|
trie_entry_free(branch);
|
|
}
|
|
|
|
DEBUG_printf( "Traverse result = %d\n", result );
|
|
/*
|
|
if( parent_count > this_count ) {
|
|
if( result == trie_entry_set_result_updated_existing ) {
|
|
DEBUG_printf( "Fixing up parent count parent_count = %d, this_count = %d\n", parent_count, this_count );
|
|
return trie_entry_set_result_deleted_existing;
|
|
}
|
|
}
|
|
*/
|
|
return result;
|
|
}
|
|
|
|
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count )
|
|
{
|
|
int prefix_len = 0;
|
|
struct edge* ed;
|
|
int key_length = strlen(key);
|
|
|
|
#ifdef DEBUG
|
|
/*
|
|
int this_count = trie_entry_calculate_size( e );
|
|
DEBUG_printf( "root this_count = %d, parent_count = %d\n", this_count, parent_count );
|
|
*/
|
|
#endif
|
|
|
|
DEBUG_printf( "e->prefix = \"%s\"\n", e->prefix );
|
|
DEBUG_printf( "key: %s\n", key );
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
ed = &e->edges.items[i];
|
|
|
|
prefix_len = prefix_match( key, ed->label );
|
|
//DEBUG_printf( "label[%d]: %s, prefix_len=%d\n", i, ed->label, prefix_len );
|
|
//DEBUG_fflush(stdout);
|
|
if( prefix_len == strlen( ed->label ) ) {
|
|
if( prefix_len == 0 ) {
|
|
if( key_length == 0 ) {
|
|
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
|
|
}
|
|
} else {
|
|
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
|
|
}
|
|
} else if( prefix_len != 0 ) {
|
|
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
|
|
}
|
|
}
|
|
|
|
return trie_entry_add_new_edge( e, key, value, parent_count );
|
|
}
|
|
|
|
static struct trie_entry* load_root_node( const char* filename )
|
|
{
|
|
struct trie_entry* root = malloc(sizeof(*root));
|
|
memset(root,0,sizeof(*root));
|
|
root->prefix = strdup("");
|
|
root->file_root = root;
|
|
root->root = root;
|
|
|
|
char buffer[512];
|
|
snprintf( buffer, sizeof(buffer), "%s/%%ROOT|", filename );
|
|
trie_entry_load_from_file( buffer, root );
|
|
|
|
return root;
|
|
}
|
|
|
|
bool ffdb_trie_set( const char* filename, const char* key, const char* value )
|
|
{
|
|
DEBUG_printf( "ffdb_trie_set( filename = '%s', key = '%s', value = '%s' )\n", filename, key, value );
|
|
|
|
struct trie_entry* root = NULL;
|
|
char* key_escaped = escape(key);
|
|
bool result = false;
|
|
|
|
mkdir( filename, 0755 );
|
|
char buffer[512];
|
|
snprintf( buffer,sizeof(buffer), "%s/.lock", filename );
|
|
int fd = open( buffer, O_CREAT );
|
|
flock( fd, LOCK_EX );
|
|
root = load_root_node(filename);
|
|
|
|
int this_count = 0;
|
|
for( int i = 0; i < root->edges.count; ++i ) {
|
|
this_count += root->edges.items[i].count;
|
|
}
|
|
|
|
int res = trie_entry_set( root, key_escaped, value, this_count );
|
|
if( !res ) {
|
|
printf( "Failed to set %s to %s\n", key, value );
|
|
goto failed;
|
|
}
|
|
|
|
result = true;
|
|
cleanup:
|
|
free(key_escaped);
|
|
trie_entry_free(root);
|
|
|
|
flock( fd, LOCK_UN );
|
|
close(fd);
|
|
printf( "\n" );
|
|
return result;
|
|
failed:
|
|
result = false;
|
|
goto cleanup;
|
|
}
|
|
|
|
static char* lookup_key( struct trie_entry* e, const char* key )
|
|
{
|
|
int key_length = strlen(key);
|
|
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
|
|
int prefix_len = prefix_match( ed->label, key );
|
|
|
|
bool match = false;
|
|
//int label_length = strlen(ed->label);
|
|
if( prefix_len == strlen(ed->label ) ) {
|
|
if( prefix_len == 0 ) {
|
|
// terminal requires exact match
|
|
if( key_length == 0 ) {
|
|
match = true;
|
|
} else {
|
|
match = false;
|
|
}
|
|
} else {
|
|
match = true;
|
|
}
|
|
}
|
|
|
|
if( match ) {
|
|
const char* remaining = &key[prefix_len];
|
|
//printf( "match=%s, remaining=%s\n", ed->label, remaining );
|
|
|
|
if( ed->child_trie ) {
|
|
return lookup_key( ed->child_trie, remaining );
|
|
} else if( ed->count > 1 ) {
|
|
|
|
//branch->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
//DEBUG_printf( "e->filename=%s\n", e->filename );
|
|
//DEBUG_printf( "branch.prefix=%s\n", branch.prefix );
|
|
|
|
char filename[512];
|
|
snprintf( filename, sizeof(filename), "%s%s%s", e->root->filename, e->prefix, ed->label );
|
|
|
|
bool needs_freed = false;
|
|
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
|
|
|
|
char* result = lookup_key( branch, remaining );
|
|
if( needs_freed ) {
|
|
trie_entry_free(branch);
|
|
}
|
|
return result;
|
|
} else {
|
|
if( 0 == strcmp( ed->label, key ) ) {
|
|
//printf( "ed->label=%s, result=%s\n", ed->label, ed->value );
|
|
return strdup(ed->value);
|
|
} else {
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
char* ffdb_trie_get( const char* filename, const char* key )
|
|
{
|
|
printf( "ffdb_trie_get( %s, %s )\n", filename, key );
|
|
struct trie_entry* root = NULL;
|
|
char* key_escaped = escape(key);
|
|
char* result = NULL;
|
|
|
|
root = load_root_node(filename);
|
|
if( !root ) { goto failed; }
|
|
|
|
result = lookup_key( root, key_escaped );
|
|
|
|
cleanup:
|
|
free(key_escaped);
|
|
trie_entry_free(root);
|
|
printf( "\n" );
|
|
return result;
|
|
failed:
|
|
free(result);
|
|
result = NULL;
|
|
goto cleanup;
|
|
}
|
|
bool ffdb_trie_remove( const char* filename, const char* key )
|
|
{
|
|
return ffdb_trie_set( filename, key, NULL );
|
|
}
|
|
int ffdb_trie_count( const char* filename )
|
|
{
|
|
struct trie_entry* root = NULL;
|
|
int result = 0;
|
|
|
|
root = load_root_node(filename);
|
|
if( !root ) { goto failed; }
|
|
|
|
for( int i = 0; i < root->edges.count; ++i ) {
|
|
result += root->edges.items[i].count;
|
|
}
|
|
cleanup:
|
|
trie_entry_free(root);
|
|
return result;
|
|
failed:
|
|
result = -1;
|
|
goto cleanup;
|
|
}
|
|
|
|
void ffdb_trie_clean( const char* filename )
|
|
{
|
|
struct trie_entry* root = NULL;
|
|
//int result = 0;
|
|
|
|
root = load_root_node(filename);
|
|
if( !root ) { goto failed; }
|
|
cleanup:
|
|
trie_entry_free(root);
|
|
return;
|
|
failed:
|
|
goto cleanup;
|
|
|
|
}
|
|
struct string_array {
|
|
char** items;
|
|
int count;
|
|
};
|
|
|
|
static void fixup_consolidated_trie( struct trie_entry* e )
|
|
{
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
if( ed->child_trie ) {
|
|
ed->child_trie->file_root = e->file_root;
|
|
free(ed->child_trie->filename);
|
|
ed->child_trie->filename = strdup(e->file_root->filename);
|
|
fixup_consolidated_trie(ed->child_trie);
|
|
}
|
|
}
|
|
}
|
|
|
|
static struct trie_entry* trie_entry_load_and_try_consolidate( const char* filename, struct trie_entry* e, struct edge* ed, bool* needs_freed )
|
|
{
|
|
assert( e->file_root );
|
|
assert( e->file_root->filename );
|
|
assert( e->root );
|
|
assert( e->prefix );
|
|
|
|
size_t s = trie_entry_measure( e->file_root );
|
|
DEBUG_printf( "size=%lu, filename=%s\n", s, e->file_root->filename );
|
|
|
|
if( s < 4096 ) {
|
|
DEBUG_printf( "trie entry %s is %ld bytes, and has file child (%s), candidate for merge\n", e->file_root->filename, s, filename );
|
|
|
|
ed->child_trie = malloc(sizeof(struct trie_entry));
|
|
memset(ed->child_trie,0,sizeof(struct trie_entry));
|
|
|
|
ed->child_trie->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
ed->child_trie->root = e->root;
|
|
|
|
trie_entry_load_from_file( filename, ed->child_trie );
|
|
ed->child_trie->file_root = e->file_root;
|
|
fixup_consolidated_trie( ed->child_trie );
|
|
|
|
ed->count = 0;
|
|
for( int i = 0; i < ed->child_trie->edges.count; ++i ) {
|
|
ed->count += ed->child_trie->edges.items[i].count;
|
|
}
|
|
|
|
trie_entry_save_to_file( e->file_root->filename, e->file_root );
|
|
|
|
DEBUG_printf( "deleting (1) %s\n", filename );
|
|
remove(filename);
|
|
|
|
*needs_freed = false;
|
|
assert( ed->child_trie );
|
|
return ed->child_trie;
|
|
|
|
} else {
|
|
//DEBUG_printf( "Loading trie at %s\n", filename );
|
|
|
|
// Load items from the branch
|
|
struct trie_entry* branch;
|
|
branch = malloc(sizeof(*branch));
|
|
memset(branch,0,sizeof(*branch));
|
|
branch->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
branch->root = e->root;
|
|
|
|
trie_entry_load_from_file( filename, branch );
|
|
assert( branch );
|
|
|
|
*needs_freed = true;
|
|
return branch;
|
|
}
|
|
}
|
|
|
|
static void load_items( struct trie_entry* e, int offset, int limit, struct string_array* keys, struct string_array* values )
|
|
{
|
|
assert( e );
|
|
assert( e->prefix );
|
|
assert( e->edges.count == 0 || e->edges.items );
|
|
assert( keys || values );
|
|
|
|
if( limit <= 0 ) { return; }
|
|
|
|
DEBUG_printf( "Looking at edges in '%s'\n", e->prefix );
|
|
for( int i = 0; i < e->edges.count; ++i ) {
|
|
struct edge* ed = &e->edges.items[i];
|
|
DEBUG_printf( "ed = { .label = '%s', .value = '%s', .count = %d }\n", ed->label, ed->value, ed->count );
|
|
if( ed->count <= offset ) {
|
|
DEBUG_printf( "Skipping all for '%s' offset=%d\n", e->prefix, offset );
|
|
// branch - skip all
|
|
offset -= ed->count;
|
|
} else if( ed->count > 1 ) {
|
|
// branch - skip part, include part
|
|
int branch_count = ed->count - offset;
|
|
DEBUG_printf( "Skipping part of '%s', offset=%d, branch_count=%d, limit=%d\n", e->prefix, offset, branch_count, limit );
|
|
if( branch_count > limit ) {
|
|
branch_count = limit;
|
|
}
|
|
|
|
if( ed->child_trie ) {
|
|
char filename[512];
|
|
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
|
|
|
|
DEBUG_printf( "Traversing down child trie, effective filename=%s\n", filename );
|
|
ed->child_trie->filename = strdup(filename);
|
|
ed->child_trie->prefix = aformat( "%s%s", e->prefix, ed->label );
|
|
load_items( ed->child_trie, offset, branch_count, keys, values );
|
|
|
|
offset = 0;
|
|
limit -= branch_count;
|
|
} else {
|
|
char filename[512];
|
|
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
|
|
|
|
bool needs_freed = false;
|
|
struct trie_entry* branch = trie_entry_load_and_try_consolidate( filename, e, ed, &needs_freed );
|
|
|
|
load_items( branch, offset, branch_count, keys, values );
|
|
|
|
if( needs_freed ) {
|
|
trie_entry_free(branch);
|
|
}
|
|
|
|
offset = 0;
|
|
limit -= branch_count;
|
|
}
|
|
} else if( limit > 0 ) {
|
|
// leaf - include
|
|
char* key = aformat( "%s%s", e->prefix, ed->label );
|
|
DEBUG_printf( "Loading item %s=%s from '%s', offset=%d\n", key, ed->value, e->prefix, offset );
|
|
if( keys ) {
|
|
array_append( keys, sizeof(key), &key );
|
|
} else {
|
|
free(key);
|
|
}
|
|
if( values ) {
|
|
char* str = strdup( ed->value );
|
|
array_append( values, sizeof(str), &str );
|
|
}
|
|
limit -= 1;
|
|
}
|
|
|
|
if( limit == 0 ) {
|
|
break;
|
|
}
|
|
}
|
|
DEBUG_printf( "Out of edges in '%s'\n", e->prefix );
|
|
}
|
|
|
|
int ffdb_trie_index_for_key( const char* filename, const char* key )
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
void ffdb_trie_list( const char* filename, int offset, int limit, void* keys_ptr, void* values_ptr )
|
|
{
|
|
DEBUG_printf( "ffdb_trie_list( %s, %d, %d, %p, %p )\n", filename, offset, limit, keys_ptr, values_ptr );
|
|
|
|
struct trie_entry* root = load_root_node( filename );
|
|
root->prefix = strdup("");
|
|
load_items( root, offset, limit, keys_ptr, values_ptr );
|
|
|
|
printf( "---- root->filename = %s\n", root->filename );
|
|
trie_entry_free(root);
|
|
printf( "---- root released\n\n" );
|
|
}
|
|
bool ffdb_trie_get_index( const char* filename, int offset, char** key, char** value )
|
|
{
|
|
DEBUG_printf( "ffdb_trie_get_index( %s, %d, %p, %p )\n", filename, offset, key, value );
|
|
bool result = false;
|
|
|
|
/// Convenience function to get single item at offset
|
|
struct string_array keys;
|
|
struct string_array values;
|
|
memset(&keys,0,sizeof(keys));
|
|
memset(&values,0,sizeof(values));
|
|
|
|
ffdb_trie_list( filename, offset, 1, &keys, value ? &values : NULL );
|
|
if( keys.count == 0 ) {
|
|
result = false;
|
|
} else {
|
|
result = true;
|
|
|
|
if( key ) {
|
|
*key = keys.items[0];
|
|
} else {
|
|
free( keys.items[0] );
|
|
}
|
|
if( value ) {
|
|
*value = values.items[0];
|
|
} else {
|
|
free( values.items[0] );
|
|
}
|
|
}
|
|
|
|
free(keys.items);
|
|
free(values.items);
|
|
|
|
printf( "\n" );
|
|
return result;
|
|
}
|