You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ffdb/trie.c

714 lines
18 KiB
C

#include "trie.h"
#include "json/json.h"
#include "collections/array.h"
#include "util/format.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/file.h>
//#define DEBUG
#ifdef DEBUG
#define DEBUG_printf printf
#define DEBUG_fflush fflush
#else
#define DEBUG_printf(...)
#define DEBUG_fflush(...)
#endif
enum {
tt_value = 1,
tt_inline = 2,
tt_extern = 3,
};
struct trie_entry;
struct edge
{
char* label;
char* value;
int count;
struct trie_entry* child_trie;
};
struct trie_entry
{
struct {
struct edge* items;
int count;
} edges;
char* filename;
char* prefix;
};
static void trie_entry_free_composite( struct trie_entry* e )
{
for( int i = 0; i < e->edges.count; ++i ) {
free(e->edges.items[i].label);
free(e->edges.items[i].value);
}
free(e->edges.items);
free(e->filename);
free(e->prefix);
}
static void trie_entry_free( struct trie_entry* e )
{
if( !e ) { return; }
trie_entry_free_composite(e);
free(e);
}
static bool trie_entry_load( FILE* f, struct trie_entry* e )
{
struct json_pull_parser jpp = {
.f = f,
.curr_state = jpp_initial_state,
};
if( !jpp.f ) { return false; }
int save;
if( !json_pull_parser_begin_object(&jpp,&save) ) { goto failed; }
e->edges.count = 0;
char* edge_label = NULL;
while(( edge_label = json_pull_parser_read_object_key(&jpp) )) {
e->edges.count += 1;
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
struct edge* ed = &e->edges.items[ e->edges.count-1 ];
memset(ed,0,sizeof(*ed));
ed->label = edge_label;
if( json_pull_parser_read_int( &jpp, &ed->count ) ) {
continue;
}
char* value = json_pull_parser_read_string(&jpp);
if( value ) {
ed->value = value;
ed->count = 1;
continue;
}
struct trie_entry* child = NULL;
child = malloc(sizeof(*child));
memset(child,0,sizeof(*child));
if( trie_entry_load( f, child ) ) {
ed->child_trie = child;
ed->count = 0;
for( int i = 0; i < child->edges.count; ++i ) {
ed->count += child->edges.items[i].count;
}
continue;
}
free(child);
goto failed;
}
if( !json_pull_parser_end_object(&jpp,&save) ) { goto failed; }
return true;
failed:
return false;
}
static bool trie_entry_load_from_file( const char* filename, struct trie_entry* e )
{
e->filename = strdup(filename);
FILE* f = fopen( filename, "r" );
if( !f ) { return false; }
bool result = trie_entry_load( f, e );
fclose(f);
return result;
}
static void trie_entry_save( FILE* f, struct trie_entry* e )
{
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
if( ed->count == 1 && !ed->value ) {
// Somehow the count on an edge was lower than the actual number of
// children. Don't save with invalid edge (implicit += 1 to edge count)
return;
}
}
// Selection Sort by edge
for( int i = 0; i < e->edges.count ; ++i ) {
int lowest = i;
const char* lowest_label = e->edges.items[i].label;
for( int j = i + 1; j < e->edges.count; ++j ) {
if( strcmp( lowest_label, e->edges.items[j].label ) < 0 ) {
lowest = j;
lowest_label = e->edges.items[j].label;
}
}
if( lowest != i ) {
struct edge tmp;
memcpy( &tmp, &e->edges.items[i], sizeof(tmp) );
memcpy( &e->edges.items[i], &e->edges.items[lowest], sizeof(tmp) );
memcpy( &e->edges.items[lowest], &tmp, sizeof(tmp) );
}
}
fprintf( f, "{" );
for( int i = 0; i < e->edges.count; ++i ) {
if( i != 0 ) {
fprintf( f, ",\n\t" );
} else {
fprintf( f, "\n\t" );
}
struct edge* ed = &e->edges.items[i];
json_write_string( f, ed->label );
fprintf( f, ": " );
if( ed->count > 1 ) {
fprintf( f, "%d", ed->count );
} else {
assert( ed->value );
json_write_string( f, ed->value );
}
}
fprintf( f, "\n}\n" );
}
static bool trie_entry_save_to_file( const char* filename, struct trie_entry* e )
{
char tmp_filename[512];
snprintf( tmp_filename, sizeof(tmp_filename), "%s.tmp", filename );
FILE* f = fopen(tmp_filename,"w");
if( !f ) { return false; }
trie_entry_save( f, e );
fclose(f);
rename( tmp_filename, filename );
return true;
}
// Escape '/' as "%|" and '%' as "%%"
static char* escape( const char* str )
{
int size = 0;
for( const char* i = str; *i; ++i ) {
switch( *i ) {
case '/': size += 2; break;
case '%': size += 2; break;
default: size += 1; break;
}
}
char* result = malloc(size+1);
char* o = result;
for( const char* i = str; *i; ++i ) {
switch( *i ) {
case '/':
o[0] = '%';
o[1] = '|';
o += 2;
break;
case '%':
o[0] = '%';
o[1] = '%';
o += 2;
break;
default:
*o = *i;
++o;
}
}
*o = '\0';
return result;
}
static int prefix_match( const char* a, const char* b )
{
int res = 0;
while( *a && *b && *a == *b ) {
a += 1;
b += 1;
res += 1;
}
return res;
}
static int trie_entry_calculate_size( struct trie_entry* e )
{
int this_count = 0;
struct edge* ed = NULL;
for( int i = 0; i < e->edges.count; ++i ) {
ed = &e->edges.items[i];
this_count += ed->count;
}
return this_count;
}
enum {
trie_entry_set_result_failed_to_add = 0,
trie_entry_set_result_updated_existing = 1,
trie_entry_set_result_added_new = 2,
trie_entry_set_result_deleted_existing = 3,
trie_entry_set_result_no_change = 4,
};
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count );
static int trie_entry_add_new_edge( struct trie_entry* e, const char* key, const char* value, int parent_count )
{
if( !value ) {
DEBUG_printf( "value is null, not adding\n" );
return trie_entry_set_result_no_change;
}
DEBUG_printf( "add_new_edge %s\n", key );
e->edges.count += 1;
e->edges.items = realloc( e->edges.items, sizeof(struct edge) * e->edges.count );
struct edge* ed2 = &e->edges.items[e->edges.count-1];
ed2->label = strdup(key);
ed2->value = strdup(value);
ed2->count = 1;
trie_entry_save_to_file( e->filename, e );
return trie_entry_set_result_added_new;
}
static int trie_entry_delete_existing( struct trie_entry* e, struct edge* ed, const char* key, int parent_count )
{
int i = ed - e->edges.items;
if( i != e->edges.count - 1 ) {
struct edge tmp;
memcpy( &tmp, ed, sizeof(*ed) );
memcpy( ed, &e->edges.items[ e->edges.count - 1 ], sizeof(*ed) );
free(tmp.label);
}
e->edges.count -= 1;
trie_entry_save_to_file( e->filename, e );
return trie_entry_set_result_deleted_existing;
}
static int trie_entry_update_existing( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int parent_count )
{
DEBUG_printf( "updating existing value\n" );
free(ed->value);
if( value == NULL ) {
return trie_entry_delete_existing( e, ed, key, parent_count );
}
ed->value = strdup(value);
trie_entry_save_to_file( e->filename, e );
/*
if( this_count > parent_count ) {
DEBUG_printf( "size mismatch detected. TODO: correct\n" );
}
*/
return trie_entry_set_result_updated_existing;
}
static int trie_entry_split_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
DEBUG_printf( "split_existing_edge\n" );
if( !value ) {
return trie_entry_set_result_no_change;
}
{
char* new_prefix = strndup( key, prefix_len );
char* new_filename = malloc(strlen(e->filename)+prefix_len+1);
strcpy(new_filename,e->filename);
strcat(new_filename,new_prefix);
// Create the new node
struct trie_entry* new_e = malloc(sizeof(struct trie_entry));
memset(new_e,0,sizeof(*new_e));
new_e->filename = new_filename;
new_e->prefix = aformat( "%s%s", e->prefix, new_prefix );
new_e->edges.items = malloc( sizeof(struct edge) * 2 );
memset( new_e->edges.items, 0, sizeof(struct edge) * 2 );
new_e->edges.items[0].label = strdup(&ed->label[prefix_len]);
if( ed->count == 1 ) {
new_e->edges.items[0].value = ed->value;
}
new_e->edges.items[0].count = ed->count;
new_e->edges.items[1].label = strdup(&key[prefix_len]);
new_e->edges.items[1].value = strdup(value);
new_e->edges.items[1].count = 1;
new_e->edges.count = 2;
// Save to file
trie_entry_save_to_file( new_e->filename, new_e );
// Update the existing edge
free(ed->label);
ed->label = new_prefix;
ed->value = NULL;
ed->count = new_e->edges.items[0].count + new_e->edges.items[1].count;
trie_entry_free(new_e);
trie_entry_save_to_file( e->filename, e );
}
return trie_entry_set_result_added_new;
}
static int trie_entry_traverse_existing_edge( struct trie_entry* e, struct edge* ed, const char* key, const char* value, int prefix_len, int parent_count )
{
DEBUG_printf( "traverse_existing_edge\n" );
if( ed->count == 1 ) {
DEBUG_printf( "single value %s\n", ed->label );
if( 0 == strcmp( ed->label, key ) ) {
return trie_entry_update_existing( e, ed, key, value, parent_count );
} else {
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
}
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
DEBUG_printf( "Traversing down %s (filename=%s) key=%s, remaining=%s, count=%d\n", ed->label, filename, key, &key[prefix_len], ed->count );
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
trie_entry_load_from_file( filename, &branch );
int result = trie_entry_set( &branch, &key[prefix_len], value, trie_entry_calculate_size( &branch ) );
DEBUG_printf( "result=%d\n", result );
// update count
if( result == trie_entry_set_result_added_new ) {
DEBUG_printf( "Updated edge count +1\n" );
ed->count += 1;
trie_entry_save_to_file( e->filename, e );
} else if( result == trie_entry_set_result_deleted_existing ) {
DEBUG_printf( "Updated edge count -1\n" );
ed->count -= 1;
if( branch.edges.count == 1 ) {
char* new_label = aformat( "%s%s", ed->label, branch.edges.items[0].label );
free(ed->label); ed->label = new_label;
free(ed->value);
if( branch.edges.items[0].value ) {
ed->value = strdup(branch.edges.items[0].value);
} else {
ed->value = NULL;
}
remove( branch.filename );
}
trie_entry_save_to_file( e->filename, e );
}
trie_entry_free_composite( &branch );
DEBUG_printf( "Traverse result = %d\n", result );
/*
if( parent_count > this_count ) {
if( result == trie_entry_set_result_updated_existing ) {
DEBUG_printf( "Fixing up parent count parent_count = %d, this_count = %d\n", parent_count, this_count );
return trie_entry_set_result_deleted_existing;
}
}
*/
return result;
}
static int trie_entry_set( struct trie_entry* e, const char* key, const char* value, int parent_count )
{
int prefix_len = 0;
struct edge* ed;
int key_length = strlen(key);
#ifdef DEBUG
int this_count = trie_entry_calculate_size( e );
DEBUG_printf( "root this_count = %d, parent_count = %d\n", this_count, parent_count );
#endif
DEBUG_printf( "key: %s\n", key );
for( int i = 0; i < e->edges.count; ++i ) {
ed = &e->edges.items[i];
prefix_len = prefix_match( key, ed->label );
DEBUG_printf( "label[%d]: %s, prefix_len=%d\n", i, ed->label, prefix_len );
DEBUG_fflush(stdout);
if( prefix_len == strlen( ed->label ) ) {
if( prefix_len == 0 ) {
if( key_length == 0 ) {
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
} else {
return trie_entry_traverse_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
} else if( prefix_len != 0 ) {
return trie_entry_split_existing_edge( e, ed, key, value, prefix_len, parent_count );
}
}
return trie_entry_add_new_edge( e, key, value, parent_count );
}
static struct trie_entry* load_root_node( const char* filename )
{
struct trie_entry* root = malloc(sizeof(*root));
memset(root,0,sizeof(*root));
char buffer[512];
snprintf( buffer, sizeof(buffer), "%s/%%ROOT|", filename );
trie_entry_load_from_file( buffer, root );
return root;
}
bool ffdb_trie_set( const char* filename, const char* key, const char* value )
{
DEBUG_printf( "ffdb_trie_set( filename = '%s', key = '%s', value = '%s' )\n", filename, key, value );
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
bool result = false;
mkdir( filename, 0755 );
char buffer[512];
snprintf( buffer,sizeof(buffer), "%s/.lock", filename );
int fd = open( buffer, O_CREAT );
flock( fd, LOCK_EX );
root = load_root_node(filename);
int this_count = 0;
for( int i = 0; i < root->edges.count; ++i ) {
this_count += root->edges.items[i].count;
}
int res = trie_entry_set( root, key_escaped, value, this_count );
if( !res ) {
printf( "Failed to set %s to %s\n", key, value );
goto failed;
}
if( res != trie_entry_set_result_updated_existing ) {
trie_entry_save_to_file( filename, root );
}
result = true;
cleanup:
free(key_escaped);
trie_entry_free(root);
flock( fd, LOCK_UN );
close(fd);
return result;
failed:
result = false;
goto cleanup;
}
static char* lookup_key( struct trie_entry* e, const char* key )
{
int key_length = strlen(key);
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
int prefix_len = prefix_match( ed->label, key );
bool match = false;
//int label_length = strlen(ed->label);
if( prefix_len == strlen(ed->label ) ) {
if( prefix_len == 0 ) {
// terminal requires exact match
if( key_length == 0 ) {
match = true;
} else {
match = false;
}
} else {
match = true;
}
}
if( match ) {
const char* remaining = &key[prefix_len];
//printf( "match=%s, remaining=%s\n", ed->label, remaining );
if( ed->count > 1 ) {
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
trie_entry_load_from_file( filename, &branch );
char* result = lookup_key( &branch, remaining );
trie_entry_free_composite(&branch);
return result;
} else {
if( 0 == strcmp( ed->label, key ) ) {
//printf( "ed->label=%s, result=%s\n", ed->label, ed->value );
return strdup(ed->value);
} else {
return NULL;
}
}
}
}
return NULL;
}
char* ffdb_trie_get( const char* filename, const char* key )
{
struct trie_entry* root = NULL;
char* key_escaped = escape(key);
char* result = NULL;
root = load_root_node(filename);
if( !root ) { goto failed; }
result = lookup_key( root, key_escaped );
cleanup:
free(key_escaped);
trie_entry_free(root);
return result;
failed:
free(result);
result = NULL;
goto cleanup;
}
bool ffdb_trie_remove( const char* filename, const char* key )
{
return ffdb_trie_set( filename, key, NULL );
}
int ffdb_trie_count( const char* filename )
{
struct trie_entry* root = NULL;
int result = 0;
root = load_root_node(filename);
if( !root ) { goto failed; }
for( int i = 0; i < root->edges.count; ++i ) {
result += root->edges.items[i].count;
}
cleanup:
trie_entry_free(root);
return result;
failed:
result = -1;
goto cleanup;
}
void ffdb_trie_clean( const char* filename )
{
struct trie_entry* root = NULL;
//int result = 0;
root = load_root_node(filename);
if( !root ) { goto failed; }
cleanup:
trie_entry_free(root);
return;
failed:
goto cleanup;
}
struct string_array {
char** items;
int count;
};
static void load_items( struct trie_entry* e, int offset, int limit, struct string_array* keys, struct string_array* values )
{
if( limit <= 0 ) { return; }
DEBUG_printf( "Looking at edges in '%s'\n", e->prefix );
for( int i = 0; i < e->edges.count; ++i ) {
struct edge* ed = &e->edges.items[i];
DEBUG_printf( "ed = { .label = '%s', .value = '%s', .count = %d }\n", ed->label, ed->value, ed->count );
if( ed->count <= offset ) {
DEBUG_printf( "Skipping all for '%s' offset=%d\n", e->prefix, offset );
// branch - skip all
offset -= ed->count;
} else if( ed->count > 1 ) {
// branch - skip part, include part
int branch_count = ed->count - offset;
DEBUG_printf( "Skipping part of '%s', offset=%d, branch_count=%d, limit=%d\n", e->prefix, offset, branch_count, limit );
if( branch_count > limit ) {
branch_count = limit;
}
char filename[512];
snprintf( filename, sizeof(filename), "%s%s", e->filename, ed->label );
// Load items from the branch
struct trie_entry branch;
memset(&branch,0,sizeof(branch));
trie_entry_load_from_file( filename, &branch );
branch.prefix = aformat( "%s%s", e->prefix, ed->label );
load_items( &branch, offset, branch_count, keys, values );
trie_entry_free_composite( &branch );
offset = 0;
limit -= branch_count;
} else if( limit > 0 ) {
// leaf - include
DEBUG_printf( "Loading from '%s', offset=%d\n", e->prefix, offset );
if( keys ) {
char* str = aformat( "%s%s", e->prefix, ed->label );
array_append( keys, sizeof(str), &str );
}
if( values ) {
char* str = strdup( ed->value );
array_append( values, sizeof(str), &str );
}
limit -= 1;
}
}
DEBUG_printf( "Out of edges in '%s'\n", e->prefix );
}
int ffbd_trie_index_for_key( const char* filename, const char* key )
{
return -1;
}
void ffdb_trie_list( const char* filename, int offset, int limit, void* keys_ptr, void* values_ptr )
{
struct trie_entry* root = load_root_node( filename );
root->prefix = strdup("");
load_items( root, offset, limit, keys_ptr, values_ptr );
trie_entry_free(root);
}
bool ffdb_trie_get_index( const char* filename, int offset, char** key, char** value )
{
bool result = false;
/// Convenience function to get single item at offset
struct string_array keys;
struct string_array values;
memset(&keys,0,sizeof(keys));
memset(&values,0,sizeof(values));
ffdb_trie_list( filename, offset, 1, &keys, value ? &values : NULL );
if( keys.count == 0 ) {
result = false;
} else {
result = true;
if( key ) {
*key = keys.items[0];
} else {
free( keys.items[0] );
}
if( value ) {
*value = values.items[0];
} else {
free( values.items[0] );
}
}
free(keys.items);
free(values.items);
return result;
}