From 44f99657a0653338d77ac070e62b5735552307a8 Mon Sep 17 00:00:00 2001 From: teknomunk Date: Mon, 30 Oct 2023 21:17:42 -0500 Subject: [PATCH] Fix several SEGV faults, add support for custom HTTP User-Agent when fetching, change argument handling, show process name with starting subprocesses, add garbage collection for statuses by uri index --- src/controller/api/client_apps.c | 3 ++ src/ffdb | 2 +- src/http | 2 +- src/model/activity.c | 12 +++++--- src/model/fetch.c | 6 ++-- src/model/server.c | 48 ++++++++++++++++++++------------ src/model/server.h | 1 + src/model/status.c | 40 ++++++++++++++++++++++++++ src/process.c | 38 ++++++++++++++----------- src/process.h | 2 ++ 10 files changed, 111 insertions(+), 43 deletions(-) diff --git a/src/controller/api/client_apps.c b/src/controller/api/client_apps.c index 5c0198b..0c67a53 100644 --- a/src/controller/api/client_apps.c +++ b/src/controller/api/client_apps.c @@ -84,7 +84,10 @@ bool check_bearer_token( const char* auth_token ) char* client_id = strndup( &auth_token[7], 32 ); struct client_app* app = client_app_from_id( client_id ); free(client_id); + if( !app ) { return false; } + if( !app->access_token ) { client_app_free(app); return false; } /* release app here too, as the mismatch branch below does */ + if( 0 != strcmp( &auth_token[7], app->access_token ) ) { client_app_free(app); return false; diff --git a/src/ffdb b/src/ffdb index 45f1653..c57a8bb 160000 --- a/src/ffdb +++ b/src/ffdb @@ -1 +1 @@ -Subproject commit 45f1653cd4243ca46a36105f3528b95968a087d5 +Subproject commit c57a8bb19fedfb717ac0ae498b2e121f96d3507f diff --git a/src/http b/src/http index 75ec846..d6ef4a6 160000 --- a/src/http +++ b/src/http @@ -1 +1 @@ -Subproject commit 75ec84638952c7183df4e93934fcb30c49094e55 +Subproject commit d6ef4a67ba6c5eb2b1101723cf86286d7aa1602e diff --git a/src/model/activity.c b/src/model/activity.c index 4fcf879..953cd06 100644 --- a/src/model/activity.c +++ b/src/model/activity.c @@ -247,13 +247,17 @@ struct ap_object* activity_create_Note( struct status* s ) act->content.content = 
strdup(status_render_source(s,g_server->domain)); if( s->in_reply_to ) { struct status* s_in_reply_to = status_from_id( s->in_reply_to ); - act->in_reply_to = strdup( s_in_reply_to->url ); - status_free(s_in_reply_to); + if( s_in_reply_to ) { + act->in_reply_to = strdup( s_in_reply_to->url ); + status_free(s_in_reply_to); + } } if( s->quote_id ) { struct status* s_quote_post = status_from_id( s->quote_id ); - act->quote_url = strdup( s_quote_post->url ); - status_free(s_quote_post); + if( s_quote_post ) { + act->quote_url = strdup( s_quote_post->url ); + status_free(s_quote_post); + } } // Set the replies diff --git a/src/model/fetch.c b/src/model/fetch.c index b256235..46d8f8c 100644 --- a/src/model/fetch.c +++ b/src/model/fetch.c @@ -73,7 +73,7 @@ size_t fetch_handle_header( char* header, size_t size, size_t nitems, void* user static bool do_fetch_tor( const char* uri, struct fetch_data* fd, const char* result_filename ) { char user_agent[512]; - snprintf( user_agent, sizeof(user_agent), "User-Agent: Apogee/0.1; +https://%s/owner/actor", g_server->domain ); + snprintf( user_agent, sizeof(user_agent), "User-Agent: %s; +https://%s/owner/actor", g_server->user_agent, g_server->domain ); char proxy[512]; snprintf( proxy,512, "socks5h://localhost:%d", g_server->tor_socks_port ); @@ -108,7 +108,7 @@ static bool do_fetch_tor( const char* uri, struct fetch_data* fd, const char* re static bool do_fetch_clearnet( const char* uri, struct fetch_data* fd, const char* result_filename ) { char user_agent[512]; - snprintf( user_agent, sizeof(user_agent), "User-Agent: curl (Apogee/0.1; +https://%s/owner/actor)", g_server->domain ); + snprintf( user_agent, sizeof(user_agent), "User-Agent: %s; +https://%s/owner/actor", g_server->user_agent, g_server->domain ); FILE* result = fopen( result_filename, "w" ); if( !result ) { return false; } @@ -141,7 +141,7 @@ static bool do_fetch_signed_clearnet( const char* uri, struct fetch_data* fd, co if( !result ) { return false; } char 
user_agent[512]; - snprintf( user_agent, sizeof(user_agent), "User-Agent: curl (Apogee/0.1; +https://%s/owner/actor)", g_server->domain ); + snprintf( user_agent, sizeof(user_agent), "User-Agent: %s; +https://%s/owner/actor", g_server->user_agent, g_server->domain ); // Load crypto keys struct crypto_keys* keys = crypto_keys_new(); diff --git a/src/model/server.c b/src/model/server.c index 646fe42..12d168a 100644 --- a/src/model/server.c +++ b/src/model/server.c @@ -3,6 +3,7 @@ #include "json/layout.h" #include "controller/cli.h" +#include "process.h" #include #include @@ -11,6 +12,7 @@ #define OBJ_TYPE struct app_args static struct json_object_field app_args_layout[] = { JSON_FIELD_STRING( domain, true ), + JSON_FIELD_STRING( user_agent, false ), { .key = "addr", .offset = offsetof( OBJ_TYPE, http_settings.bind_address ), @@ -61,6 +63,7 @@ struct app_args* app_args_new( int argc, char** argv ) args->tor_socks_port = 9123; args->section = -1; args->outbox_discard_limit = 5; + args->user_agent = strdup( "Apogee/0.1" ); json_read_object_layout_from_file( "data/server.json", app_args_layout, args ); @@ -75,26 +78,34 @@ struct app_args* app_args_new( int argc, char** argv ) for( int i = 1; i < argc; ++i ) { const char* arg = argv[i]; - if( sscanf(arg,"--section=%d",&args->section) ) { - } else if( 0 == strcmp(argv[i],"--debug") ) { - args->debug = true; - } else if( 0 == strcmp(argv[i],"--test") ) { - args->section = 3; - } else if( 0 == strcmp(argv[i],"--webserver") ) { - args->section = 0; - } else if( 0 == strcmp(argv[i],"--inbox") ) { - args->section = 1; - } else if( 0 == strcmp(argv[i],"--outbox") ) { - args->section = 2; - } else if( 0 == strcmp(argv[i],"--reindex") ) { - args->section = 3; - } else if( 0 == strcmp(argv[i],"--devel") ) { + // Debug flag + if( 0 == strcmp(argv[i],"--debug") ) { args->debug = true; goto next_arg; } + + // Sections by number (compare against 1: sscanf can return EOF, which is non-zero, when the argument ends early) + if( 1 == sscanf(arg,"--section=%d",&args->section) ) { goto next_arg; } + + // Sections by name (the inner index must not shadow the argv index i) + if( ( 
argv[i][0] == '-' ) && ( argv[i][1] == '-' ) ) { + for( int section = 0; section <= process_get_max_section(); ++section ) { + if( 0 == strcmp( &argv[i][2], process_get_section_name(section) ) ) { + args->section = section; + goto next_arg; + } + } + } + + // Development + if( 0 == strcmp(argv[i],"--devel") ) { args->section = 100; - } else { - printf( "Unknown argument: %s\n", argv[i] ); - free(args); - return NULL; + goto next_arg; } + + // Unknown argument + printf( "Unknown argument: %s\n", argv[i] ); + free(args); + return NULL; + + next_arg:; } return args; @@ -105,6 +116,7 @@ void app_args_release( struct app_args* args ) free(args->http_settings.bind_address); free(args->domain); free(args->tor_hidden_service); + free(args->user_agent); free(args); } diff --git a/src/model/server.h b/src/model/server.h index a4a1684..e227d79 100644 --- a/src/model/server.h +++ b/src/model/server.h @@ -12,6 +12,7 @@ struct app_args //char* addr; char* domain; char* tor_hidden_service; + char* user_agent; bool debug; int section; bool disable_tor; diff --git a/src/model/status.c b/src/model/status.c index 5a07102..92d6ca3 100644 --- a/src/model/status.c +++ b/src/model/status.c @@ -30,6 +30,9 @@ #include #include #include +#include + +#define STATUSES_BY_URI "data/statuses/by-uri" extern struct json_enum visibility_enum[]; @@ -233,6 +236,9 @@ struct status* status_from_uri( const char* uri ) struct status* s = status_from_id(atoi(id_str)); if( s ) { return s; + } else { + // Status has been deleted, remove the index entry + ffdb_trie_remove( STATUSES_BY_URI, uri ); } free(id_str); } @@ -451,6 +457,7 @@ bool status_sync_from_activity_pub( struct status* s, struct ap_object* act ) s->poll = poll; } + s->poll->expires_at = act->closed; sync_poll( s, &act->one_of ); } else if( act->any_of.count > 0 ) { if( !s->poll ) { @@ -464,6 +471,7 @@ bool status_sync_from_activity_pub( struct status* s, struct ap_object* act ) s->poll = poll; } + s->poll->expires_at = act->closed; sync_poll( s, &act->any_of ); } @@ 
-841,6 +849,7 @@ void status_delete( struct status* s ) char filename[512]; ffdb_trie_remove( "data/statuses/stubs", format(filename,512,"%d",s->id) ); + ffdb_trie_remove( STATUSES_BY_URI, s->url ); get_status_data_filename(s->id,filename,sizeof(filename)); remove(filename); @@ -1222,6 +1231,37 @@ void status_get_bookmarks( int offset, int limit, void* results_ptr ) void status_gc() { + int count = ffdb_trie_count( STATUSES_BY_URI ); + enum { + per_pass = 20, + }; + + struct { + char** items; + int count; + } keys; + + printf( "Checking and repairing uri-to-status index...\n" ); + int pages = ( count + per_pass - 1 ) / per_pass; + for( int i = 0; i < pages; ++i ) { + printf( "page %d of %d\n", i + 1, pages ); + memset( &keys, 0, sizeof(keys) ); + + ffdb_trie_list( STATUSES_BY_URI, i * per_pass, per_pass, &keys, NULL ); + + // Check and repair index: status_from_uri drops index entries whose status no longer exists. NOTE(review): if ffdb_trie_list pages by offset, entries removed during a pass shift later pages and some keys may be skipped - confirm + //printf( "keys.count = %d\n", keys.count ); + for( int j = 0; j < keys.count; ++j ) { + //printf( "processing %s\n", keys.items[j] ); + status_free( status_from_uri( keys.items[j] ) ); + } + + free( keys.items ); /* NOTE(review): only the array is freed; confirm whether ffdb_trie_list also hands over ownership of each keys.items[j] */ + + //sleep(1); + } + printf( "done.\n" ); + /* printf( "Sweeping legacy uri->status_id index\n" ); // Sweep legacy url to status id index diff --git a/src/process.c b/src/process.c index 4c461aa..abb8367 100644 --- a/src/process.c +++ b/src/process.c @@ -40,6 +40,24 @@ struct { extern bool terminate; +const char* process_get_section_name( int section ) +{ + switch( section ) { + case 0: return "webserver"; + case 1: return "inbox"; + case 2: return "outbox"; + case 3: return "test"; + case 4: return "reindex"; + case 5: return "fetch"; + case 6: return "tor"; + } + return NULL; /* unknown section: callers must check for NULL before using the name */ +} +int process_get_max_section() +{ + return 6; +} + static void handle_request( struct http_request* req, void* ) { //printf( "Handling request from %s\n", http_request_get_remote_host_address( req ) ); @@ -76,7 +94,7 @@ void develop(); int process_run_section( int id ) { - printf( "Starting section %d\n", id ); + printf( 
"Starting section %d (%s)\n", id, process_get_section_name(id) ? process_get_section_name(id) : "unknown" ); /* name is NULL for ids outside 0..6; never pass NULL to %s */ switch( id ) { case 0: return !run_webserver( g_server ); @@ -107,21 +125,9 @@ int process_run_section( int id ) return EXIT_FAILURE; } -static const char* section_name( int section ) -{ - switch( section ) { - case 0: return "webserver"; - case 1: return "inbox"; - case 2: return "outbox"; - case 5: return "fetch"; - case 6: return "tor"; - } - return NULL; -} - static void redirect_io( int section ) { - const char* section_name_str = section_name(section); + const char* section_name_str = process_get_section_name(section); if( !section_name_str ) { return; } char filename[512]; @@ -140,7 +146,7 @@ static void redirect_io( int section ) void process_start_section( int id ) { - printf( "Starting section %d\n", id ); + printf( "Starting section %d (%s)\n", id, process_get_section_name(id) ? process_get_section_name(id) : "unknown" ); /* name is NULL for ids outside 0..6; never pass NULL to %s */ pid_t child_pid; child_pid = fork(); @@ -173,7 +179,7 @@ void process_stop() static bool rotate_log( int section ) { - const char* section_name_str = section_name(section); + const char* section_name_str = process_get_section_name(section); if( !section_name_str ) { return false; } char logfile[512]; diff --git a/src/process.h b/src/process.h index 289507c..7797ac8 100644 --- a/src/process.h +++ b/src/process.h @@ -4,4 +4,6 @@ int process_run_section( int id ); void process_start_section( int id ); void process_stop(); void process_wait_for_finished(); +const char* process_get_section_name( int section ); +int process_get_max_section();