Start implementation of garbage collection of older posts, fix to,cc,bcc parse error when a single value is present, fix Accept header value, add status removal functions

master
teknomunk 1 year ago
parent c6f8df2244
commit 9b361dd2d3

@ -7,6 +7,7 @@
#include "model/status.h"
#include "model/notification.h"
#include "model/ap/activity.h"
#include "model/gc.h"
#include "controller/inbox.h"
#include "view/api/Status.h"
@ -162,6 +163,15 @@ static bool handle_command_test( struct cli_request* req )
return true;
}
// CLI handler for the "gc" command.
// Asks cli_route_command whether this request is the "gc" command; when it
// answers 1 the garbage collector is run and true is returned. Any other
// answer is passed back as "non-zero means handled".
static bool handle_command_gc( struct cli_request* req )
{
	const int route_result = cli_route_command( req, "gc", 0, "" );

	if( route_result == 1 ) {
		gc_run();
		return true;
	}

	// Not routed to us (or routing reported something other than 1):
	// collapse the routing result to a boolean for the caller's || chain.
	return route_result != 0;
}
bool handle_command_account_sync( struct cli_request* req, int account_id )
{
@ -222,6 +232,7 @@ void handle_command( char** argv, int argc )
|| handle_command_reindex(&req)
|| handle_command_test(&req)
|| handle_command_update(&req)
|| handle_command_gc(&req)
) { return; }
printf( "Unknown command %s\n", argv[1] );

@ -514,7 +514,7 @@ static bool process_one()
if( !env ) {
printf( "! Failed to parse envelope+activity for data/inbox/%d.json\n", id );
goto failed;
goto discard;
}
// Load activity
@ -554,6 +554,7 @@ cleanup:
ap_object_free(act);
ap_envelope_free(env);
printf( "result=%c\n", result ? 'T' : 'F' );
fflush(stdout);
return result;
failed:
result = false;

@ -1 +1 @@
Subproject commit b5165d7553f1b247212f4ee4d095d8eadb77d70f
Subproject commit 34af1bef81911b7206cc8c514c09022de1f74695

@ -1 +1 @@
Subproject commit 36c3fb214952ab71a0f3ee7c235a78c19c2ac5a8
Subproject commit ad7f0125cb4feadb222290149f06ce9166f429a4

@ -47,7 +47,13 @@ bool account_sync_from_activity_pub( unsigned int account_id )
}
a->bot = ( obj->type != apot_person );
a->account_type = at_remote_activity_pub;
a->account_url = strdup(obj->url);
if( obj->url ) {
a->account_url = strdup(obj->url);
} else if( obj->id ) {
a->account_url = strdup(obj->id);
} else {
goto failed;
}
a->inbox = strdup(obj->inbox);
if( obj->summary ) {
a->note = strdup(obj->summary);

@ -243,7 +243,7 @@ struct json_object_field ap_object_layout[] = {
.offset = offsetof( OBJ_TYPE, to ),
.required = false,
.allow_drop_empty_array = true,
.type = &json_field_array_of,
.type = &json_field_single_or_array_of,
.array_item_type = &json_field_string,
},
{
@ -251,7 +251,7 @@ struct json_object_field ap_object_layout[] = {
.offset = offsetof( OBJ_TYPE, cc ),
.required = false,
.allow_drop_empty_array = true,
.type = &json_field_array_of,
.type = &json_field_single_or_array_of,
.array_item_type = &json_field_string,
},
{
@ -259,7 +259,7 @@ struct json_object_field ap_object_layout[] = {
.offset = offsetof( OBJ_TYPE, bcc ),
.required = false,
.allow_drop_empty_array = true,
.type = &json_field_array_of,
.type = &json_field_single_or_array_of,
.array_item_type = &json_field_string,
},
{

@ -0,0 +1,147 @@
#include "gc.h"
#include "ffdb/fs_list.h"
#include "model/status.h"
#include "model/account.h"
#include "timeline.h"
// Mark the status with the given id as "keep" and recursively mark every
// status it is connected to (the post it reposts, the post it replies to, and
// each of its replies).
//
// status_id: id passed to status_from_id; nothing happens if it does not load.
// force:     when false, a status whose mark is already set is left alone,
//            which is what stops the recursion from revisiting posts.
//            When true, the status is re-marked and its neighbours are
//            visited even if it was already marked.
void mark_post( int status_id, bool force )
{
	struct status* post = status_from_id( status_id );
	if( post == NULL ) {
		return;
	}

	if( force || !post->mark ) {
		// Persist the mark before recursing so that neighbours which
		// point back at this post see it as already marked.
		post->mark = true;
		status_save(post);

		// Flood fill to keep all related posts alive
		if( post->repost_id ) {
			mark_post( post->repost_id, false );
		}
		if( post->in_reply_to ) {
			mark_post( post->in_reply_to, false );
		}
		for( int reply = 0; reply < post->replies.count; ++reply ) {
			mark_post( post->replies.items[reply], false );
		}
	}

	status_free(post);
}
// Posts published within this many seconds of the start of a GC run are
// never collected (2 days).
enum { GC_KEEP_RECENT_SECONDS = 3600 * 24 * 2 };

// Clear the mark flag of every status that loads in [tail, head).
// Ids that fail to load at the very start of the range advance the tail;
// the (possibly advanced) tail is returned.
static int gc_clear_marks( int tail, int head )
{
	for( int i = tail; i < head; ++i ) {
		struct status* s = status_from_id(i);
		if( !s ) {
			if( i == tail ) {
				tail += 1;
			}
			continue;
		}

		if( s->mark ) {
			s->mark = false;
			status_save(s);
		}
		status_free(s);
	}

	return tail;
}

// Decide whether a status must be kept on its own merits (before the
// flood fill done by mark_post extends that to related posts).
static bool gc_is_root( struct status* s, time_t recent_cutoff )
{
	if( s->published > recent_cutoff ) { return true; }          // keep all posts in the last 2 days
	if( s->account_id == owner_account_id ) { return true; }     // keep all owner posts
	if( s->bookmarked ) { return true; }                          // keep bookmarked posts
	if( s->pinned ) { return true; }                              // keep pinned posts regardless of age
	return false;
}

// Mark-and-sweep garbage collection over the stored statuses.
//
// The id range is read from data/statuses/TAIL and data/statuses/HEAD.
// The run has four passes:
//   1. clear every mark (and persist the advanced TAIL),
//   2. mark the posts to keep and, via mark_post, everything related to them,
//   3. delete every status that is neither marked nor recent,
//   4. clear the marks again and persist TAIL.
//
// Fixes relative to the previous implementation:
//   - owner posts are all kept, as the original comment stated; previously
//     only owner reposts were marked, and bookmarked/pinned owner posts were
//     skipped by the else-if chain;
//   - the reported count is the number of statuses kept, not the number of
//     ids that failed to load;
//   - the last progress message is flushed like the others.
void gc_run(void)
{
	int head = fs_list_get( "data/statuses/HEAD" );
	int tail = fs_list_get( "data/statuses/TAIL" );

	// One cutoff for the whole run so the mark and sweep passes agree.
	const time_t recent_cutoff = time(NULL) - GC_KEEP_RECENT_SECONDS;

	// Clear all marks
	printf( "Clearing marks..." );
	fflush(stdout);
	tail = gc_clear_marks( tail, head );
	printf( "done.\n" );
	fflush(stdout);

	// Save tail position
	fs_list_set( "data/statuses/TAIL", tail );

	// Mark posts to keep
	printf( "Mark posts to keep..." );
	fflush(stdout);
	for( int i = tail; i < head; ++i ) {
		struct status* s = status_from_id(i);
		if( !s ) { continue; }

		if( gc_is_root( s, recent_cutoff ) ) {
			s->mark = true;
		}

		// s->mark may also already be set on disk by an earlier flood fill.
		if( s->mark ) {
			mark_post( s->id, true );
			status_save(s);
		}
		status_free(s);
	}
	printf( "done.\n");
	fflush(stdout);

	printf( "Sweeping..." );
	fflush(stdout);
	int to_keep = 0;
	for( int i = tail; i < head; ++i ) {
		struct status* s = status_from_id(i);
		if( !s ) {
			if( i == tail ) {
				tail += 1;
			}
			continue;
		}

		if( s->mark || s->published > recent_cutoff ) {
			to_keep += 1;
			status_free(s);
		} else {
			// status_delete releases s itself
			status_delete(s);
		}
	}
	printf( "done. Would keep %d posts.\n", to_keep );
	fflush(stdout);

	// Clear all marks once we are done
	printf( "Clearing marks..." );
	fflush(stdout);
	tail = gc_clear_marks( tail, head );
	printf( "done.\n" );
	fflush(stdout);

	// Save tail position
	fs_list_set( "data/statuses/TAIL", tail );
}

@ -0,0 +1,4 @@
#pragma once
// Run one garbage-collection pass over the stored statuses.
// Takes no arguments and returns nothing.
void gc_run(void);

@ -33,6 +33,7 @@ static struct json_object_field status_layout[] = {
JSON_FIELD_STRING( url, false ),
JSON_FIELD_BOOL( stub, false ),
JSON_FIELD_BOOL( remote, false ),
JSON_FIELD_BOOL( mark, false ),
JSON_FIELD_STRING( content, false ),
JSON_FIELD_STRING( source, false ),
@ -204,6 +205,10 @@ void status_add_repost( struct status* s, struct status* repost )
}
bool status_sync_from_activity_pub( struct status* s, struct ap_object* act )
{
if( !act->actor && act->attributed_to ) {
act->actor = strdup( act->attributed_to );
}
printf( "Syncing status from activity %s\n", act->id );
ap_object_write_to_FILE( act, stdout );
bool result = false;
@ -303,7 +308,7 @@ bool pull_remote_file( const char* filename, const char* uri )
long status_code = -1;
const void* request[] = {
HTTP_REQ_URL, uri,
HTTP_REQ_HEADER, "Accept: application/ld+json",
HTTP_REQ_HEADER, "Accept: application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"",
HTTP_REQ_OUTFILE, f,
HTTP_REQ_RESULT_STATUS, &status_code,
NULL,
@ -315,14 +320,21 @@ bool pull_remote_file( const char* filename, const char* uri )
}
printf( "status_code = %d\n", status_code );
if( status_code != 200 ) {
if( status_code == 200 ) {
// success
fclose(f);
rename(tmp_filename,filename);
return true;
} else if( status_code == 401 ) {
// Not Authorized
// TODO: perform signed fetch
fclose(f);
return false;
}
fclose(f);
rename(tmp_filename,filename);
return true;
// Failure
fclose(f);
return false;
}
bool status_sync_from_uri( struct status* s, const char* uri )
@ -496,6 +508,13 @@ void status_save( struct status* s )
if( s->url ) {
hash_index_set( "data/statuses/uri", s->url, s->id );
}
if( s->stub ) {
mkdir( "data/statuses/stubs", 0755 );
ffdb_trie_set( "data/statuses/stubs", format(filename,512,"%d",s->id), "T" );
} else {
ffdb_trie_remove( "data/statuses/stubs", format(filename,512,"%d",s->id) );
}
}
void status_write_to_FILE( struct status* s, FILE* f )
{
@ -535,6 +554,28 @@ void status_free( struct status* s )
free(s);
}
void status_delete( struct status* s )
{
int ids[] = {
s->account_id,
home_timeline_id,
public_timeline_id
};
for( int i = 0; i < sizeof(ids)/sizeof(ids[0]); ++i ) {
struct timeline* tl = timeline_from_id( ids[i] );
if( tl ) {
timeline_remove_post( tl, s );
timeline_free(tl);
}
}
char filename[512];
get_status_data_filename(s->id,filename,sizeof(filename));
remove(filename);
status_free(s);
}
struct async_status_fetch
{
struct {

@ -16,6 +16,7 @@ struct status
char* url;
bool stub;
bool remote;
bool mark;
char* content;
char* source;
@ -84,6 +85,7 @@ bool status_save_new( struct status* s );
void status_write_to_FILE( struct status* s, FILE* f );
void status_save( struct status* s );
void status_free( struct status* s );
void status_delete( struct status* s );
void status_flag_for_async_fetch( struct status* s );

@ -57,15 +57,11 @@ int timeline_load_statuses( struct timeline* tl, int offset_from_head, int count
free(values.items);
return result_count;
}
void timeline_add_post( struct timeline* tl, struct status* s )
static void key_for_post( struct status* s, char* key, int sizeof_key )
{
char filename[512];
snprintf( filename, sizeof(filename), "data/accounts/%d/timeline", tl->id );
struct tm gmtime_data;
gmtime_r( &s->published, &gmtime_data );
char key[512];
snprintf( key, sizeof(key), "%04d-%02d-%02dT%02d:%02d:%02dZ",
snprintf( key, sizeof_key, "%04d-%02d-%02dT%02d:%02d:%02dZ",
gmtime_data.tm_year + 1900,
gmtime_data.tm_mon + 1,
gmtime_data.tm_mday,
@ -73,8 +69,26 @@ void timeline_add_post( struct timeline* tl, struct status* s )
gmtime_data.tm_min,
gmtime_data.tm_sec
);
}
// Record a post in a timeline.
// The timeline is the trie stored at data/accounts/<tl->id>/timeline; the
// entry's key comes from key_for_post and its value is the post id rendered
// as decimal text.
void timeline_add_post( struct timeline* tl, struct status* s )
{
	char post_key[512];
	key_for_post( s, post_key, sizeof(post_key) );

	char timeline_path[512];
	snprintf( timeline_path, sizeof(timeline_path), "data/accounts/%d/timeline", tl->id );

	char id_text[32];
	ffdb_trie_set(
		timeline_path,
		post_key,
		format( id_text, sizeof(id_text), "%d", s->id )
	);
}
// Remove a post from a timeline.
// Looks up the trie stored at data/accounts/<tl->id>/timeline and removes the
// entry whose key key_for_post produces for this post.
void timeline_remove_post( struct timeline* tl, struct status* s )
{
	char post_key[512];
	key_for_post( s, post_key, sizeof(post_key) );

	char timeline_path[512];
	snprintf( timeline_path, sizeof(timeline_path), "data/accounts/%d/timeline", tl->id );

	ffdb_trie_remove( timeline_path, post_key );
}

@ -12,4 +12,5 @@ void timeline_free( struct timeline* tl );
int timeline_load_statuses( struct timeline* tl, int offset_from_head, int count, struct status** result );
void timeline_add_post( struct timeline* tl, struct status* s );
void timeline_remove_post( struct timeline* tl, struct status* s );

Loading…
Cancel
Save