Get mark-and-sweep status garbage collection working, remove mark from status and use in-memory array (performance enhancement)

master
teknomunk 1 year ago
parent 9b361dd2d3
commit 18536975e4

@ -1 +1 @@
Subproject commit 34af1bef81911b7206cc8c514c09022de1f74695
Subproject commit 2126183e06ae69fc29561f1d8a3d500cca66d5e2

@ -7,28 +7,71 @@
#include "timeline.h"
void mark_post( int status_id, bool force )
#include <stdlib.h>
#include <string.h>
// Per-status garbage-collection flags. One of these exists for every slot in
// the items array of struct bitmap.
struct entry
{
// Set by mark_post when the status must be kept; the sweep loop in gc_run
// treats a set bit as "do not delete".
unsigned marked : 1;
// Set by mark_post once it has handled this slot (including the case where
// the status could not be loaded); later calls for the same id return early.
unsigned processed : 1;
};
// In-memory mark table used by one garbage-collection run. Despite the name,
// storage is a plain array of struct entry indexed by (status id - base_id).
struct bitmap
{
// Heap array of per-status flags; gc_run allocates it, zeroes it, and frees it.
struct entry* items;
// Number of elements in items (gc_run sets this to head - tail + 1).
int count;
// Status id that maps to items[0]; gc_run sets this to the TAIL id.
int base_id;
// Running count of statuses that mark_post was able to load; reported as the
// total in the "Would keep %d/%d posts." message. Not named in gc_run's
// designated initializer, so it starts at zero.
int exists;
};
void mark_post( struct bitmap* b, int status_id, bool force )
{
printf( "\rstatus_id = %d", status_id );
fflush(stdout);
struct entry* e = &b->items[ status_id - b->base_id ];
if( e->processed ) { return; };
struct status* s = status_from_id( status_id );
if( !s ) { return; }
if( s->mark && !force ) {
// Already marked
status_free(s);
if( !s ) {
e->processed = true;
return;
}
s->mark = true;
status_save(s);
b->exists += 1;
if( force ) {
e->marked = true;
} else {
if( s->published > time(NULL) - 3600 * 24 * 2 ) { // keep all posts in the last 2 days
e->marked = true;
} else if( s->account_id == owner_account_id ) { // Keep all owner posts
if( s->repost_id ) {
e->marked = true;
}
} else if( s->bookmarked ) {
e->marked = true;
} else if( s->pinned ) { // Keep pinned posts regardless of age
e->marked = true;
}
}
e->processed = true;
if( !e->marked ) {
status_free(s);
return;
}
// Flood fill to keep all related posts alive
if( s->repost_id ) {
mark_post( s->repost_id, false );
mark_post( b, s->repost_id, true );
}
if( s->in_reply_to ) {
mark_post( s->in_reply_to, false );
mark_post( b, s->in_reply_to, true );
}
for( int i = 0; i < s->replies.count; ++i ) {
mark_post( s->replies.items[i], false );
mark_post( b, s->replies.items[i], true );
}
status_free(s);
@ -39,72 +82,50 @@ void gc_run()
int head = fs_list_get( "data/statuses/HEAD" );
int tail = fs_list_get( "data/statuses/TAIL" );
// Clear all marks
printf( "Clearing marks..." );
fflush(stdout);
for( int i = tail; i < head; ++i ) {
struct status* s = status_from_id(i);
if( !s ) {
if( i == tail ) {
tail += 1;
}
} else {
if( s->mark ) {
s->mark = false;
status_save(s);
}
status_free(s);
}
}
printf( "done.\n" );
fflush(stdout);
// Save tail position
fs_list_set( "data/statuses/TAIL", tail );
int count = head - tail + 1;
struct bitmap b = {
.items = malloc( sizeof(struct entry) * count ),
.count = count,
.base_id = tail,
};
memset( b.items, 0, sizeof(struct entry) * count );
// Mark posts to keep
printf( "Mark posts to keep..." );
printf( "Mark posts to keep...\n");
fflush(stdout);
for( int i = head; i >= tail; --i ) {
mark_post( &b, i, false );
}
printf( "done.\n");
fflush(stdout);
for( int i = tail; i < head; ++i ) {
struct status* s = status_from_id(i);
if( !s ) { continue; }
if( s->published > time(NULL) - 3600 * 24 * 2 ) { // keep all posts in the last 2 days
s->mark = true;
} else if( s->account_id == owner_account_id ) { // Keep all owner posts
if( s->repost_id ) {
s->mark = true;
}
} else if( s->bookmarked ) {
s->mark = true;
} else if( s->pinned ) { // Keep pinned posts regardless of age
s->mark = true;
}
if( s->mark ) {
mark_post( s->id, true );
status_save(s);
int to_keep = 0;
for( int i = 0; i < b.count; ++i ) {
if( b.items[i].marked ) {
to_keep += 1;
}
status_free(s);
}
printf( "done.\n");
printf( "Would keep %d/%d posts.\n", to_keep, b.exists );
fflush(stdout);
printf( "Sweeping..." );
fflush(stdout);
int to_keep = 0;
for( int i = tail; i < head; ++i ) {
struct entry* e = &b.items[ i - b.base_id ];
struct status* s = status_from_id(i);
bool delete = true;
if( !s ) {
if( i == tail ) {
tail += 1;
}
} else {
printf( "\rstatus_id = %d, account_id=%d", i, s->account_id );
fflush(stdout);
if( s->published > time(NULL) - 3600 * 24 * 2 ) { // keep all posts in the last 2 days
delete = false;
} else if( s->mark ) {
} else if( e->marked ) {
delete = false;
}
}
@ -115,31 +136,12 @@ void gc_run()
} else {
status_free(s);
}
} else {
to_keep += 1;
}
}
printf( "done. Would keep %d posts.\n", to_keep );
fflush(stdout);
// Clear all marks once we are done
printf( "Clearing marks..." );
for( int i = tail; i < head; ++i ) {
struct status* s = status_from_id(i);
if( !s ) {
if( i == tail ) {
tail += 1;
}
} else {
if( s->mark ) {
s->mark = false;
status_save(s);
}
status_free(s);
}
}
printf( "done.\n" );
fflush(stdout);
free(b.items);
// Save tail position
fs_list_set( "data/statuses/TAIL", tail );

@ -33,7 +33,6 @@ static struct json_object_field status_layout[] = {
JSON_FIELD_STRING( url, false ),
JSON_FIELD_BOOL( stub, false ),
JSON_FIELD_BOOL( remote, false ),
JSON_FIELD_BOOL( mark, false ),
JSON_FIELD_STRING( content, false ),
JSON_FIELD_STRING( source, false ),

@ -16,7 +16,6 @@ struct status
char* url;
bool stub;
bool remote;
bool mark;
char* content;
char* source;

Loading…
Cancel
Save