You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
192 lines
3.5 KiB
C
192 lines
3.5 KiB
C
#include "gc.h"
|
|
|
|
#include "ffdb/fs_list.h"
|
|
|
|
#include "model/status.h"
|
|
#include "model/account.h"
|
|
|
|
#include "timeline.h"
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Per-status bookkeeping flags for the mark phase of the garbage collector.
struct entry
{
	// Set when the status has been selected to be kept.
	unsigned int marked : 1;
	// Set once mark_post() has finished deciding about the status
	// (also set when the status could not be loaded at all).
	unsigned int processed : 1;
};
|
|
|
|
// Mark table covering a contiguous range of status ids.
struct bitmap
{
	// One entry per status id; the entry for id N lives at items[N - base_id].
	struct entry *items;
	// Number of entries in items.
	int count;
	// Status id that corresponds to items[0].
	int base_id;
	// Number of statuses that could actually be loaded during marking.
	int exists;
};
|
|
|
|
void mark_post( struct bitmap* b, int status_id, bool force )
|
|
{
|
|
printf( "\rstatus_id = %d", status_id );
|
|
fflush(stdout);
|
|
|
|
struct entry* e = &b->items[ status_id - b->base_id ];
|
|
|
|
if( e->processed ) { return; };
|
|
|
|
struct status* s = status_from_id( status_id );
|
|
if( !s ) {
|
|
e->processed = true;
|
|
return;
|
|
}
|
|
|
|
b->exists += 1;
|
|
|
|
if( force ) {
|
|
e->marked = true;
|
|
} else {
|
|
if( s->published > time(NULL) - 3600 * 24 * 2 ) { // keep all posts in the last 2 days
|
|
e->marked = true;
|
|
} else if( s->account_id == owner_account_id ) { // Keep all owner posts
|
|
if( s->repost.id ) {
|
|
e->marked = true;
|
|
} else if( s->quote.id ) {
|
|
e->marked = true;
|
|
}
|
|
} else if( s->bookmarked ) {
|
|
e->marked = true;
|
|
} else if( s->pinned ) { // Keep pinned posts regardless of age
|
|
e->marked = true;
|
|
}
|
|
}
|
|
|
|
e->processed = true;
|
|
if( !e->marked ) {
|
|
status_free(s);
|
|
return;
|
|
}
|
|
|
|
// Flood fill to keep all related posts alive
|
|
if( s->repost.id ) {
|
|
mark_post( b, s->repost.id, true );
|
|
}
|
|
if( s->in_reply_to.id ) {
|
|
mark_post( b, s->in_reply_to.id, true );
|
|
}
|
|
if( s->quote.id ) {
|
|
mark_post( b, s->quote.id, true );
|
|
}
|
|
for( int i = 0; i < s->replies.count; ++i ) {
|
|
mark_post( b, s->replies.items[i], true );
|
|
}
|
|
|
|
status_free(s);
|
|
}
|
|
|
|
static void sweep_posts()
|
|
{
|
|
int head = fs_list_get( "data/statuses/HEAD" );
|
|
int tail = fs_list_get( "data/statuses/TAIL" );
|
|
|
|
int count = head - tail + 1;
|
|
struct bitmap b = {
|
|
.items = malloc( sizeof(struct entry) * count ),
|
|
.count = count,
|
|
.base_id = tail,
|
|
};
|
|
memset( b.items, 0, sizeof(struct entry) * count );
|
|
|
|
// Mark posts to keep
|
|
printf( "Mark posts to keep...\n");
|
|
fflush(stdout);
|
|
for( int i = head; i >= tail; --i ) {
|
|
mark_post( &b, i, false );
|
|
}
|
|
printf( "done.\n");
|
|
fflush(stdout);
|
|
|
|
int to_keep = 0;
|
|
for( int i = 0; i < b.count; ++i ) {
|
|
if( b.items[i].marked ) {
|
|
to_keep += 1;
|
|
}
|
|
}
|
|
printf( "Would keep %d/%d posts.\n", to_keep, b.exists );
|
|
fflush(stdout);
|
|
|
|
printf( "Sweeping..." );
|
|
fflush(stdout);
|
|
for( int i = tail; i < head; ++i ) {
|
|
struct entry* e = &b.items[ i - b.base_id ];
|
|
struct status* s = status_from_id(i);
|
|
|
|
bool delete = true;
|
|
if( !s ) {
|
|
if( i == tail ) {
|
|
tail += 1;
|
|
}
|
|
} else {
|
|
printf( "\rstatus_id = %d, account_id=%d", i, s->account_id );
|
|
fflush(stdout);
|
|
|
|
if( s->published > time(NULL) - 3600 * 24 * 2 ) { // keep all posts in the last 2 days
|
|
delete = false;
|
|
} else if( e->marked ) {
|
|
delete = false;
|
|
}
|
|
}
|
|
|
|
if( s && s->remote && !s->pinned ) {
|
|
if( delete ) {
|
|
status_delete(s);
|
|
} else {
|
|
status_free(s);
|
|
}
|
|
}
|
|
}
|
|
printf( "done. Would keep %d posts.\n", to_keep );
|
|
fflush(stdout);
|
|
|
|
free(b.items);
|
|
|
|
// Save tail position
|
|
fs_list_set( "data/statuses/TAIL", tail );
|
|
}
|
|
|
|
/*
|
|
static void sweep_timeline( struct timeline* tl )
|
|
{
|
|
printf( "Sweeping timeline at %s\n", tl->path );
|
|
struct status* ss[32];
|
|
int pos = 0;
|
|
int count = 1;
|
|
while( count > 0 ) {
|
|
count = timeline_load_statuses( tl, pos, 32, ss );
|
|
for( int i = 0; i < count; ++i ) {
|
|
status_free(ss[i]);
|
|
}
|
|
pos += count;
|
|
}
|
|
}
|
|
|
|
static void sweep_timelines()
|
|
{
|
|
for( int i = 0;; i += 1 ) {
|
|
struct timeline* tl = timeline_from_id(i);
|
|
if( !tl ) { return; }
|
|
|
|
sweep_timeline(tl);
|
|
|
|
timeline_free(tl);
|
|
}
|
|
}
|
|
*/
|
|
|
|
// Entry point of the garbage collector: runs status_gc() first, then the
// mark-and-sweep pass over the stored statuses.
void gc_run()
{
	status_gc();
	sweep_posts();

	// Sweeping of timelines is currently disabled.
	//sweep_timelines();
}
|
|
|