I learned a bit more sqlite3 to solve this problem; code is below for anyone needing to free up disk space by deleting earlier data from history databases.
I used this on an IBKR minute-level db, sharded by sid, and reduced disk usage by 10 GB by deleting the first 3 weeks of minute data. I'll be running it weekly to keep the db size in check. If you use it, make sure to back up your db beforehand (a quick sketch follows below) and verify your db directory structure in /var/lib/quantrocket. (Also, make sure not to collect the same earlier data again by passing the start_date parameter the next time you run collect_history; see the example below.)
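For example, passing a start_date at or after your cutoff should keep the deleted bars from being re-collected. This is just a sketch; the db code and date are hypothetical, so substitute your own:

from quantrocket.history import collect_history

# Hypothetical db code and cutoff; use a start_date at or after the
# date you deleted up to, so the dropped bars aren't collected again
collect_history('my-ibkr-1min-db', start_date='2021-06-01')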
@Brian Please chime in if there are any nuances/gotchas to how history stores data. This code simply loops through all the sqlite db files and removes the queried data from each.
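Before running the function below, here is a minimal backup sketch using shutil. The db code and backup destination are hypothetical; note that a sharded db's .sqlite path is a directory of shard files, hence copytree:

import shutil

# Hypothetical source and destination paths; adjust the db code and
# backup location to match your setup
src = '/var/lib/quantrocket/quantrocket.v2.history.my-ibkr-1min-db.sqlite'
dst = '/var/lib/quantrocket/quantrocket.v2.history.my-ibkr-1min-db.sqlite.bak'
shutil.copytree(src, dst)  # fails if dst already exists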
import os
import pandas as pd
import sqlite3
from sqlite3 import Error

def drop_history_data_before_date(db_name, cutoff_date, subdir='sid'):
    db_path = f'/var/lib/quantrocket/quantrocket.v2.history.{db_name}.sqlite/{subdir}'
    cutoff_date = pd.to_datetime(cutoff_date).strftime('%Y-%m-%d')
    counter = 0
    succ_count = 0
    err_count = 0
    file_list = os.listdir(db_path)
    for file in file_list:
        counter += 1
        if not file.endswith('.sqlite'):
            continue
        # Extract the sid (the segment between the last two dots of the
        # filename) for logging
        dots = [i for i, char in enumerate(file) if char == '.']
        sid = file[(dots[-2] + 1):dots[-1]]
        print(f'Deleting data for sid {sid} ({counter} of {len(file_list)})')
        # Open this shard's db file
        full_fn = os.path.join(db_path, file)
        try:
            conn = sqlite3.connect(full_fn)
        except Error as e:
            print(f'ERROR opening file {full_fn}: {e}')
            err_count += 1
            continue
        try:
            # Delete all rows before the cutoff date
            conn.execute('DELETE FROM Price WHERE Date < ?', (cutoff_date,))
            conn.commit()
            # Vacuum the file to reclaim disk space; VACUUM must run
            # outside a transaction, hence after the commit
            conn.execute('VACUUM')
        except Error as e:
            print(f'ERROR deleting data: {e}')
            err_count += 1
            continue
        finally:
            # Close the connection whether or not the delete succeeded
            conn.close()
        succ_count += 1
    print(f'Finished dropping data for {succ_count} db files with {err_count} error(s)')
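Usage is a single call. The db code and cutoff date here are hypothetical; pass whatever history db name and date match your setup:

# Drop everything before the cutoff from every sid shard of the db
drop_history_data_before_date('my-ibkr-1min-db', '2021-06-01')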