Hi Brian and Quant Rocketers,
I am trying to get the kitchensink_ml code to work on intraday, but I get the following error:
quantrocket-moonshot-1|[minute-ml-poc] Collecting features from Minute_ML_POC.prices_to_features for 2024-03-10 to 2024-03-16
quantrocket-moonshot-1|adding techical indicator features
quantrocket-moonshot-1|[minute-ml-poc] Incrementally fitting StandardScaler with features for 2024-03-10 to 2024-03-16
quantrocket-moonshot-1|[minute-ml-poc] Transforming features with StandardScaler for 2024-03-10 to 2024-03-16
quantrocket-moonshot-1|[minute-ml-poc] Incrementally fitting SGDRegressor with transformed features for 2024-03-10 to 2024-03-16
quantrocket-moonshot-1|[minute-ml-poc] Backtesting strategy from 2024-03-17 to 2024-03-23 with model trained through 2024-03-16
quantrocket-moonshot-1|adding techical indicator features
quantrocket-moonshot-1|Traceback (most recent call last):
quantrocket-moonshot-1| File "sym://qrocket_app_py", line 499, in post
quantrocket-moonshot-1| File "sym://qrocket_mshot_backtest_ml_walkforward_incremental_py", line 218, in analyze
quantrocket-moonshot-1| File "/opt/conda/lib/python3.11/site-packages/moonshot/strategies/ml.py", line 407, in backtest
quantrocket-moonshot-1| return super(MoonshotML, self).backtest(
quantrocket-moonshot-1| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
quantrocket-moonshot-1| File "/opt/conda/lib/python3.11/site-packages/moonshot/strategies/base.py", line 1341, in backtest
quantrocket-moonshot-1| weights = self.signals_to_target_weights(signals, prices)
quantrocket-moonshot-1| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
quantrocket-moonshot-1| File "/codeload/moonshot/minute_ml_poc.py", line 116, in signals_to_target_weights
quantrocket-moonshot-1| weights = weights.resample("D").first()
quantrocket-moonshot-1| ^^^^^^^^^^^^^^^^^^^^^
quantrocket-moonshot-1| File "/opt/conda/lib/python3.11/site-packages/pandas/core/generic.py", line 9435, in resample
quantrocket-moonshot-1| return get_resampler(
quantrocket-moonshot-1| ^^^^^^^^^^^^^^
quantrocket-moonshot-1| File "/opt/conda/lib/python3.11/site-packages/pandas/core/resample.py", line 1970, in get_resampler
quantrocket-moonshot-1| return tg._get_resampler(obj, kind=kind)
quantrocket-moonshot-1| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
quantrocket-moonshot-1| File "/opt/conda/lib/python3.11/site-packages/pandas/core/resample.py", line 2160, in _get_resampler
quantrocket-moonshot-1| raise TypeError(
quantrocket-moonshot-1|TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'MultiIndex'
quantrocket-moonshot-1|
quantrocket-moonshot-1|...The work of process 566 is done. Seeya!
I created a copy of the kichensink_ml.py to leave only the technical indicators simplify the troubleshooting, but it is the same issue as the original one.
Here is the code:
import pandas as pd
from moonshot import MoonshotML
from moonshot.commission import PerShareCommission
from quantrocket.fundamental import get_sharadar_fundamentals_reindexed_like
from quantrocket import get_prices
from quantrocket.master import get_securities_reindexed_like
class USStockCommission(PerShareCommission):
BROKER_COMMISSION_PER_SHARE = 0.005
class Minute_ML_POC(MoonshotML):
CODE = "minute-ml-poc"
DB = "usstock-1min-bundle"
UNIVERSES = "leveraged-efts"
# DB_FIELDS = ["Close", "Volume"]
# BENCHMARK_DB = "market-1d"
# SPY_SID = "FIBBG000BDTBL9"
# VIX_SID = "IB13455763"
# TRIN_SID = "IB26718743"
# BENCHMARK = SPY_SID
# DOLLAR_VOLUME_TOP_N_PCT = 60
# DOLLAR_VOLUME_WINDOW = 90
MODEL = None
LOOKBACK_WINDOW = 252
COMMISSION_CLASS = USStockCommission
TIMEZONE = "America/New_York"
SIDS = "FIBBG000QB9Y48" #TQQQ
BENCHMARK_TIME = "14:00:00" ## TODO: added for troubleshooting, needs to be removed
def prices_to_features(self, prices: pd.DataFrame):
closes = prices.loc["Close"]
features = {}
print("adding techical indicator features")
self.add_technical_indicator_features(prices, features)
returns = (closes - closes.shift(5)) / closes.shift(5).where(closes.shift(5) > 0) # 5 minutes?
targets = returns.shift(-5)
return features, targets
def add_technical_indicator_features(self, prices: pd.DataFrame, features: dict[str, pd.DataFrame]):
"""
Various technical indicators:
- Bollinger bands
- RSI
- Stochastic oscillator
- Money Flow Index
"""
closes = prices.loc["Close"]
# relative position within Bollinger Bands (0 = at or below lower band, 1 = at or above upper band)
mavgs = closes.rolling(20).mean()
stds = closes.rolling(20).std()
upper_bands = mavgs + (stds * 2)
lower_bands = mavgs - (stds * 2)
# Winsorize at upper and lower bands
winsorized_closes = closes.where(closes > lower_bands, lower_bands).where(closes < upper_bands, upper_bands)
features["close_vs_bbands"] = (winsorized_closes - lower_bands) / (upper_bands - lower_bands)
# RSI (0-1)
returns = closes.diff()
avg_gains = returns.where(returns > 0).rolling(window=14, min_periods=1).mean()
avg_losses = returns.where(returns < 0).abs().rolling(window=14, min_periods=1).mean()
relative_strengths = avg_gains / avg_losses.where(avg_losses != 0)
features["RSI"] = 1 - (1 / (1 + relative_strengths.fillna(0.5)))
# Stochastic oscillator (0-1)
highest_highs = closes.rolling(window=14).max()
lowest_lows = closes.rolling(window=14).min()
features["stochastic"] = (closes - lowest_lows) / (highest_highs - lowest_lows)
# Money flow (similar to RSI but volume-weighted) (0-1)
money_flows = closes * prices.loc["Volume"]
positive_money_flows = money_flows.where(returns > 0).rolling(window=14, min_periods=1).sum()
negative_money_flows = money_flows.where(returns < 0).rolling(window=14, min_periods=1).sum()
money_flow_ratios = positive_money_flows / negative_money_flows.where(negative_money_flows > 0)
features["money_flow"] = 1 - (1 / (1 + money_flow_ratios.fillna(0.5)))
def predictions_to_signals(self, predictions: pd.DataFrame, prices: pd.DataFrame):
closes = prices.loc["Close"]
volumes = prices.loc["Volume"]
# avg_dollar_volumes = (closes * volumes).rolling(self.DOLLAR_VOLUME_WINDOW).mean()
# dollar_volume_ranks = avg_dollar_volumes.rank(axis=1, ascending=False, pct=True)
# have_adequate_dollar_volumes = dollar_volume_ranks <= (self.DOLLAR_VOLUME_TOP_N_PCT/100)
# Save the predictions and prices so we can analyze them
self.save_to_results("Prediction", predictions)
self.save_to_results("Close", closes)
self.save_to_results("Volume", volumes)
# Buy (sell) stocks with best (worst) predicted return
# have_best_predictions = predictions.where(have_adequate_dollar_volumes).rank(ascending=False, axis=1) <= 10
# have_worst_predictions = predictions.where(have_adequate_dollar_volumes).rank(ascending=True, axis=1) <= 10
# Buy (sell) stocks with best (worst) predicted return
have_best_predictions = predictions.rank(ascending=False, axis=1) <= 10
have_worst_predictions = predictions.rank(ascending=True, axis=1) <= 10
signals = have_best_predictions.astype(int).where(have_best_predictions, -have_worst_predictions.astype(int).where(have_worst_predictions, 0))
return signals
def signals_to_target_weights(self, signals: pd.DataFrame, prices: pd.DataFrame):
# Allocate equal weights
daily_signal_counts = signals.abs().sum(axis=1)
weights = signals.div(daily_signal_counts, axis=0).fillna(0)
# Rebalance weekly
# Resample daily to weekly, taking the first day's signal
# For pandas offset aliases, see https://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
weights = weights.resample("D").first()
# Reindex back to daily and fill forward
weights = weights.reindex(prices.loc["Close"].index, method="ffill")
weights = self.allocate_equal_weights(signals)
return weights
def target_weights_to_positions(self, weights: pd.DataFrame, prices: pd.DataFrame):
# Enter the position the day after the signal
return weights.shift()
def positions_to_gross_returns(self, positions: pd.DataFrame, prices: pd.DataFrame):
closes = prices.loc["Close"]
gross_returns = closes.pct_change() * positions.shift()
# print("gross_returns")
# print (gross_returns)
return gross_returns
Thank you!