Brian - I'm playing around with a modified version of the Kitchen Sink ML strategy in which I've stripped out the fundamental and quality features and am focusing only on the price, volume, and technical features. I can tell from the logs that the backtest ran, but when I go to create the tear sheet, I receive the following UnicodeDecodeError:
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/moonchart/perf.py:266, in DailyPerformance.from_moonshot_csv(cls, filepath_or_buffer, start_date, end_date, trim_outliers, how_to_aggregate, riskfree, compound, rolling_sharpe_window)
265 try:
--> 266 results = read_moonshot_csv(filepath_or_buffer)
267 except ValueError as e:
268 # "ValueError: 'Date' is not in list" might mean the user passed
269 # a paramscan csv by mistake
File /opt/conda/lib/python3.11/site-packages/quantrocket/moonshot.py:238, in read_moonshot_csv(filepath_or_buffer)
210 """
211 Load a Moonshot backtest CSV into a DataFrame.
212
(...)
236 >>> returns = results.loc["Return"]
237 """
--> 238 return _read_moonshot_or_pnl_csv(filepath_or_buffer)
File /opt/conda/lib/python3.11/site-packages/quantrocket/utils/_parse.py:41, in _read_moonshot_or_pnl_csv(filepath_or_buffer)
39 raise ImportError("pandas must be installed to use this function")
---> 41 results = pd.read_csv(
42 filepath_or_buffer,
43 parse_dates=["Date"],
44 # columns can have mixed types, silence warning
45 low_memory=False)
46 index_cols = ["Field", "Date"]
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:611, in _read(filepath_or_buffer, kwds)
610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
613 if chunksize or iterator:
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1723, in TextFileReader._make_engine(self, f, engine)
1722 try:
-> 1723 return mapping[engine](f, **self.options)
1724 except Exception:
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/c_parser_wrapper.py:93, in CParserWrapper.__init__(self, src, **kwds)
92 import_optional_dependency("pyarrow")
---> 93 self._reader = parsers.TextReader(src, **kwds)
95 self.unnamed_cols = self._reader.unnamed_cols
File parsers.pyx:579, in pandas._libs.parsers.TextReader.__cinit__()
File parsers.pyx:668, in pandas._libs.parsers.TextReader._get_header()
File parsers.pyx:879, in pandas._libs.parsers.TextReader._tokenize_rows()
File parsers.pyx:890, in pandas._libs.parsers.TextReader._check_tokenize_status()
File parsers.pyx:2050, in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb3 in position 15: invalid start byte
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
Cell In[5], line 2
1 from moonchart import Tearsheet
----> 2 Tearsheet.from_moonshot_csv("quantitativo_ml_results.csv")
File /opt/conda/lib/python3.11/site-packages/moonchart/tearsheet.py:129, in Tearsheet.from_moonshot_csv(cls, filepath_or_buffer, figsize, max_cols_for_details, trim_outliers, how_to_aggregate, pdf_filename, riskfree, start_date, end_date, compound, rolling_sharpe_window)
54 @classmethod
55 def from_moonshot_csv(
56 cls,
(...)
67 rolling_sharpe_window: int = 200
68 ) -> None:
69 """
70 Create a full tear sheet from a moonshot backtest results CSV.
71
(...)
127 >>> Tearsheet.from_moonshot_csv("backtest_results.csv")
128 """
--> 129 perf = DailyPerformance.from_moonshot_csv(
130 filepath_or_buffer,
131 start_date=start_date,
132 end_date=end_date,
133 trim_outliers=trim_outliers,
134 how_to_aggregate=how_to_aggregate,
135 riskfree=riskfree,
136 compound=compound,
137 rolling_sharpe_window=rolling_sharpe_window)
139 t = cls(figsize=figsize,
140 max_cols_for_details=max_cols_for_details,
141 pdf_filename=pdf_filename)
143 return t.create_full_tearsheet(perf)
File /opt/conda/lib/python3.11/site-packages/moonchart/perf.py:272, in DailyPerformance.from_moonshot_csv(cls, filepath_or_buffer, start_date, end_date, trim_outliers, how_to_aggregate, riskfree, compound, rolling_sharpe_window)
270 if "Date" not in repr(e):
271 raise
--> 272 results = pd.read_csv(filepath_or_buffer)
273 if "StrategyOrDate" in results.columns:
274 raise MoonchartError("this looks like a parameter scan CSV, please use ParamscanTearsheet.from_csv")
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
944 dtype_backend=dtype_backend,
945 )
946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:611, in _read(filepath_or_buffer, kwds)
608 _validate_names(kwds.get("names", None))
610 # Create the parser.
--> 611 parser = TextFileReader(filepath_or_buffer, **kwds)
613 if chunksize or iterator:
614 return parser
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1448, in TextFileReader.__init__(self, f, engine, **kwds)
1445 self.options["has_index_names"] = kwds["has_index_names"]
1447 self.handles: IOHandles | None = None
-> 1448 self._engine = self._make_engine(f, self.engine)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1723, in TextFileReader._make_engine(self, f, engine)
1720 raise ValueError(msg)
1722 try:
-> 1723 return mapping[engine](f, **self.options)
1724 except Exception:
1725 if self.handles is not None:
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/c_parser_wrapper.py:93, in CParserWrapper.__init__(self, src, **kwds)
90 if kwds["dtype_backend"] == "pyarrow":
91 # Fail here loudly instead of in cython after reading
92 import_optional_dependency("pyarrow")
---> 93 self._reader = parsers.TextReader(src, **kwds)
95 self.unnamed_cols = self._reader.unnamed_cols
97 # error: Cannot determine type of 'names'
File parsers.pyx:579, in pandas._libs.parsers.TextReader.__cinit__()
File parsers.pyx:668, in pandas._libs.parsers.TextReader._get_header()
File parsers.pyx:879, in pandas._libs.parsers.TextReader._tokenize_rows()
File parsers.pyx:890, in pandas._libs.parsers.TextReader._check_tokenize_status()
File parsers.pyx:2050, in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb3 in position 15: invalid start byte
Any suggestions would be appreciated.
Thanks.