Skip to content

Commit

Permalink
Workaround pair data issues (#105)
Browse files Browse the repository at this point in the history
- Workaround pair data issues when Parquest is interpreting some values as NaN when should be None
- Might be related to Parquest data format and somehow values get misinterpreted when written or read
- #104
  • Loading branch information
miohtama authored Jul 6, 2023
1 parent f67b216 commit e77fe15
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 0.17.6

- [Add loaded trading pair data preprocesing and working around Parquet data problems](https://github.com/tradingstrategy-ai/trading-strategy/issues/104)
- Add `DataDecodeFailed(Exception)` that gives more context information if there is something wrong
with the trading pair

# 0.17.5

- Fix test warnings by changing function calls from `create_single_pair_universe()` to `create_pair_universe()`
Expand Down
69 changes: 60 additions & 9 deletions tradingstrategy/pair.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import logging
import enum
import pprint
import warnings
from dataclasses import dataclass
from typing import Optional, List, Iterable, Dict, Union, Set, Tuple, TypeAlias, Collection
Expand Down Expand Up @@ -94,6 +95,10 @@ class DuplicatePair(Exception):
"""Found multiple trading pairs for the same naive lookup."""


class DataDecodeFailed(Exception):
"""The parquet file has damaged data for this trading pair."""


#: Data needed to identify a trading pair with human description.
#:
#: This is `(chain, exchange slug, base token, quote token)`.
Expand Down Expand Up @@ -694,9 +699,9 @@ def get_pair_by_id(self, pair_id: PrimaryKey) -> Optional[DEXPair]:
# as we do not know what pairs a strategy might access.
data = self.pair_map.get(pair_id)

# Convert
obj = DEXPair.from_dict(data)
self.pair_map[pair_id] = obj
obj = _convert_to_dex_pair(data)

self.dex_pair_obj_cache[pair_id] = obj
return obj

# We did not build this universe with pair index
Expand All @@ -711,7 +716,8 @@ def get_pair_by_id(self, pair_id: PrimaryKey) -> Optional[DEXPair]:

if len(pairs) == 1:
data = next(iter(pairs.to_dict("index").values()))
return DEXPair.from_dict(data)
obj = _convert_to_dex_pair(data)
return obj

raise PairNotFoundError(pair_id=pair_id)

Expand Down Expand Up @@ -781,16 +787,22 @@ def get_all_tokens(self) -> Set[Token]:
def get_single(self) -> DEXPair:
"""For strategies that trade only a single trading pair, get the only pair in the universe.
:raise AssertionError: If our pair universe does not have an exact single pair
:raise AssertionError:
If our pair universe does not have an exact single pair.
If the target pair could not be decoded.
"""
if self.single_pair_cache:
return self.single_pair_cache

pair_count = len(self.pair_map)
assert pair_count == 1, f"Not a single trading pair universe, we have {pair_count} pairs"
self.single_pair_cache = DEXPair.from_dict(next(iter(self.pair_map.values())))
return self.single_pair_cache
data = next(iter(self.pair_map.values()))

# See https://github.com/tradingstrategy-ai/trading-strategy/issues/104
obj =_convert_to_dex_pair(data)
self.single_pair_cache = obj
return self.single_pair_cache

def get_by_symbols(self, base_token_symbol: str, quote_token_symbol: str) -> Optional[DEXPair]:
"""For strategies that trade only a few trading pairs, get the only pair in the universe.
Expand Down Expand Up @@ -916,11 +928,11 @@ def get_one_pair_from_pandas_universe(
# Sort by trade volume and pick the highest one
pairs = pairs.sort_values(by=["fee", "buy_volume_all_time"], ascending=[True, False])
data = next(iter(pairs.to_dict("index").values()))
return DEXPair.from_dict(data)
return _convert_to_dex_pair(data)

if len(pairs) == 1:
data = next(iter(pairs.to_dict("index").values()))
return DEXPair.from_dict(data)
return _convert_to_dex_pair(data)

raise PairNotFoundError(base_token=base_token, quote_token=quote_token, fee_tier=fee_tier, exchange_id=exchange_id)

Expand Down Expand Up @@ -1724,3 +1736,42 @@ def expander(row: pd.Series) -> dict:
applied_df = df.apply(expander, axis='columns', result_type='expand')
df = pd.concat([df, applied_df], axis='columns')
return df


def _preprocess_loaded_pair_data(data: dict) -> dict:
"""Fix any data loading and transfomration issues we might have with the data.
Hot fix for https://github.com/tradingstrategy-ai/trading-strategy/issues/104
"""

assert isinstance(data, dict)

def _fix_val(v):
try:
if isnan(v):
return None
except:
pass
return v

result = {}
for k, v in data.items():
result[k] = _fix_val(v)

return result


def _convert_to_dex_pair(data: dict) -> DEXPair:
"""Convert trading pai0r data from dict to object.
- Correctly handle serialisation quirks
- Give user friendly error reports
"""
data = _preprocess_loaded_pair_data(data)
try:
obj = DEXPair.from_dict(data)
except Exception as e:
pretty = pprint.pformat(data)
raise DataDecodeFailed(f"Could not decode trading pair data:\n{pretty}") from e
return obj

0 comments on commit e77fe15

Please sign in to comment.