From 68f0ba49ef47b03dacd94b99c364db7050166f26 Mon Sep 17 00:00:00 2001 From: AndrewRPorter Date: Tue, 2 Aug 2022 07:00:37 -0400 Subject: [PATCH 1/5] Refactor and remove python 2 support --- {yahoo_historical/tests => tests}/__init__.py | 0 tests/test_fetch.py | 40 ++++++++ yahoo_historical/__init__.py | 2 +- yahoo_historical/constants.py | 6 ++ yahoo_historical/fetch.py | 94 +++++-------------- yahoo_historical/tests/test_fetch.py | 26 ----- 6 files changed, 72 insertions(+), 96 deletions(-) rename {yahoo_historical/tests => tests}/__init__.py (100%) create mode 100644 tests/test_fetch.py create mode 100644 yahoo_historical/constants.py delete mode 100644 yahoo_historical/tests/test_fetch.py diff --git a/yahoo_historical/tests/__init__.py b/tests/__init__.py similarity index 100% rename from yahoo_historical/tests/__init__.py rename to tests/__init__.py diff --git a/tests/test_fetch.py b/tests/test_fetch.py new file mode 100644 index 0000000..c6ad5cc --- /dev/null +++ b/tests/test_fetch.py @@ -0,0 +1,40 @@ +from yahoo_historical import Fetcher + +TEST_TICKER = "AAPL" + + +def test_get_no_dataframe(): + data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_historical( + as_dataframe=False + ) + assert len(data) > 0 + + +def test_get_with_lowercase(): + data = Fetcher("aapl", [2007, 1, 1], [2017, 1, 1]).get_historical() + assert len(data) > 0 + + +def test_get_historical(): + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_historical() + assert len(data) > 0 + + +def test_get_dividends(): + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_dividends() + assert len(data) > 0 + + +def test_get_splits(): + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_splits() + assert len(data) > 0 + + +def test_get_date_price(): + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_date_price() + assert len(data) > 0 + + +def test_get_date_volume(): + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_date_volume() + assert 
len(data) > 0 diff --git a/yahoo_historical/__init__.py b/yahoo_historical/__init__.py index a7dd020..fa61312 100644 --- a/yahoo_historical/__init__.py +++ b/yahoo_historical/__init__.py @@ -1 +1 @@ -from yahoo_historical.fetch import Fetcher +from .fetch import Fetcher diff --git a/yahoo_historical/constants.py b/yahoo_historical/constants.py new file mode 100644 index 0000000..1e8d2c2 --- /dev/null +++ b/yahoo_historical/constants.py @@ -0,0 +1,6 @@ +API_URL = "https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=%s&events=%s" +ONE_DAY_INTERVAL = "1d" +ONE_WEEK_INTERVAL = "1wk" +ONE_MONTH_INTERVAL = "1mo" + +DATE_INTERVALS = [ONE_DAY_INTERVAL, ONE_WEEK_INTERVAL, ONE_MONTH_INTERVAL] diff --git a/yahoo_historical/fetch.py b/yahoo_historical/fetch.py index be7106d..4fe83dc 100644 --- a/yahoo_historical/fetch.py +++ b/yahoo_historical/fetch.py @@ -1,21 +1,14 @@ import calendar as cal import datetime as dt import time -import warnings - import pandas as pd import requests - -try: - from io import StringIO -except ImportError: - from StringIO import StringIO +from io import StringIO +from .constants import API_URL, DATE_INTERVALS, ONE_DAY_INTERVAL class Fetcher: - api_url = "https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=%s&events=%s" - - def __init__(self, ticker, start, end=None, interval="1d"): + def __init__(self, ticker, start, end=None, interval=ONE_DAY_INTERVAL): """Initializes class variables and formats api_url string""" self.ticker = ticker.upper() self.interval = interval @@ -26,78 +19,41 @@ def __init__(self, ticker, start, end=None, interval="1d"): else: self.end = int(time.time()) - def _get(self, events): - if self.interval not in ["1d", "1wk", "1mo"]: - raise ValueError("Incorrect interval: valid intervals are 1d, 1wk, 1mo") + def create_url(self, events): + return API_URL % (self.ticker, self.start, self.end, self.interval, events) - url = self.api_url % (self.ticker, self.start, 
self.end, self.interval, events) + def _get(self, events, as_dataframe=True): + if self.interval not in DATE_INTERVALS: + raise ValueError( + f"Incorrect interval: valid intervals are {', '.join(DATE_INTERVALS)}" + ) + url = self.create_url(events) data = requests.get(url, headers={"User-agent": ""}) content = StringIO(data.content.decode("utf-8")) - return pd.read_csv(content, sep=",") - - def getData(self, events): - """Returns a list of historical data from Yahoo Finance""" - warnings.warn( - "getData has been deprecated, use get_data instead", DeprecationWarning - ) - return self._get(events) - - def getHistorical(self): - """Returns a list of historical price data from Yahoo Finance""" - warnings.warn( - "getHistorical has been deprecated, use get_historical instead", - DeprecationWarning, - ) - return self._get("history") - - def getDividends(self): - """Returns a list of historical dividends data from Yahoo Finance""" - warnings.warn( - "getDividends has been deprecated, use get_dividends instead", - DeprecationWarning, - ) - return self._get("div") - - def getSplits(self): - """Returns a list of historical splits data from Yahoo Finance""" - warnings.warn( - "getSplits has been deprecated, use get_splits instead", DeprecationWarning - ) - return self._get("split") - def getDatePrice(self): - """Returns a DataFrame for Date and Price from getHistorical()""" - warnings.warn( - "getDatePrice has been deprecated, use get_date_price instead", - DeprecationWarning, - ) - return self.getHistorical().iloc[:, [0, 4]] + dataframe = pd.read_csv(content, sep=",") + if as_dataframe: + return dataframe - def getDateVolume(self): - """Returns a DataFrame for Date and Volume from getHistorical()""" - warnings.warn( - "getDateVolume has been deprecated, use get_date_volume instead", - DeprecationWarning, - ) - return self.getHistorical().iloc[:, [0, 6]] + return dataframe.to_json() - def get_historical(self): + def get_historical(self, as_dataframe=True): """PEP8 friendly 
version of deprecated getHistorical function""" - return self._get("history") + return self._get("history", as_dataframe=as_dataframe) - def get_dividends(self): + def get_dividends(self, as_dataframe=True): """PEP8 friendly version of deprecated getDividends function""" - return self._get("div") + return self._get("div", as_dataframe=as_dataframe) - def get_splits(self): + def get_splits(self, as_dataframe=True): """PEP8 friendly version of deprecated getSplits function""" - return self._get("split") + return self._get("split", as_dataframe=as_dataframe) - def get_date_price(self): + def get_date_price(self, as_dataframe=True): """PEP8 friendly version of deprecated getDatePrice function""" - return self.get_historical().iloc[:, [0, 4]] + return self.get_historical(as_dataframe=as_dataframe).iloc[:, [0, 4]] - def get_date_volume(self): + def get_date_volume(self, as_dataframe=True): """PEP8 friendly version of deprecated getDateVolume function""" - return self.get_historical().iloc[:, [0, 6]] + return self.get_historical(as_dataframe=as_dataframe).iloc[:, [0, 6]] diff --git a/yahoo_historical/tests/test_fetch.py b/yahoo_historical/tests/test_fetch.py deleted file mode 100644 index df551fe..0000000 --- a/yahoo_historical/tests/test_fetch.py +++ /dev/null @@ -1,26 +0,0 @@ -from yahoo_historical import Fetcher - - -def test_get_historical(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_historical() - assert len(data) > 0 - - -def test_get_dividends(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_dividends() - assert len(data) > 0 - - -def test_get_splits(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_splits() - assert len(data) > 0 - - -def test_get_date_price(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_date_price() - assert len(data) > 0 - - -def test_get_date_volume(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_date_volume() - assert len(data) > 0 From a852771e87a7ba08c6ca36886169e5e033888ba6 Mon 
Sep 17 00:00:00 2001 From: AndrewRPorter Date: Tue, 2 Aug 2022 07:06:47 -0400 Subject: [PATCH 2/5] Use testing ticker in tests and add as_dataframe flag to README --- README.md | 39 ++++++++------------------------------- tests/test_fetch.py | 4 ++-- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 89e9475..e9b346c 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ Below details the available method params for creating a Fetcher object. ```python from yahoo_historical import Fetcher - data = Fetcher("AAPL", [2007,1,1], [2017,1,1]) - print(data.get_historical()) +data = Fetcher("AAPL", [2007,1,1], [2017,1,1]) +print(data.get_historical()) ``` ``` @@ -42,35 +42,12 @@ from yahoo_historical import Fetcher 1 2007-01-04 12.007143 12.278571 11.974286 11.052453 12.237143 211815100 2 2007-01-05 12.252857 12.314285 12.057143 10.973743 12.150000 208685400 3 2007-01-08 12.280000 12.361428 12.182858 11.027935 12.210000 199276700 - 4 2007-01-09 12.350000 13.282857 12.164286 11.944029 13.224286 837324600 - 5 2007-01-10 13.535714 13.971429 13.350000 12.515617 13.857142 738220000 - 6 2007-01-11 13.705714 13.825714 13.585714 12.360788 13.685715 360063200 - 7 2007-01-12 13.512857 13.580000 13.318571 12.208535 13.517143 328172600 - 8 2007-01-16 13.668571 13.892858 13.635715 12.528520 13.871428 311019100 - 9 2007-01-17 13.937143 13.942857 13.545714 12.251113 13.564285 411565000 - 10 2007-01-18 13.157143 13.158571 12.721429 11.492435 12.724286 591151400 ``` -## License +Note that you can return the data as a JSON string instead of a DataFrame by setting the `as_dataframe` flag to `False`. 
-MIT License - -Copyright (c) 2017 Andrew Porter - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+```python +from yahoo_historical import Fetcher +data = Fetcher("AAPL", [2007,1,1], [2017,1,1]) +print(data.get_historical(as_dataframe=False)) +``` diff --git a/tests/test_fetch.py b/tests/test_fetch.py index c6ad5cc..57579ff 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -4,14 +4,14 @@ def test_get_no_dataframe(): - data = Fetcher("AAPL", [2007, 1, 1], [2017, 1, 1]).get_historical( + data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_historical( as_dataframe=False ) assert len(data) > 0 def test_get_with_lowercase(): - data = Fetcher("aapl", [2007, 1, 1], [2017, 1, 1]).get_historical() + data = Fetcher(TEST_TICKER.lower(), [2007, 1, 1], [2017, 1, 1]).get_historical() assert len(data) > 0 From e7805c1a969254f5e69f567d75cfb11bf6243fcb Mon Sep 17 00:00:00 2001 From: AndrewRPorter Date: Tue, 2 Aug 2022 07:28:31 -0400 Subject: [PATCH 3/5] Use unix timestamps for start and end date inputs --- README.md | 21 +++++++++++++++++---- tests/test_fetch.py | 20 +++++++++++--------- yahoo_historical/fetch.py | 29 ++++++++++++++++------------- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index e9b346c..7bb6b87 100644 --- a/README.md +++ b/README.md @@ -23,16 +23,22 @@ Below details the available method params for creating a Fetcher object. 
### Arguments - ticker: The ticker symbol to download historical data for -- start: Start date in form [Year,Month,Day] +- start: Start date as Unix timestamp ### Optional Arguments -- end: End date in form [Year,Month,Day] +- end: End date as Unix timestamp - interval: Interval to fetch historical data (can be 1d, 1wk, 1mo, defaults to 1d) ```python from yahoo_historical import Fetcher -data = Fetcher("AAPL", [2007,1,1], [2017,1,1]) +import datetime +import time + +# create unix timestamp representing January 1st, 2007 +timestamp = time.mktime(datetime.datetime(2007, 1, 1).timetuple()) + +data = Fetcher("AAPL", timestamp) print(data.get_historical()) ``` @@ -48,6 +54,13 @@ Note that you can return a dictionary instead of a DataFrame by setting the `as_ ```python from yahoo_historical import Fetcher -data = Fetcher("AAPL", [2007,1,1], [2017,1,1]) + +import datetime +import time + +# create unix timestamp representing January 1st, 2007 +timestamp = time.mktime(datetime.datetime(2007, 1, 1).timetuple()) + +data = Fetcher("AAPL", timestamp) print(data.get_historical(as_dataframe=False)) ``` diff --git a/tests/test_fetch.py b/tests/test_fetch.py index 57579ff..6765259 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -1,40 +1,42 @@ from yahoo_historical import Fetcher +import datetime +import time TEST_TICKER = "AAPL" +TIME_START = time.mktime(datetime.datetime(2007, 1, 1).timetuple()) +TIME_END = time.mktime(datetime.datetime(2017, 1, 1).timetuple()) def test_get_no_dataframe(): - data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_historical( - as_dataframe=False - ) + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_historical(as_dataframe=False) assert len(data) > 0 def test_get_with_lowercase(): - data = Fetcher(TEST_TICKER.lower(), [2007, 1, 1], [2017, 1, 1]).get_historical() + data = Fetcher(TEST_TICKER.lower(), TIME_START, TIME_END).get_historical() assert len(data) > 0 def test_get_historical(): - data = Fetcher(TEST_TICKER, [2007, 1, 
1], [2017, 1, 1]).get_historical() + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_historical() assert len(data) > 0 def test_get_dividends(): - data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_dividends() + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_dividends() assert len(data) > 0 def test_get_splits(): - data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_splits() + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_splits() assert len(data) > 0 def test_get_date_price(): - data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_date_price() + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_date_price() assert len(data) > 0 def test_get_date_volume(): - data = Fetcher(TEST_TICKER, [2007, 1, 1], [2017, 1, 1]).get_date_volume() + data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_date_volume() assert len(data) > 0 diff --git a/yahoo_historical/fetch.py b/yahoo_historical/fetch.py index 4fe83dc..ef14872 100644 --- a/yahoo_historical/fetch.py +++ b/yahoo_historical/fetch.py @@ -1,5 +1,3 @@ -import calendar as cal -import datetime as dt import time import pandas as pd import requests @@ -8,27 +6,32 @@ class Fetcher: - def __init__(self, ticker, start, end=None, interval=ONE_DAY_INTERVAL): - """Initializes class variables and formats api_url string""" + def __init__(self, ticker: str, start, end=time.time(), interval=ONE_DAY_INTERVAL): self.ticker = ticker.upper() self.interval = interval - self.start = int(cal.timegm(dt.datetime(*start).timetuple())) - if end is not None: - self.end = int(cal.timegm(dt.datetime(*end).timetuple())) - else: - self.end = int(time.time()) + # we convert the unix timestamps to int here to avoid sending floats to yahoo finance API + self.start = int(start) + self.end = int(end) - def create_url(self, events): - return API_URL % (self.ticker, self.start, self.end, self.interval, events) + def create_url(self, event: str) -> str: + """Generate a URL for a particular event. 
- def _get(self, events, as_dataframe=True): + Args: + event (str): event type to query for ('history', 'div', 'split') + + Returns: + str: formatted URL for an API call + """ + return API_URL % (self.ticker, self.start, self.end, self.interval, event) + + def _get(self, event, as_dataframe=True): if self.interval not in DATE_INTERVALS: raise ValueError( f"Incorrect interval: valid intervals are {', '.join(DATE_INTERVALS)}" ) - url = self.create_url(events) + url = self.create_url(event) data = requests.get(url, headers={"User-agent": ""}) content = StringIO(data.content.decode("utf-8")) From 33ad369042fe6d0e474171d252f06b42016f73ed Mon Sep 17 00:00:00 2001 From: AndrewRPorter Date: Tue, 2 Aug 2022 07:36:40 -0400 Subject: [PATCH 4/5] Bump major version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 31eff01..11797b5 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="yahoo_historical", packages=["yahoo_historical"], - version="0.4.1", + version="1.0.0", description="Fetches historical EOD (end of day) prices from yahoo finance", author="Andrew Porter", author_email="porter.r.andrew@gmail.com", From 33ee82bb943cd272f7875d5d85876ab55e719acf Mon Sep 17 00:00:00 2001 From: AndrewRPorter Date: Wed, 3 Aug 2022 19:00:44 -0400 Subject: [PATCH 5/5] Add comments and remove old methods for date price and date volume --- README.md | 4 +--- tests/test_fetch.py | 10 --------- yahoo_historical/__init__.py | 2 +- yahoo_historical/fetch.py | 39 ++++++++++++++++++++++++------------ 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 7bb6b87..8da12d3 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,6 @@ pip install --user yahoo-historical - get_historical() - get_dividends() - get_splits() -- get_date_price() -- get_date_volume() ## Example Usage @@ -27,7 +25,7 @@ Below details the available method params for creating a Fetcher object. 
### Optional Arguments -- end: End date as Unix timestamp +- end: End date as Unix timestamp (defaults to `time.time()`) - interval: Interval to fetch historical data (can be 1d, 1wk, 1mo, defaults to 1d) ```python diff --git a/tests/test_fetch.py b/tests/test_fetch.py index 6765259..1f735dd 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -30,13 +30,3 @@ def test_get_dividends(): def test_get_splits(): data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_splits() assert len(data) > 0 - - -def test_get_date_price(): - data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_date_price() - assert len(data) > 0 - - -def test_get_date_volume(): - data = Fetcher(TEST_TICKER, TIME_START, TIME_END).get_date_volume() - assert len(data) > 0 diff --git a/yahoo_historical/__init__.py b/yahoo_historical/__init__.py index fa61312..75aa30d 100644 --- a/yahoo_historical/__init__.py +++ b/yahoo_historical/__init__.py @@ -1 +1 @@ -from .fetch import Fetcher +from .fetch import Fetcher # noqa diff --git a/yahoo_historical/fetch.py b/yahoo_historical/fetch.py index ef14872..65f79d4 100644 --- a/yahoo_historical/fetch.py +++ b/yahoo_historical/fetch.py @@ -1,3 +1,4 @@ +from typing import Union import time import pandas as pd import requests @@ -6,11 +7,18 @@ class Fetcher: - def __init__(self, ticker: str, start, end=time.time(), interval=ONE_DAY_INTERVAL): + def __init__( + self, + ticker: str, + start: Union[int, float], + end: Union[int, float] = time.time(), + interval: str = ONE_DAY_INTERVAL, + ): self.ticker = ticker.upper() self.interval = interval # we convert the unix timestamps to int here to avoid sending floats to yahoo finance API + # as the API will reject the call for an invalid type self.start = int(start) self.end = int(end) @@ -25,13 +33,26 @@ def create_url(self, event: str) -> str: """ return API_URL % (self.ticker, self.start, self.end, self.interval, event) - def _get(self, event, as_dataframe=True): + def _get(self, event: str, as_dataframe=True) -> 
Union[pd.DataFrame, dict]: + """Private helper function to build URL and make API request to grab data + + Args: + event (str): kind of data we want to query (history, div, split) + as_dataframe (bool, optional): whether or not to return data as a pandas DataFrame. Defaults to True. + + Raises: + ValueError: if invalid interval is supplied + + Returns: + Union[pd.DataFrame, dict]: data from yahoo finance API call + """ if self.interval not in DATE_INTERVALS: raise ValueError( f"Incorrect interval: valid intervals are {', '.join(DATE_INTERVALS)}" ) url = self.create_url(event) + # yahoo finance rejects our API request without an empty user agent data = requests.get(url, headers={"User-agent": ""}) content = StringIO(data.content.decode("utf-8")) @@ -42,21 +63,13 @@ def _get(self, event, as_dataframe=True): return dataframe.to_json() def get_historical(self, as_dataframe=True): - """PEP8 friendly version of deprecated getHistorical function""" + """Returns a list of historical price data from Yahoo Finance""" return self._get("history", as_dataframe=as_dataframe) def get_dividends(self, as_dataframe=True): - """PEP8 friendly version of deprecated getDividends function""" + """Returns a list of historical dividends data from Yahoo Finance""" return self._get("div", as_dataframe=as_dataframe) def get_splits(self, as_dataframe=True): - """PEP8 friendly version of deprecated getSplits function""" + """Returns a list of historical stock splits from Yahoo Finance""" return self._get("split", as_dataframe=as_dataframe) - - def get_date_price(self, as_dataframe=True): - """PEP8 friendly version of deprecated getDatePrice function""" - return self.get_historical(as_dataframe=as_dataframe).iloc[:, [0, 4]] - - def get_date_volume(self, as_dataframe=True): - """PEP8 friendly version of deprecated getDateVolume function""" - return self.get_historical(as_dataframe=as_dataframe).iloc[:, [0, 6]]