from distutils.version import LooseVersion

import pandas as pd

import ffn
# import ffn.utils as utils
from . import utils

# This is a temporary fix until pandas_datareader 0.7 is released.
# pandas 0.23 has moved is_list_like from common to api.types, hence the monkey patch
if LooseVersion(pd.__version__) > LooseVersion("0.23.0"):
    pd.core.common.is_list_like = pd.api.types.is_list_like

from pandas_datareader import data as pdata


@utils.memoize
def get(
    tickers,
    provider=None,
    common_dates=True,
    forward_fill=False,
    clean_tickers=True,
    column_names=None,
    ticker_field_sep=":",
    mrefresh=False,
    existing=None,
    **kwargs
):
"""
Helper function for retrieving data as a DataFrame.
Args:
* tickers (list, string, csv string): Tickers to download.
* provider (function): Provider to use for downloading data.
By default it will be ffn.DEFAULT_PROVIDER if not provided.
* common_dates (bool): Keep common dates only? Drop na's.
* forward_fill (bool): forward fill values if missing. Only works
if common_dates is False, since common_dates will remove
all nan's, so no filling forward necessary.
* clean_tickers (bool): Should the tickers be 'cleaned' using
ffn.utils.clean_tickers? Basically remove non-standard
characters (^VIX -> vix) and standardize to lower case.
* column_names (list): List of column names if clean_tickers
is not satisfactory.
* ticker_field_sep (char): separator used to determine the
ticker and field. This is in case we want to specify
particular, non-default fields. For example, we might
want: AAPL:Low,AAPL:High,AAPL:Close. ':' is the separator.
* mrefresh (bool): Ignore memoization.
* existing (DataFrame): Existing DataFrame to append returns
to - used when we download from multiple sources
* kwargs: passed to provider
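
    Example (an illustrative sketch, not an exact transcript; it assumes
    the default provider can reach Yahoo! Finance, so the values returned
    will vary)::

        # adjusted close prices for two tickers; columns are cleaned to
        # 'aapl' and 'msft'
        prices = get('aapl,msft', start='2020-01-01')

        # request specific fields using the ticker:field syntax
        hl = get('aapl:High,aapl:Low', clean_tickers=False)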
"""
    if provider is None:
        provider = DEFAULT_PROVIDER

    tickers = utils.parse_arg(tickers)

    data = {}
    for ticker in tickers:
        t = ticker
        f = None

        # check for field
        bits = ticker.split(ticker_field_sep, 1)
        if len(bits) == 2:
            t = bits[0]
            f = bits[1]

        # call provider - check if it supports memoization
        if hasattr(provider, "mcache"):
            data[ticker] = provider(ticker=t, field=f, mrefresh=mrefresh, **kwargs)
        else:
            data[ticker] = provider(ticker=t, field=f, **kwargs)

        # drop duplicate index entries, keeping the most recent observation
        data[ticker] = data[ticker][~data[ticker].index.duplicated(keep="last")]

    df = pd.DataFrame(data)
    # ensure same order as provided
    df = df[tickers]

    if existing is not None:
        df = ffn.merge(existing, df)

    if common_dates:
        df = df.dropna()

    if forward_fill:
        df = df.fillna(method="ffill")

    if column_names:
        cnames = utils.parse_arg(column_names)
        if len(cnames) != len(df.columns):
            raise ValueError("column_names must be of same length as tickers")
        df.columns = cnames
    elif clean_tickers:
        df.columns = map(utils.clean_ticker, df.columns)

    return df


@utils.memoize
def web(ticker, field=None, start=None, end=None, mrefresh=False, source="yahoo"):
"""
Data provider wrapper around pandas.io.data provider. Provides
memoization.
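
    Example (an illustrative sketch; assumes the 'yahoo' source is
    reachable through pandas_datareader, so actual values will vary)::

        # adjusted close series for AAPL
        adj_close = web('AAPL', start='2020-01-01', end='2020-12-31')

        # a specific field can be requested explicitly
        lows = web('AAPL', field='Low', start='2020-01-01')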
"""
    if source == "yahoo" and field is None:
        field = "Adj Close"

    tmp = _download_web(ticker, data_source=source, start=start, end=end)

    if tmp is None:
        raise ValueError("failed to retrieve data for %s:%s" % (ticker, field))

    if field:
        return tmp[field]
    else:
        return tmp


@utils.memoize
def _download_web(name, **kwargs):
"""
Thin wrapper to enable memoization
"""
return pdata.DataReader(name, **kwargs)


@utils.memoize
def yf(ticker, field=None, start=None, end=None, mrefresh=False):
    """
    Yahoo! Finance data provider (via pandas_datareader). Provides
    memoization. Returns the 'Adj Close' series unless a specific
    field is requested.
    """
    if field is None:
        field = "Adj Close"

    tmp = pdata.get_data_yahoo(ticker, start=start, end=end)

    if tmp is None:
        raise ValueError("failed to retrieve data for %s:%s" % (ticker, field))

    if field:
        return tmp[field]
    else:
        return tmp


@utils.memoize
def csv(ticker, path="data.csv", field="", mrefresh=False, **kwargs):
"""
Data provider wrapper around pandas' read_csv. Provides memoization.
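
    Example (an illustrative sketch; ``prices.csv`` and its columns are
    hypothetical - the file needs a date index column plus a column named
    either ``<ticker>`` or ``<ticker>:<field>``)::

        # reads the 'aapl' column from prices.csv
        aapl = csv('aapl', path='prices.csv')

        # reads the 'aapl:Low' column
        aapl_low = csv('aapl', path='prices.csv', field='Low')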
"""
    # set defaults if not specified
    if "index_col" not in kwargs:
        kwargs["index_col"] = 0
    if "parse_dates" not in kwargs:
        kwargs["parse_dates"] = True

    # read in dataframe from csv file
    df = pd.read_csv(path, **kwargs)

    tf = ticker
    if field != "" and field is not None:
        tf = "%s:%s" % (tf, field)

    # check that required column exists
    if tf not in df:
        raise ValueError("Ticker(field) not present in csv file!")

    return df[tf]


DEFAULT_PROVIDER = yf
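
# Example (illustrative sketch): with DEFAULT_PROVIDER set to yf, calling
# get() without an explicit provider downloads Yahoo! Finance 'Adj Close'
# prices, e.g. get('spy,agg', start='2020-01-01'). To load prices from a
# local file instead, pass provider=csv together with path='your_file.csv'
# (the file name here is hypothetical).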