diff --git a/fecon236/boots/bootstrap.py b/fecon236/boots/bootstrap.py index de50d93..94fefb1 100644 --- a/fecon236/boots/bootstrap.py +++ b/fecon236/boots/bootstrap.py @@ -1,7 +1,6 @@ # Python Module for import Date : 2018-07-07 # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per PEP 0263 -''' -_______________| bootstrap.py :: Bootstrap module for fecon236 +"""Bootstrap module for `fecon236` - Efficient storage and rescaling of empirical data. - Normalize, but retain features of the empirical distribution. @@ -9,14 +8,20 @@ - Visualize sample price paths. - Bootstrap for small-sample statistics. - Bootstrap for determining probabilities of events. -- Specify hybrid population array using synthetic rates of return - from GM(2) Gaussian Mixture model. +- Specify hybrid population array using synthetic rates of return from GM(2) + Gaussian Mixture model. + + +Usage +----- +Two methods to efficiently pre-compute asset returns: + + - `writefile_normdiflog`: Create a CSV file of normalized rates of + return. + - Use CSV file in `csv2ret` to create "population" array of returns. -USAGE: Two methods to efficiently pre-compute asset returns: - - writefile_normdiflog(): Create a CSV file of normalized rates of return. - - Use CSV file in csv2ret() to create "population" array of returns. - Repeatedly bootstrap from population array, poparr, in computer memory - to simulate price histories by bsret2prices(). + to simulate price histories by `bsret2prices`. - See [TO BE ANNOUNCED] notebook in fecon235 for concrete usage and studies. @@ -32,31 +37,39 @@ future horizons among all competing models. -DEPENDENCIES - - fecon236.prob.sim module +Dependencies +------------ -REFERENCES -- Bradley Efron; Robert Tibshirani (1994). An Introduction to the Bootstrap. 
+- `fecon236.prob.sim` module -- Bootstrapping, https://en.wikipedia.org/wiki/Bootstrapping_(statistics) +References +---------- -- Function np.random.choice() used in bootstrap(), +- Bradley Efron; Robert Tibshirani (1994). An Introduction to the Bootstrap. +- Bootstrapping, https://en.wikipedia.org/wiki/Bootstrapping_(statistics) +- Function `np.random.choice` used in `bootstrap`, http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.choice.html +Notes +----- +For LATEST version, see https://git.io/fecon236 + +Change Log +---------- + +* 2018-07-07 Add `smallsample_gmr` to demo geometric mean rates. + Add `smallsample_loss` to demo probability of loss. +* 2018-07-05 Let `replace=True` as default argument. + The opposite was useful during testing. +* 2018-07-04 Add `hybrid2ret` for synthesis with Gaussian mixture. +* 2018-07-01 Apply the functions extracted to sim module. +* 2018-06-28 TOTAL REWRITE: generalization and clarification of logic flow. + Deprecate fecon235/nb/SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz + Include recipe for creating similar CSV files. +* 2018-06-08 Spin-off 2014 material from `sim.py`, but needs generalization. + Let N generally be the count:= sample size. -CHANGE LOG For LATEST version, see https://git.io/fecon236 -2018-07-07 Add smallsample_gmr() to demo geometric mean rates. - Add smallsample_loss() to demo probability of loss. -2018-07-05 Let replace=True as default argument. - The opposite was useful during testing. -2018-07-04 Add hybrid2ret() for synthesis with Gaussian mixture. -2018-07-01 Apply the functions extracted to sim module. -2018-06-28 TOTAL REWRITE: generalization and clarification of logic flow. - Deprecate fecon235/nb/SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz - Include recipe for creating similar CSV files. -2018-06-08 Spin-off 2014 material from sim.py, but needs generalization. - Let N generally be the count:= sample size. 
-''' +""" from __future__ import absolute_import, print_function, division @@ -82,17 +95,22 @@ def writefile_normdiflog(df, filename='tmp-fe-normdiflog.csv', lags=1): - '''Dataframe variations into CSV file: logrithmic differences as N(0,1). - PRE-COMPUTING increases speed and eliminates network download time. - Recommend gz compression of the produced CSV file. - ''' + """Dataframe variations into CSV file: logarithmic differences as N(0,1). + + Notes + ----- + + * PRE-COMPUTING increases speed and eliminates network download time. + * Recommend gz compression of the produced CSV file. + + """ dfndl = tool.normalize(tool.diflog(df, lags=lags)) tool.writefile(dfndl, filename) return def readcsv(datafile='tmp-fe-normdiflog.csv'): - '''Read CSV file.''' + """Read CSV file.""" try: if datafile.endswith('.gz'): df = readfile(datafile, compress='gzip') @@ -104,7 +122,7 @@ def readcsv(datafile='tmp-fe-normdiflog.csv'): def csv2ret(datafile, mean=SPXmean, sigma=SPXsigma, yearly=256): - '''Reform empirical N(0, 1) rates distribution as returns array.''' + """Reform empirical N(0, 1) rates distribution as returns array.""" df = readcsv(datafile) # Dataframe of normalized RATES of return. normarr = df['Y'].values # That dataframe expressed as array. # .values converts to numpy ARRAY form. @@ -117,11 +135,16 @@ def csv2ret(datafile, mean=SPXmean, sigma=SPXsigma, yearly=256): def hybrid2ret(poparr, mean=SPXmean, sigma=SPXsigma, yearly=256): - '''Concatenate synthetic GM(2) returns for DataFrame of hybrid prices. - This is a SYNTHESIS between empirical and Gaussian mixture methods. - Array poparr is assumed to be constructed from same mean and sigma. - This function is OPTIONAL, strictly outside proper bootstrapping. - ''' + """Concatenate synthetic GM(2) returns for DataFrame of hybrid prices. + + Notes + ----- + + * This is a SYNTHESIS between empirical and Gaussian mixture methods. + * Array `poparr` is assumed to be constructed from same mean and sigma. 
+ * This function is OPTIONAL, strictly outside proper bootstrapping. + + """ poplen = poparr.shape[0] gmarr = sim.gmix2ret(poplen, mean, sigma, yearly) # gmarr has same length as poparr to maximize the uncertainty @@ -134,19 +157,23 @@ def hybrid2ret(poparr, mean=SPXmean, sigma=SPXsigma, yearly=256): def bootstrap(N, poparr, replace=True): - '''Randomly pick out N items from poparr. - Default argument, replace=True, means "WITH replacement." - ''' - # Note that replace=False is useful during testing to replicate - # the entire population if necessary (e.g. to check terminal price). - # The theory on bootstrap generally assumes replace=True. + """Randomly pick out `N` items from `poparr`. + + Notes + ----- + Default argument, `replace=True`, means "WITH replacement." + + Note that `replace=False` is useful during testing to replicate + the entire population if necessary (e.g. to check terminal price). + The theory on bootstrap generally assumes `replace=True`. + """ bsarr = np.random.choice(poparr, size=N, replace=replace) # BOOTSTRAPPED array return bsarr def bsret2prices(N, poparr, inprice=1.0, replace=True): - '''Transform array of bootstrap returns into DataFrame of prices.''' + """Transform array of bootstrap returns into DataFrame of prices.""" bsarr = bootstrap(N, poparr, replace=replace) bsprices = sim.ret2prices(bsarr, inprice=inprice) return bsprices @@ -154,7 +181,7 @@ def bsret2prices(N, poparr, inprice=1.0, replace=True): def bootshow(N, poparr, yearly=256, repeat=1, visual=True, b=SPXb, inprice=100, replace=True): - '''Statistical and optional visual SUMMARY: repeat bsret2prices().''' + """Statistical and optional visual SUMMARY: repeat `bsret2prices`.""" # Also nice template for gathering SMALL-SAMPLE statistics... # to be pursued elsewhere for different asset classes. 
for i in range(repeat): @@ -172,7 +199,7 @@ def bootshow(N, poparr, yearly=256, repeat=1, visual=True, b=SPXb, def smallsample_gmr(N, poparr, yearly=256, repeat=100, inprice=1.0, replace=True): - '''Demo small sample statistics: repeat geometric mean rates.''' + """Demo small sample statistics: repeat geometric mean rates.""" ssarr = np.ones((repeat,)) # small sample array to fill-in. for i in range(repeat): prices = bsret2prices(N, poparr, inprice=inprice, replace=replace) @@ -184,9 +211,10 @@ def smallsample_gmr(N, poparr, yearly=256, repeat=100, def smallsample_loss(N, poparr, yearly=256, repeat=100, level=0.90, inprice=1.0, replace=True): - '''Demo small sample statistics: probability of loss: price < level. - Relative to investment at initial price, inprice. - ''' + """Demo small sample statistics: probability of loss: price < level. + + Relative to investment at initial price, inprice. + """ ssarr = np.ones((repeat,)) # small sample array to fill-in. for i in range(repeat): prices = bsret2prices(N, poparr, inprice=inprice, replace=replace)