This introduction demonstrates how one may conduct a basic forecasting exercise with OOS. For more detailed examples and documentation, please see the OOS website.
# Download the three FRED series used in this example (UNRATE, INDPRO, GS10).
# Passing env = globalenv() makes each series available at the top level
# under its own symbol name, which the data-preparation step relies on.
quantmod::getSymbols.FRED(
  Symbols = c('UNRATE', 'INDPRO', 'GS10'),
  env = globalenv()
)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> [1] "UNRATE" "INDPRO" "GS10"
# Assemble the modelling data set: bind the three downloaded series side by
# side, expose their index as an explicit `date` column, keep observations
# from 1990 onwards and drop rows in which any series is missing.
Data = cbind(UNRATE, INDPRO, GS10)
Data = data.frame(Data, date = zoo::index(Data)) %>%
  dplyr::filter(lubridate::year(date) >= 1990) %>%
  na.omit()

# make industrial production and 10-year Treasury stationary
#   GS10   -> change from the previous observation
#   INDPRO -> growth rate relative to the value 12 observations earlier
# Every lag is written as dplyr::lag(): a bare `lag` resolves to stats::lag()
# whenever dplyr is not attached, and stats::lag() only shifts time-series
# attributes without moving the values, so the growth rate would silently
# evaluate to zero.
Data = Data %>%
  dplyr::mutate(
    GS10 = GS10 - dplyr::lag(GS10),
    INDPRO = (INDPRO - dplyr::lag(INDPRO, 12)) / dplyr::lag(INDPRO, 12))

# start data when all three variables are available
# (this is not necessary, but it will suppress warnings for us)
# NOTE: because the sample was already restricted to 1990 onwards above,
# every remaining date satisfies this condition and the filter currently
# leaves Data unchanged; it only matters if the 1990 cut-off is moved earlier.
Data = dplyr::filter(Data, date >= as.Date('1954-01-01'))
# Univariate benchmark: 'naive' forecasts of UNRATE at horizon 1, produced
# for the last five dates in Data. Only the date and target columns are
# handed to the forecaster.
forecast.uni = forecast_univariate(
  Data = Data %>% dplyr::select(date, UNRATE),
  forecast.dates = tail(Data$date, n = 5),
  method = 'naive',  # other methods previously listed here: 'auto.arima', 'ets'
  horizon = 1,
  recursive = FALSE,
  rolling.window = NA,
  freq = 'month'
)
#> forecast_univariate.control_panel was instantiated and default values will be used for model estimation.
# Multivariate counterpart: 'lasso' forecasts of UNRATE at horizon 1 for the
# last five dates in Data. The full data set is passed in, so the remaining
# columns are available to the model alongside the target.
forecast.multi = forecast_multivariate(
  Data = Data,
  forecast.date = tail(Data$date, n = 5),
  target = 'UNRATE',
  horizon = 1,
  method = 'lasso',
  rolling.window = NA,
  freq = 'month'
)
#> forecast_multivariate.ml.control_panel was instantiated and default values will be used for ML model estimation.
#> forecast_multivariate.var.control_panel was instantiated and default values will be used for VAR model estimation.
#> Loading required package: ggplot2
#> Loading required package: lattice
#>
#> Attaching package: 'caret'
#> The following object is masked from 'package:purrr':
#>
#> lift
# Stack the univariate and multivariate forecasts into one table, then attach
# the realised unemployment rate for each forecast date as `observed`.
forecasts = dplyr::left_join(
  dplyr::bind_rows(forecast.uni, forecast.multi),
  Data %>% dplyr::select(date, observed = UNRATE),
  by = 'date'
)
# Build combination forecasts from the individual model forecasts using three
# schemes: uniform weights, the median, and a trimmed mean.
forecast.combo = forecasts %>%
  forecast_combine(method = c('uniform', 'median', 'trimmed.mean'))
# Append the combination forecasts to the table of individual forecasts so
# that all models can be evaluated together.
forecasts = dplyr::bind_rows(forecasts, forecast.combo)
# calculate forecast errors
forecast.error = forecast_accuracy(forecasts)

# view forecast errors from least to greatest
# (best forecast to worst forecast method)
# Every column except `model` is rounded to three decimals for display.
# dplyr::across() is used instead of the superseded mutate_at(vars(...)); it is
# fully namespaced, so it does not depend on `vars` happening to be attached.
forecast.error %>%
  dplyr::mutate(dplyr::across(-model, function(x) round(x, 3))) %>%
  dplyr::arrange(MSE)
#> # A tibble: 5 × 5
#> model MSE RMSE MAE MAPE
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 naive 0.018 0.132 0.125 0.03
#> 2 median.combo 0.599 0.774 0.768 0.184
#> 3 trimmed.mean.combo 0.599 0.774 0.768 0.184
#> 4 uniform.combo 0.599 0.774 0.768 0.184
#> 5 lasso 2.44 1.56 1.56 0.375
# Benchmark every model against the 'naive' baseline using an error-ratio
# test on MSE loss, listing models from the smallest ratio to the largest
# (the baseline itself has a ratio of 1).
forecasts %>%
  forecast_comparison(
    baseline.forecast = 'naive',
    test = 'ER',
    loss = 'MSE') %>%
  dplyr::arrange(error.ratio)
#> model error.ratio
#> 1 naive 1.00000
#> 2 median.combo 34.21083
#> 3 trimmed.mean.combo 34.21083
#> 4 uniform.combo 34.21083
#> 5 lasso 139.70836
# chart forecasts
# The y-axis is labelled 'Percent' because the plotted series is the
# unemployment rate, which is measured in percent rather than as an index.
chart =
  chart_forecast(
    forecasts,
    Title = 'US Unemployment Rate',
    Ylab = 'Percent',
    Freq = 'Monthly')