<!DOCTYPE html>

Time Series Project

Libraries

library(tidyverse)
library(tswge)
library(vars)
library(lubridate)
library(vars)
library(nnfor)
library(caret)

Load Data

# Variable of Interest - Quarterly from 1/1/63
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)

# Home ownership rate - Quarterly from 1/1/65
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/RHORUSQ156N.csv"
hor <- read.csv(file_path, header = TRUE)

# Housing units completed - Monthly from 1/1/1968
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/COMPUTSA.csv"
huc <- read.csv(file_path, header = TRUE)

# Supply of new houses - Monthly from 1/1/1963
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSACSR.csv"
snh <- read.csv(file_path, header = TRUE)

# House price index - Quarterly from 1/1/1975
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/USSTHPI.csv"
hpi <- read.csv(file_path, header = TRUE)

# Converting Monthly Data to Quarterly Data

# Preserve Monthly format
snh_monthly <- snh
huc_monthly <- huc

# Supply of New Houses Variable 
snh$DATE = as.Date(snh$DATE)
snh$month <- month(snh$DATE)
head(snh)

##         DATE MSACSR month
## 1 1963-01-01    4.7     1
## 2 1963-02-01    6.6     2
## 3 1963-03-01    6.4     3
## 4 1963-04-01    5.3     4
## 5 1963-05-01    5.1     5
## 6 1963-06-01    6.0     6

snh_quarterly <- snh %>%
  filter(snh$month == 1 | snh$month == 4 | snh$month == 7 | snh$month == 10)
summary(snh_quarterly)

##       DATE                MSACSR           month       
##  Min.   :1963-01-01   Min.   : 3.300   Min.   : 1.000  
##  1st Qu.:1978-05-16   1st Qu.: 4.900   1st Qu.: 2.500  
##  Median :1993-10-01   Median : 5.800   Median : 4.000  
##  Mean   :1993-09-30   Mean   : 6.117   Mean   : 5.482  
##  3rd Qu.:2009-02-15   3rd Qu.: 6.950   3rd Qu.: 7.000  
##  Max.   :2024-07-01   Max.   :12.200   Max.   :10.000

# Housing Units Completed Variable
huc$DATE = as.Date(huc$DATE)
huc$month <- month(huc$DATE)
head(huc)

##         DATE COMPUTSA month
## 1 1968-01-01     1257     1
## 2 1968-02-01     1174     2
## 3 1968-03-01     1323     3
## 4 1968-04-01     1328     4
## 5 1968-05-01     1367     5
## 6 1968-06-01     1184     6

huc_quarterly <- huc %>%
  filter(huc$month == 1 | huc$month == 4 | huc$month == 7 | huc$month == 10)
summary(huc_quarterly)

##       DATE               COMPUTSA        month      
##  Min.   :1968-01-01   Min.   : 520   Min.   : 1.00  
##  1st Qu.:1982-02-15   1st Qu.:1196   1st Qu.: 2.50  
##  Median :1996-04-01   Median :1389   Median : 4.00  
##  Mean   :1996-03-31   Mean   :1394   Mean   : 5.48  
##  3rd Qu.:2010-05-16   3rd Qu.:1632   3rd Qu.: 7.00  
##  Max.   :2024-07-01   Max.   :2195   Max.   :10.00

# Using same time frames, which would be starting at 1975 Q1 and ending at 2024 Q2 (due to hpi data)

# hor observation 41 is 1975 Q1
hor_1975 = hor[41:238,]
hor_1975$DATE <- as.Date(hor_1975$DATE)
summary(hor_1975)

##       DATE             RHORUSQ156N   
##  Min.   :1975-01-01   Min.   :62.90  
##  1st Qu.:1987-04-23   1st Qu.:64.20  
##  Median :1999-08-16   Median :65.15  
##  Mean   :1999-08-16   Mean   :65.53  
##  3rd Qu.:2011-12-09   3rd Qu.:66.58  
##  Max.   :2024-04-01   Max.   :69.20

# huc_quarterlly observation 29
huc_1975 = huc_quarterly[29:226,]
summary(huc_1975)

##       DATE               COMPUTSA        month      
##  Min.   :1975-01-01   Min.   : 520   Min.   : 1.00  
##  1st Qu.:1987-04-23   1st Qu.:1136   1st Qu.: 1.75  
##  Median :1999-08-16   Median :1378   Median : 4.00  
##  Mean   :1999-08-16   Mean   :1355   Mean   : 5.47  
##  3rd Qu.:2011-12-09   3rd Qu.:1599   3rd Qu.: 7.00  
##  Max.   :2024-04-01   Max.   :2071   Max.   :10.00

# mhp observation 49 is 1975 Q1
mhp_1975 = mhp[49:246,]
mhp_1975$DATE <- as.Date(mhp_1975$DATE)
summary(mhp_1975)

##       DATE                MSPUS       
##  Min.   :1975-01-01   Min.   : 38100  
##  1st Qu.:1987-04-23   1st Qu.:104050  
##  Median :1999-08-16   Median :161150  
##  Mean   :1999-08-16   Mean   :185330  
##  3rd Qu.:2011-12-09   3rd Qu.:247350  
##  Max.   :2024-04-01   Max.   :442600

# snh_quarterly observation 49 is 1975 Q1
snh_1975 = snh_quarterly[49:246,]
summary(snh_1975)

##       DATE                MSACSR           month      
##  Min.   :1975-01-01   Min.   : 3.300   Min.   : 1.00  
##  1st Qu.:1987-04-23   1st Qu.: 4.900   1st Qu.: 1.75  
##  Median :1999-08-16   Median : 6.000   Median : 4.00  
##  Mean   :1999-08-16   Mean   : 6.167   Mean   : 5.47  
##  3rd Qu.:2011-12-09   3rd Qu.: 7.075   3rd Qu.: 7.00  
##  Max.   :2024-04-01   Max.   :12.200   Max.   :10.00

# Housing Price Index variable already from 1975 Q1 - 2024 Q2
hpi$DATE <- as.Date(hpi$DATE)
summary(hpi)

##       DATE               USSTHPI      
##  Min.   :1975-01-01   Min.   : 60.03  
##  1st Qu.:1987-04-23   1st Qu.:145.09  
##  Median :1999-08-16   Median :224.69  
##  Mean   :1999-08-16   Mean   :260.08  
##  3rd Qu.:2011-12-09   3rd Qu.:351.31  
##  Max.   :2024-04-01   Max.   :682.18

# Create Dataframe
# Combined (Train/Test) Set NOT LOGGED
fed_housing_data_NL = data.frame(Year_Quarter = mhp_1975$DATE, Ownership_Rate = hor_1975$RHORUSQ156N, Housing_Units_Completed = huc_1975$COMPUTSA, Supply_New_Houses = snh_1975$MSACSR, Housing_Price_Index = hpi$USSTHPI, Median_Sales_Price = mhp_1975$MSPUS)

# Combined (Train/Test) Set LOGGED
fed_housing_data = data.frame(Year_Quarter = as.Date(mhp_1975$DATE), Ownership_Rate = hor_1975$RHORUSQ156N, Housing_Units_Completed = huc_1975$COMPUTSA, Supply_New_Houses = snh_1975$MSACSR, Housing_Price_Index = hpi$USSTHPI, Median_Sales_Price = log(mhp_1975$MSPUS))

# Train & Test sets
train = fed_housing_data[1:168,]
test = fed_housing_data[169:198,]

summary(fed_housing_data)

##   Year_Quarter        Ownership_Rate  Housing_Units_Completed Supply_New_Houses
##  Min.   :1975-01-01   Min.   :62.90   Min.   : 520            Min.   : 3.300   
##  1st Qu.:1987-04-23   1st Qu.:64.20   1st Qu.:1136            1st Qu.: 4.900   
##  Median :1999-08-16   Median :65.15   Median :1378            Median : 6.000   
##  Mean   :1999-08-16   Mean   :65.53   Mean   :1355            Mean   : 6.167   
##  3rd Qu.:2011-12-09   3rd Qu.:66.58   3rd Qu.:1599            3rd Qu.: 7.075   
##  Max.   :2024-04-01   Max.   :69.20   Max.   :2071            Max.   :12.200   
##  Housing_Price_Index Median_Sales_Price
##  Min.   : 60.03      Min.   :10.55     
##  1st Qu.:145.09      1st Qu.:11.55     
##  Median :224.69      Median :11.99     
##  Mean   :260.08      Mean   :11.95     
##  3rd Qu.:351.31      3rd Qu.:12.42     
##  Max.   :682.18      Max.   :13.00

# plotting variables
xmin_plot = 150
ymin_plot = 12.3
ymax_plot = 13.1
ymax_future = 13.4

EDA

Median Housing Sale Price

Our variable of interest is the Median Housing Sale Price. This was quarterly data starting in 1965.

mhp=mhp$MSPUS
plot(ts(mhp/1000, frequency=4,start=c(1965,1)),xlab='Year',ylab='Housing price (in thousands of dollars)')
title(main='Median US Housing Price from 1965')

acf(mhp,ylab='',main='ACF')

x = parzen.wge(mhp)

The realization looks like it is increasing linearly. This data could be model with a signal plus noise, with a linear signal.

The realization appears to be non stationary. There is evidence that the data is increasing over time. In addition, the variation appears much higher in later years as in earlier years, showing that variance is increasing over time as well. When modeling, it may be useful to take the logarithm of the data before modeling.

The ACF shows very slowly dampening autocorrelations. The parzen window also shows a very low frequency. This could points to a (1-B) term which could be removed with a high-pass difference filter.

To fix the problem with the increasing variance. We decided to take the log of the Mean Housing Price.

plot(ts(log(mhp), frequency=4,start=c(1965,1)),xlab='Year',ylab='Log Housing price')
title(main='Log Median US Housing Price from 1965')

Home Ownership Rate

We used 4 exogenous variables. The first was home ownership rate. Like the variable of interest, this was quarterly data starting from 1965.

hor <- hor$RHORUSQ156N
plot(ts(hor, frequency=4,start=c(1965,1)),xlab='Year',ylab='Percentage')
title(main='Home Ownership Rate from 1965')

acf(hor,ylab='',main='ACF')

x = parzen.wge(hor)

Unlike the median house price, the home ownership rate appears to be stationary. There isn’t a linear trend to the data, and the variance seems to be constant over time.

The ACF does show that there are dampening autocorrelations, but not quite at the rate as for median house price. This could point to an ARMA model being appropriate for the home ownership rate.

Similar to median house price, the Parzen Window shows that 0 is a prominent frequency.

Housing Units Completed

The next exogenous variable is Housing Units Completed. This was a monthly variable, starting from 1968.

huc <- huc$COMPUTSA
plot(ts(huc, frequency=12,start=c(1968,1)),xlab='Year',ylab='')
title(main='Housing Units Completed from 1968')

acf(huc,ylab='',main='ACF')

x = parzen.wge(huc)

This has very similar characteristics to home ownership rate. It’s autocorrelations dampen a little more slowly, but the ACF and Parzen Window look very similar. This variable likewise shows evidence for being stationary.

Supply of New Houses

snh <- snh$MSACSR
plot(ts(snh, frequency=12,start=c(1963,1)),xlab='Year',ylab='')
title(main='Supply of New Houses from 1963')

acf(snh,ylab='',main='ACF')

x = parzen.wge(snh)

Similar to home ownership rate as well, but with even more quickly dampening autocorrelations. This variable likewise shows evidence for being stationary.

House Price Index

The next exogenous variable is House Price Index. This was a quarterly variable, starting from 1975.

hpi <- hpi$USSTHPI
plot(ts(hpi, frequency=4,start=c(1975,1)),xlab='Year',ylab='')
title(main='House Price Index from 1975')

acf(hpi,ylab='',main='ACF')

x = parzen.wge(hpi)

The realization has a similar upward trend to the Housing Sale Price. It seems very smooth though, very little variance. Because of the upward trend though, it shows evidence against stationarity.

The ACF and Parzen Window are also similar to those of Housing Sale Price.

Not all the data matches up by date range or by frequency of observation (monthly vs quarterly). To make everything consistent, we will only use quarterly data going from Q1 1975 to Q2 2024.

AR(I)MA Model Investigation

First ARIMA Model

The first model we looked at was an ARIMA model. From the EDA, we saw that because of the increasing variance, we should look at the log of the Housing Sales Price. And since the Parzen Window had a prominent peak at 0 and very slowly dampening autocorrelations, we should take the first difference.

log.mhp = fed_housing_data$Median_Sales_Price
mhp = exp(log.mhp)
d.log.mhp = artrans.wge(log.mhp,1)

The difference looks more like white noise without an obvious pattern to the ACF. Next we will try modeling the differenced data as an ARMA model.

aic5.wge(d.log.mhp,p=0:6,q=0:2,type='aic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     1    2  -7.188559
##     2    2  -7.180263
##     5    2  -7.174131
##     3    2  -7.170135
##     6    2  -7.167230

aic5.wge(d.log.mhp,p=0:6,q=0:2,type='bic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic 
##     p    q        bic
##     1    2  -7.121895
##     2    2  -7.096933
##     0    2  -7.092570
##     3    0  -7.087997
##     1    0  -7.087827

Both the AIC and BIC selection choose p = 1 and q = 2.

We will evaluate our models with a short term horizon of 1 year (4 quarters) and a long term horizon of 5 years (20 quarters). We calcluated a Rolling Window RMSE for both of those horizons (using the logged data) and calculated an ASE for both of those horizons as well (taking the exponential to get back to the original data).

h.short = 4
h.long = 20
l = length(mhp)
est = est.arma.wge(d.log.mhp,p=1,q=2)

##   
##   
## Coefficients of AR polynomial:  
## 0.6901 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.6901B              1.4490               0.6901       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.9720 -0.4007 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9720B+0.4007B^2    1.2128+-1.0122i      0.6330       0.1107
##   
##

f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 84.37902

## [1] 84.37902

f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 7091.032

## [1] 7091.032

r = roll.win.rmse.wge(log.mhp,h.short,d=1,phi=est$phi,theta=est$theta) # 0.036501

r$rwRMSE

## [1] 0.03650146

r = roll.win.rmse.wge(log.mhp,h.long,d=1,phi=est$phi,theta=est$theta) # 0.13269

r$rwRMSE

## [1] 0.1326872

For the ARIMA(1,1,2) model we get a short term Rolling Window RMSE of 0.037, a long term Rolling Window RMSE of 0.133, a short term ASE of 84.4 million and a long term ASE of 7.09 billion.

ARMA Model

As a comparison, we wanted to look a simpler ARMA model.

aic5.wge(d.log.mhp,p=0:6,q=0:4,type='aic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     1    2  -7.188559
##     2    2  -7.180263
##     5    2  -7.174131
##     3    2  -7.170135
##     3    4  -7.168648

aic5.wge(d.log.mhp,p=0:6,q=0:4,type='bic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic 
##     p    q        bic
##     1    2  -7.121895
##     0    3  -7.098442
##     2    2  -7.096933
##     0    2  -7.092570
##     3    0  -7.087997

Both the AIC and BIC selection choose p = 2 and q = 2.

est = est.arma.wge(log.mhp,p=2,q=2)

##   
##   
## Coefficients of AR polynomial:  
## 1.8974 -0.8977 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9972B              1.0028               0.9972       0.0000
## 1-0.9002B              1.1109               0.9002       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 1.1064 -0.4030 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.1064B+0.4030B^2    1.3725+-0.7728i      0.6349       0.0816
##   
##

f = fore.arma.wge(log.mhp,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 112.4314

## [1] 112.4314

f = fore.arma.wge(log.mhp,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 9167.59

## [1] 9167.59

r = roll.win.rmse.wge(log.mhp,h.short,phi=est$phi,theta=est$theta) # 0.044488

r$rwRMSE

## [1] 0.04448757

r = roll.win.rmse.wge(log.mhp,h.long,phi=est$phi,theta=est$theta) # 0.12779

r$rwRMSE

## [1] 0.1277888

For the ARMA(2,2) model we get a short term Rolling Window RMSE of 0.044, a long term Rolling Window RMSE of 0.128, a short term ASE of 112.4 million and a long term ASE of 9.17 billion. Even though this model has a better long term Rolling Window RMSE than the ARIMA model, it did worse in the other metrics.

ARIMA(p,2,q) model

Looking at the overfit factor table, we can see two prominent 0 frequency roots.

est = est.ar.wge(log.mhp,p=12)

##   
##   
## Coefficients of AR polynomial:  
## 0.7873 0.3941 0.1201 -0.0790 -0.0881 -0.1315 -0.0324 0.0971 0.0282 -0.1868 0.0902 0.0005 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9992B              1.0008               0.9992       0.0000
## 1-0.9128B              1.0955               0.9128       0.0000
## 1+1.0481B+0.7252B^2   -0.7226+-0.9256i      0.8516       0.3555
## 1-0.0214B+0.7099B^2    0.0151+-1.1868i      0.8426       0.2480
## 1+1.5907B+0.6978B^2   -1.1398+-0.3660i      0.8353       0.4505
## 1-0.9244B+0.4855B^2    0.9519+-1.0740i      0.6968       0.1346
## 1-0.5740B              1.7420               0.5740       0.0000
## 1+0.0058B             -173.5007               0.0058       0.5000
##   
##

factor.wge(phi=est$phi)

##   
##   
## Coefficients of AR polynomial:  
## 0.7873 0.3941 0.1201 -0.0790 -0.0881 -0.1315 -0.0324 0.0971 0.0282 -0.1868 0.0902 0.0005 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9992B              1.0008               0.9992       0.0000
## 1-0.9128B              1.0955               0.9128       0.0000
## 1+1.0481B+0.7252B^2   -0.7226+-0.9256i      0.8516       0.3555
## 1-0.0214B+0.7099B^2    0.0151+-1.1868i      0.8426       0.2480
## 1+1.5907B+0.6978B^2   -1.1398+-0.3660i      0.8353       0.4505
## 1-0.9244B+0.4855B^2    0.9519+-1.0740i      0.6968       0.1346
## 1-0.5740B              1.7420               0.5740       0.0000
## 1+0.0058B             -173.5007               0.0058       0.5000
##   
##

Even though the differenced data didn’t have slowly dampening autocorrelations and looked liked white noise, the original data had a pretty clear upward trend. We thought that this warranted investigation into an ARIMA(p,2,q) model.

d2.log.mhp = artrans.wge(d.log.mhp,1)

aic5.wge(d.log.mhp,p=0:4,q=0:2,type='aic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     1    2  -7.188559
##     2    2  -7.180263
##     3    2  -7.170135
##     3    1  -7.166652
##     4    0  -7.164881

aic5.wge(d.log.mhp,p=0:4,q=0:2,type='bic')

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  bic 
##     p    q        bic
##     1    2  -7.121895
##     2    2  -7.096933
##     0    2  -7.092570
##     3    0  -7.087997
##     1    0  -7.087827

Both the AIC and BIC selection choose p = 1 and q = 1.

est = est.arma.wge(d2.log.mhp,p=1,q=1)

##   
##   
## Coefficients of AR polynomial:  
## -0.3365 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.3365B             -2.9722               0.3365       0.5000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.8508 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8508B              1.1753               0.8508       0.0000
##   
##

f = fore.arima.wge(log.mhp,d=2,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma -0.02848876 0.06515941 -0.02191816 -0.00591316 -0.02767201 0.01995818 -0.007043221 0.0372058 -0.05668252 0.05783844 -0.02902112 0.01571075 -0.02811809 0.03603873 -0.02364418 0.01366843 -0.01538544 -0.05803636 0.05041521 -0.01272076 0.00926602 0.008884892 -0.01684341 0.03217776 -0.04106979 0.02007841 -0.07568861 0.1055637 -0.05138717 0.03696983 -0.009184633 -0.001872253 0.01123961 -0.05240299 0.04942306 0.001615965 -0.02775835 -0.01738388 0.04932551 -0.01769841 -0.03108832 0.05549379 -0.02862908 0.03180794 -0.03581358 0.01155285 0.008792259 0.02458875 -0.02982428 0.02575136 -0.06412972 0.01354423 0.04445176 -0.05406302 0.04497501 -0.02776557 0.00161076 0.03001177 -0.04645838 0.03037392 -0.1035734 0.1181774 -0.05016285 0.01158884 0.001667361 -0.0008336807 -0.004175371 0.008350743 -0.004175371 0.04879016 -0.05675833 0.02384152 -0.01587335 0 0.02334736 -0.02334736 -0.002310359 0.01988819 -0.0328453 0.04482627 -0.04385013 0.05874309 -0.05172452 0.02821972 -0.02023241 0.02815054 -0.02263884 -0.0007241623 -0.01100415 -3.044042e-05 0.05952674 -0.07189327 0.04104058 -0.02314153 0.02835241 -0.02012712 -0.005707984 0.03571176 -0.03822907 -0.01278556 0.0465237 -0.009739329 -0.04209093 0.07085665 -0.0897531 0.02883951 0.1060593 -0.1058912 -0.04185149 0.1150374 -0.08700863 0.05251006 -0.03018525 0.03480365 0.03225854 -0.04480764 -0.04179748 0.08823311 -0.05316946 -0.01089395 0.006339027 0.01851521 -0.01331145 -0.02235883 -0.03874682 0.08516893 0.006987682 -0.1086092 0.05921457 -0.01250811 -0.004895259 0.02502388 -0.04408397 0.02029851 -0.04764995 0.1237186 -0.08858401 0.05202804 -0.004043317 -0.03302252 0.03611116 -0.01984809 0.01063289 -0.006250213 -0.02564745 0.009576389 0.08613117 -0.07407725 0.04018423 -0.02985329 0.01468233 0.0105804 -0.04923649 0.04507758 -0.02686144 0.03963145 -0.07006818 0.08636021 -0.09474498 0.03264474 0.02325674 -0.0005131741 -0.03136342 0.02943521 -0.02768503 0.03031724 -0.01605042 0.009106176 -0.008955327 0.04566556 -0.07108533 -0.03183917 0.09739741 -0.0721239 -0.006046467 0.06072969 -0.04269463 0.03975216 -0.02116566 -0.04263239 0.07033207 -0.001380716 0.01518753 -0.0118768 0.03643113 -0.02537865 -0.04768247 0.05808469 -0.05619106 0.009762351 -0.04165703 0.00642948 0.06436829 -0.06800853 0.03689091 -0.03771318

ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 162.1383

## [1] 162.1383

f = fore.arima.wge(log.mhp,d=2,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma -0.02848876 0.06515941 -0.02191816 -0.00591316 -0.02767201 0.01995818 -0.007043221 0.0372058 -0.05668252 0.05783844 -0.02902112 0.01571075 -0.02811809 0.03603873 -0.02364418 0.01366843 -0.01538544 -0.05803636 0.05041521 -0.01272076 0.00926602 0.008884892 -0.01684341 0.03217776 -0.04106979 0.02007841 -0.07568861 0.1055637 -0.05138717 0.03696983 -0.009184633 -0.001872253 0.01123961 -0.05240299 0.04942306 0.001615965 -0.02775835 -0.01738388 0.04932551 -0.01769841 -0.03108832 0.05549379 -0.02862908 0.03180794 -0.03581358 0.01155285 0.008792259 0.02458875 -0.02982428 0.02575136 -0.06412972 0.01354423 0.04445176 -0.05406302 0.04497501 -0.02776557 0.00161076 0.03001177 -0.04645838 0.03037392 -0.1035734 0.1181774 -0.05016285 0.01158884 0.001667361 -0.0008336807 -0.004175371 0.008350743 -0.004175371 0.04879016 -0.05675833 0.02384152 -0.01587335 0 0.02334736 -0.02334736 -0.002310359 0.01988819 -0.0328453 0.04482627 -0.04385013 0.05874309 -0.05172452 0.02821972 -0.02023241 0.02815054 -0.02263884 -0.0007241623 -0.01100415 -3.044042e-05 0.05952674 -0.07189327 0.04104058 -0.02314153 0.02835241 -0.02012712 -0.005707984 0.03571176 -0.03822907 -0.01278556 0.0465237 -0.009739329 -0.04209093 0.07085665 -0.0897531 0.02883951 0.1060593 -0.1058912 -0.04185149 0.1150374 -0.08700863 0.05251006 -0.03018525 0.03480365 0.03225854 -0.04480764 -0.04179748 0.08823311 -0.05316946 -0.01089395 0.006339027 0.01851521 -0.01331145 -0.02235883 -0.03874682 0.08516893 0.006987682 -0.1086092 0.05921457 -0.01250811 -0.004895259 0.02502388 -0.04408397 0.02029851 -0.04764995 0.1237186 -0.08858401 0.05202804 -0.004043317 -0.03302252 0.03611116 -0.01984809 0.01063289 -0.006250213 -0.02564745 0.009576389 0.08613117 -0.07407725 0.04018423 -0.02985329 0.01468233 0.0105804 -0.04923649 0.04507758 -0.02686144 0.03963145 -0.07006818 0.08636021 -0.09474498 0.03264474 0.02325674 -0.0005131741 -0.03136342 0.02943521 -0.02768503 0.03031724 -0.01605042 0.009106176 -0.008955327 0.04566556 -0.07108533 -0.03183917 0.09739741 -0.0721239 -0.006046467 0.06072969 -0.04269463 0.03975216 -0.02116566 -0.04263239 0.07033207 -0.001380716 0.01518753 -0.0118768 0.03643113 -0.02537865 -0.04768247 0.05808469 -0.05619106 0.009762351 -0.04165703 0.00642948 0.06436829 -0.06800853 0.03689091 -0.03771318

ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 6263.556

## [1] 6263.556

r = roll.win.rmse.wge(log.mhp,h.short,d=2,phi=est$phi,theta=est$theta) # 0.049161

r$rwRMSE

## [1] 0.0491605

r = roll.win.rmse.wge(log.mhp,h.long,d=2,phi=est$phi,theta=est$theta) # 0.1809277

r$rwRMSE

## [1] 0.1809277

For the ARIMA(1,2,1) model we get a short term Rolling Window RMSE of 0.049, a long term Rolling Window RMSE of 0.181, a short term ASE of 162.4 million and a long term ASE of 6.26 billion. Even though this model had the best long term ASE so far, it did worse in the other metrics.

Comparing all three ARMA/ARIMA models, we will choose the ARIMA(1,1,2). Here are what the short and long term forecasts look like zoomed in.

h.short = 4
h.long = 20
x = fed_housing_data$Median_Sales_Price
l = length(x)
d = artrans.wge(x,1)

dev.off()

## null device 
##           1

est = est.arma.wge(d,p=1,q=2)

##   
##   
## Coefficients of AR polynomial:  
## 0.6901 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.6901B              1.4490               0.6901       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.9720 -0.4007 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9720B+0.4007B^2    1.2128+-1.0122i      0.6330       0.1107
##   
##

f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE,plot=FALSE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),f$f,col="red")
lines(seq((l-h.short+1),l,1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l-h.short+1),l,1),f$ul,col="blue",lty=3)
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE,plot=FALSE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),f$f,col="red")
lines(seq((l-h.long+1),l,1),f$ll,col="blue",lty=3)
lines(seq((l-h.long+1),l,1),f$ul,col="blue",lty=3)

est = est.arma.wge(d.log.mhp,p=1,q=2)

##   
##   
## Coefficients of AR polynomial:  
## 0.6901 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.6901B              1.4490               0.6901       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.9720 -0.4007 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9720B+0.4007B^2    1.2128+-1.0122i      0.6330       0.1107
##   
##

f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE,plot=FALSE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

aic = est$aic
aic # -7.188559

## [1] -7.188559

resid = f$resid
xbar=est$xbar
xbar # 0.01211603

## [1] 0.01211603

vara = est$avar
vara # 0.0007251235

## [1] 0.0007251235

est$phi # 0.6901326

## [1] 0.6901326

est$theta # 0.9720137 -0.4007360

## [1]  0.9720137 -0.4007360

The model (for the log data) can be written as: (1-B)(1-0.69B)(x_t-0.012) = (1-0.972B+0.401B^2)a_t $\sigma$_t^2 = 0.001

Now we will look at residuals to make sure that they are white noise.

plotts.wge(resid)
title(main="Residuals")

acf(resid,lag.max=100,main="ACF of Residuals")

parzen.wge(resid)

## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $pzgram
##  [1] -0.797004813 -0.607243200 -0.344198158 -0.063232781  0.188382589
##  [6]  0.378720094  0.491494531  0.523513299  0.481787408  0.381099823
## [11]  0.241868424  0.087774592 -0.057313938 -0.173121432 -0.246654172
## [16] -0.274416037 -0.262023406 -0.221275590 -0.165987055 -0.108279277
## [21] -0.056499886 -0.015078241  0.014084469  0.029552351  0.028689360
## [26]  0.007329816 -0.038795564 -0.111192859 -0.205284085 -0.308113669
## [31] -0.398868158 -0.453706774 -0.454596339 -0.398504316 -0.301238090
## [36] -0.192849981 -0.107095065 -0.070124261 -0.091740248 -0.159921479
## [41] -0.239308598 -0.276760569 -0.217958210 -0.032276536  0.269132798
## [46]  0.637082772  1.005808966  1.314459108  1.517957072  1.588609565
## [51]  1.513852867  1.293927215  0.941005071  0.479917398 -0.050304363
## [56] -0.595941123 -1.092179747 -1.473475908 -1.689279121 -1.717433678
## [61] -1.566927949 -1.270016865 -0.871193575 -0.418870627  0.039687761
## [66]  0.461726243  0.811445715  1.061599835  1.194392265  1.202139668
## [71]  1.088221025  0.868516713  0.572791063  0.244252201 -0.065594414
## [76] -0.308704934 -0.455642963 -0.504776005 -0.478004996 -0.406270007
## [81] -0.315364003 -0.219440713 -0.122757896 -0.026233531  0.065593538
## [86]  0.140919811  0.181970578  0.170235493  0.093240274 -0.049266474
## [91] -0.241155492 -0.449545009 -0.630933666 -0.745400980 -0.774350119
## [96] -0.730371530 -0.650938653 -0.580545801 -0.552964472

They appear to be white noise, and the ACF has very few values above 0.2. We will run a Ljung-Box test to be sure.

ljung.wge(resid,K=24,p=1,q=2) # p = 0.2272667, white

## Obs 0.00435293 -0.0244108 0.01796347 0.005240047 0.02127357 -0.03888973 -0.03903064 0.05468715 0.02530515 -0.142211 0.02934427 0.04507912 -0.1378715 -0.04501699 0.006830995 0.0376424 -0.03877854 -0.09888204 -0.07092551 0.1855184 -0.08752274 0.009655451 -0.06740214 -0.01970965

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 25.47283
## 
## $df
## [1] 21
## 
## $pval
## [1] 0.2272667

ljung.wge(resid,K=48,p=1,q=2) # p = 0.5584003, white

## Obs 0.00435293 -0.0244108 0.01796347 0.005240047 0.02127357 -0.03888973 -0.03903064 0.05468715 0.02530515 -0.142211 0.02934427 0.04507912 -0.1378715 -0.04501699 0.006830995 0.0376424 -0.03877854 -0.09888204 -0.07092551 0.1855184 -0.08752274 0.009655451 -0.06740214 -0.01970965 0.0001776949 -0.00201369 -0.02954499 0.006651707 0.08148863 -0.1088068 -0.02321975 0.1202522 -0.05252142 0.1307315 0.03517719 0.05725037 0.03895821 -0.02151136 -0.04091389 0.08019348 -0.01208333 -0.0003453521 0.006175633 -0.01274082 0.02231108 0.05047774 0.009971913 0.01279219

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 42.96777
## 
## $df
## [1] 45
## 
## $pval
## [1] 0.5584003

The test gives more evidence that the residuals are white noise. We can also check if they are normally distributed with a histogram.

hist(resid)

We also want to make sure that the ACFs and Parzen Windows generated from ARIMA(1,2,2) models look similar to that of our original data.

# Multiple ACFs 
set.seed(2)
sims = 10
ACF = acf(log.mhp, plot = "FALSE")
plot(ACF$lag ,ACF$acf , type = "l", lwd = 6,xlab="Lag",ylab="ACF",main="True ACF vs Generated Data")
for( i in 1: sims)
{
  ACF2 = acf(gen.arima.wge(l, phi = est$phi, theta=est$theta,d=1, plot="FALSE"), plot = "FALSE")
  lines(ACF2$lag ,ACF2$acf, lwd = 2, col = "red")
}

# Multiple Parzen 
set.seed(3)
sims = 10
SpecDen = parzen.wge(log.mhp, plot = "FALSE")
plot(SpecDen$freq,SpecDen$pzgram, type = "l", lwd = 6,xlab="Frequency",ylab="dB",main="True Spectral Density vs Generated Data")
for( i in 1: sims)
{
  SpecDen2 = parzen.wge(gen.aruma.wge(l,phi=est$phi, theta=est$theta,d=1, plot ="FALSE"), plot = "FALSE")
  lines(SpecDen2$freq,SpecDen2$pzgram, lwd = 2, col = "red")
}

The ACFs don’t match up exactly, but have the same slowly dampening behavior. The Parzen Windows are very close to the original.

We will look at the short and long term forecasts of future data as well.

h.short = 4
h.long = 20
x = fed_housing_data$Median_Sales_Price
l = length(x)
d = artrans.wge(x,1)

dev.off()

## null device 
##           1

est = est.arma.wge(d,p=1,q=2)

##   
##   
## Coefficients of AR polynomial:  
## 0.6901 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.6901B              1.4490               0.6901       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.9720 -0.4007 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9720B+0.4007B^2    1.2128+-1.0122i      0.6330       0.1107
##   
##

f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=FALSE,plot=FALSE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),f$f,col="red")
lines(seq((l+1),(l+h.short),1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.short),1),f$ul,col="blue",lty=3)
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)

## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254

plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),f$f,col="red")
lines(seq((l+1),(l+h.long),1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.long),1),f$ul,col="blue",lty=3)

Unsurprisingly, the future forecasts didn’t increase with time. This is similar to the forecasts we used for predictions.

Exogenous Variable Forecasts

In order to use the Exogenous variables in either the MLR or Multi-Variate MLP, it is more realistic to make predictions for them first.

For error metrics, we will focus on the ASE for short and long term predictions. This is because we are specifically using these short and long term predictions in future models.

Home Ownership Rate

The Home Ownership Rate looked like either an ARMA or an ARIMA(p,1,q) might be appropriate.

x = fed_housing_data$Ownership_Rate 
plotts.sample.wge(x)

## $xbar
## [1] 65.53333
## 
## $autplt
##  [1] 1.0000000 0.9722424 0.9483443 0.9359713 0.9199870 0.8933705 0.8751577
##  [8] 0.8587103 0.8420414 0.8126124 0.7809415 0.7543250 0.7246543 0.6847273
## [15] 0.6445049 0.6104712 0.5736989 0.5329127 0.4938515 0.4583814 0.4201056
## [22] 0.3795946 0.3399360 0.3068421 0.2726943 0.2332908
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1]  12.40084982  12.05208269  11.46667385  10.63837479   9.55841432
##  [6]   8.21574644   6.59803371   4.69464371   2.50495993   0.06013993
## [11]  -2.52569134  -4.96980287  -6.81825314  -7.77998095  -8.04843399
## [16]  -8.00240687  -7.87914699  -7.78191376  -7.76265554  -7.85818084
## [21]  -8.09312523  -8.47433777  -8.98713908  -9.59570117 -10.24851659
## [26] -10.88985551 -11.47485848 -11.98084976 -12.40803253 -12.77134027
## [31] -13.09179721 -13.39253528 -13.69753110 -14.02860060 -14.39874975
## [36] -14.80330620 -15.21201130 -15.56672034 -15.79120455 -15.81670210
## [41] -15.61309625 -15.20280732 -14.64701792 -14.01978302 -13.38927072
## [46] -12.81052548 -12.32491905 -11.96174963 -11.74003104 -11.66991253
## [51] -11.75354537 -11.98525285 -12.35089436 -12.82647847 -13.37653746
## [56] -13.95369967 -14.50206148 -14.96692894 -15.30987861 -15.52161538
## [61] -15.62332516 -15.65545084 -15.66236442 -15.68149000 -15.73910254
## [66] -15.85059016 -16.02243062 -16.25427422 -16.54062535 -16.87225502
## [71] -17.23773120 -17.62538046 -18.02559859 -18.43284892 -18.84631210
## [76] -19.26840545 -19.70126488 -20.14225382 -20.58010206 -20.99328730
## [81] -21.35180613 -21.62236172 -21.77529821 -21.79055554 -21.66095243
## [86] -21.39326846 -21.00804855 -20.53758741 -20.02117993 -19.49852922
## [91] -19.00371958 -18.56160453 -18.18687293 -17.88502998 -17.65440333
## [96] -17.48858032 -17.37895990 -17.31720434 -17.29734042

d = artrans.wge(x,1)

We will start with the ARIMA model.

aic5.wge(d,p=0:10,q=0:4,type='aic') # 8/1 best

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 6 4 
## Five Smallest Values of  aic 
##     p    q        aic
##     8    1  -2.113107
##     8    0  -2.111537
##     9    0  -2.108892
##     8    2  -2.105050
##     8    3  -2.103671

The model with the best AIC is p = 8, q = 1.

est = est.arma.wge(d,p=8,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 0.5294 -0.2041 0.1820 0.1108 -0.2424 0.1690 -0.1057 0.2697 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9134B              1.0948               0.9134       0.0000
## 1+0.8712B             -1.1478               0.8712       0.5000
## 1-1.3170B+0.7430B^2    0.8862+-0.7486i      0.8620       0.1116
## 1-0.0293B+0.7263B^2    0.0202+-1.1733i      0.8522       0.2473
## 1+0.8591B+0.6281B^2   -0.6839+-1.0604i      0.7925       0.3412
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.5818 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.5818B              1.7189               0.5818       0.0000
##   
##

f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 0.5 -0.3 -0.1 0.1 0 0.3 -0.1 0 -0.3 0.5 -0.1 -0.1 -0.4 0.8 0.2 -0.6 0.1 0.9 -0.4 0.1 0 0.3 -0.3 0.1 -0.3 0.3 -0.4 -0.4 0.1 0 -0.4 0.2 0 0.1 -0.4 0.2 0 0 -0.5 0 0 -0.2 -0.4 0.1 0.2 0 0.1 -0.1 0 0.4 -0.1 -0.4 0 0.3 -0.2 0.1 -0.1 0.3 -0.3 0.2 -0.3 0.3 0.1 -0.2 0 0.3 0 -0.2 -0.1 0.4 0.1 -0.2 0.2 0.3 -0.1 -0.8 0 0.3 0.1 0 0.5 0.3 0.1 0 0.3 0.2 -0.2 0 0.3 0.3 -0.3 0.2 0.1 0.8 -0.4 0.3 -0.1 0.4 -0.1 0.2 0.1 0.5 -0.2 0 0.2 0.4 -0.1 -0.2 -0.2 0.4 0.3 -0.3 0 0.4 0.2 0 0.6 -0.2 0.2 -0.1 -0.5 0.2 0.2 -0.5 0.2 0.3 -0.1 -0.5 -0.2 0 -0.4 0 0.3 -0.2 -0.4 -0.2 0.1 0.2 -0.4 -0.1 -0.2 0 -0.4 -0.1 -0.5 0.4 -0.3 -0.6 0.1 0 -0.1 -0.4 0 0.3 -0.1 -0.4 -0.1 -0.3 -0.4 -0.3 -0.3 0.3 0.1 -0.3 -0.6 0.6 0.2 -0.1 0.1 0.2 0.3 0 0.1 0.1 0.4 -0.6 -0.1 0.7 0.3 0.2 2.6 -0.5 -1.6 -0.2 -0.2 0 0.1 -0.1 0.4 0.2 -0.1 0.1 -0.1 0.1 -0.3 -0.1 0

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.03083697

## [1] 0.03083697

f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 0.5 -0.3 -0.1 0.1 0 0.3 -0.1 0 -0.3 0.5 -0.1 -0.1 -0.4 0.8 0.2 -0.6 0.1 0.9 -0.4 0.1 0 0.3 -0.3 0.1 -0.3 0.3 -0.4 -0.4 0.1 0 -0.4 0.2 0 0.1 -0.4 0.2 0 0 -0.5 0 0 -0.2 -0.4 0.1 0.2 0 0.1 -0.1 0 0.4 -0.1 -0.4 0 0.3 -0.2 0.1 -0.1 0.3 -0.3 0.2 -0.3 0.3 0.1 -0.2 0 0.3 0 -0.2 -0.1 0.4 0.1 -0.2 0.2 0.3 -0.1 -0.8 0 0.3 0.1 0 0.5 0.3 0.1 0 0.3 0.2 -0.2 0 0.3 0.3 -0.3 0.2 0.1 0.8 -0.4 0.3 -0.1 0.4 -0.1 0.2 0.1 0.5 -0.2 0 0.2 0.4 -0.1 -0.2 -0.2 0.4 0.3 -0.3 0 0.4 0.2 0 0.6 -0.2 0.2 -0.1 -0.5 0.2 0.2 -0.5 0.2 0.3 -0.1 -0.5 -0.2 0 -0.4 0 0.3 -0.2 -0.4 -0.2 0.1 0.2 -0.4 -0.1 -0.2 0 -0.4 -0.1 -0.5 0.4 -0.3 -0.6 0.1 0 -0.1 -0.4 0 0.3 -0.1 -0.4 -0.1 -0.3 -0.4 -0.3 -0.3 0.3 0.1 -0.3 -0.6 0.6 0.2 -0.1 0.1 0.2 0.3 0 0.1 0.1 0.4 -0.6 -0.1 0.7 0.3 0.2 2.6 -0.5 -1.6 -0.2 -0.2 0 0.1 -0.1 0.4 0.2 -0.1 0.1 -0.1 0.1 -0.3 -0.1 0

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 2.077416

## [1] 2.077416

This model has a short term ASE of 0.031 and a long term ASE of 2.08.

Next will compare this to an ARMA model.

aic5.wge(x,p=0:12,q=0:2,type='aic') # 9/1 best

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     9    1  -2.144284
##     9    2  -2.134332
##    10    0  -2.123071
##     9    0  -2.122279
##    12    2  -2.116436

The model it chose had p = 9 and q = 1.

est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9534B+0.9571B^2    1.0204+-0.0589i      0.9783       0.0092
## 1-1.3559B+0.7452B^2    0.9098+-0.7171i      0.8632       0.1062
## 1+0.8617B             -1.1605               0.8617       0.5000
## 1-0.0469B+0.7004B^2    0.0335+-1.1944i      0.8369       0.2455
## 1+0.8177B+0.6059B^2   -0.6748+-1.0932i      0.7784       0.3380
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.7795 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.7795B              1.2828               0.7795       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.02468854

## [1] 0.02468854

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 1.108741

## [1] 1.108741

This model had better ASE values with 0.025 for short term and 1.11 for long term. This will be the predictions we use for our MLR and MLP models.

fed_housing_data_short = fed_housing_data
fed_housing_data_long = fed_housing_data
x = fed_housing_data$Ownership_Rate 
est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9534B+0.9571B^2    1.0204+-0.0589i      0.9783       0.0092
## 1-1.3559B+0.7452B^2    0.9098+-0.7171i      0.8632       0.1062
## 1+0.8617B             -1.1605               0.8617       0.5000
## 1-0.0469B+0.7004B^2    0.0335+-1.1944i      0.8369       0.2455
## 1+0.8177B+0.6059B^2   -0.6748+-1.0932i      0.7784       0.3380
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.7795 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.7795B              1.2828               0.7795       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6

hor.pred.short = f$f
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6

hor.pred.long = f$f
fed_housing_data_short$Ownership_Rate[(l-h.short+1):l] = hor.pred.short
fed_housing_data_long$Ownership_Rate[(l-h.long+1):l] = hor.pred.long

Housing Units Completed

The Housing Units Completed variable looked like either an ARMA or an ARIMA(p,1,q) might be appropriate for forecasting.

x = fed_housing_data$Housing_Units_Completed 
plotts.sample.wge(x)

## $xbar
## [1] 1354.924
## 
## $autplt
##  [1]  1.000000000  0.959798082  0.927757828  0.887546260  0.839449796
##  [6]  0.785021406  0.724421795  0.663945462  0.587584415  0.521735598
## [11]  0.459327920  0.391804222  0.331038625  0.272988776  0.219171898
## [16]  0.167685853  0.124990699  0.088571013  0.054233289  0.031115266
## [21]  0.011815053 -0.006148206 -0.028654674 -0.043678086 -0.059390137
## [26] -0.072592499
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1]  11.4278852  11.2069031  10.8369187  10.3155427   9.6397210   8.8060964
##  [7]   7.8116051   6.6544671   5.3358410   3.8625822   2.2517055   0.5370433
## [13]  -1.2226229  -2.9385521  -4.5011052  -5.8132302  -6.8352084  -7.6004496
## [19]  -8.1859722  -8.6701451  -9.1089491  -9.5309632  -9.9399897 -10.3202888
## [25] -10.6455581 -10.8926675 -11.0556563 -11.1512734 -11.2119164 -11.2716270
## [31] -11.3544483 -11.4700563 -11.6162799 -11.7857504 -11.9732807 -12.1806396
## [37] -12.4166557 -12.6930160 -13.0180937 -13.3913894 -13.8002910 -14.2201381
## [43] -14.6182596 -14.9617572 -15.2265628 -15.4031355 -15.4952227 -15.5127543
## [49] -15.4639068 -15.3512372 -15.1736940 -14.9328641 -14.6394550 -14.3159142
## [55] -13.9937507 -13.7075613 -13.4890602 -13.3631708 -13.3463799 -13.4465623
## [61] -13.6634273 -13.9890858 -14.4085938 -14.9005598 -15.4379665 -15.9892628
## [67] -16.5196245 -16.9923207 -17.3706029 -17.6211731 -17.7199788 -17.6589302
## [73] -17.4496569 -17.1210744 -16.7117425 -16.2613673 -15.8051121 -15.3715052
## [79] -14.9827543 -14.6559553 -14.4041964 -14.2371325 -14.1609597 -14.1778746
## [85] -14.2851693 -14.4742024 -14.7296668 -15.0298492 -15.3487446 -15.6604883
## [91] -15.9452324 -16.1938387 -16.4083643 -16.5974483 -16.7688868 -16.9232350
## [97] -17.0515395 -17.1384247 -17.1693705

d = artrans.wge(x,1)

However, this looks like the autocorrelations dampen even less slowly than the Home Ownership Rate, meaning that an ARIMA(p,1,q) model would be even less appropriate. Since we chose the ARMA model for Home Ownership Rate over the ARIMA model. We will just focus on the ARMA model for Housing Units Complete.

aic5.wge(x,p=0:10,q=0:1,type='aic') # 10/0 best

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##    10    0   9.024872
##     9    0   9.025447
##     9    1   9.028069
##     8    0   9.028335
##     8    1   9.032021

The highest AIC value was for p = 10 and q = 0.

est = est.ar.wge(x,p=10)

##   
##   
## Coefficients of AR polynomial:  
## 0.8984 0.1900 0.0176 -0.0870 0.0137 -0.0379 0.1724 -0.3410 0.0211 0.0961 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.7434B+0.7704B^2    1.1315+-0.1333i      0.8777       0.0187
## 1+0.4915B+0.7394B^2   -0.3323+-1.1144i      0.8599       0.2961
## 1+1.5034B+0.7169B^2   -1.0486+-0.5435i      0.8467       0.4239
## 1-0.8325B+0.6888B^2    0.6043+-1.0424i      0.8300       0.1664
## 1-0.7644B              1.3083               0.7644       0.0000
## 1+0.4469B             -2.2378               0.4469       0.5000
##   
##

f = fore.arima.wge(x,phi=est$phi,n.ahead=h.short,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

ase = mean((x[(l-h.short+1):l]-f$f)^2)/1e3
ase # 12.98288

## [1] 12.98288

f = fore.arima.wge(x,phi=est$phi,n.ahead=h.long,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

ase = mean((x[(l-h.long+1):l]-f$f)^2)/1e3
ase # 8.546039

## [1] 8.546039

This gives a short term ASE of 13.0k and a long term ASE of 8.5k. Playing around with some of the other output from the aic5 output showed that an ARMA(9,1) model does slightly better for long term predictions and slightly worse for short term predictions.

est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.7319B+0.7686B^2    1.1267+-0.1778i      0.8767       0.0249
## 1-0.8710B              1.1481               0.8710       0.0000
## 1+1.5502B+0.7380B^2   -1.0502+-0.5020i      0.8591       0.4290
## 1+0.5020B+0.6999B^2   -0.3586+-1.1402i      0.8366       0.2985
## 1-0.8558B+0.6584B^2    0.6499+-1.0471i      0.8114       0.1616
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.5043 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.5043B              1.9829               0.5043       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

ase = mean((x[(l-h.short+1):l]-f$f)^2)/1e3
ase # 13.44075

## [1] 13.44075

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

ase = mean((x[(l-h.long+1):l]-f$f)^2)/1e3
ase # 8.516217

## [1] 8.516217

We will proceed with the ARMA(9,1) forecast for our future models that use exogenous variables.

x = fed_housing_data$Housing_Units_Completed  
est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.7319B+0.7686B^2    1.1267+-0.1778i      0.8767       0.0249
## 1-0.8710B              1.1481               0.8710       0.0000
## 1+1.5502B+0.7380B^2   -1.0502+-0.5020i      0.8591       0.4290
## 1+0.5020B+0.6999B^2   -0.3586+-1.1402i      0.8366       0.2985
## 1-0.8558B+0.6584B^2    0.6499+-1.0471i      0.8114       0.1616
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.5043 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.5043B              1.9829               0.5043       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

huc.pred.short = f$f
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

huc.pred.long = f$f
fed_housing_data_short$Housing_Units_Completed[(l-h.short+1):l] = huc.pred.short
fed_housing_data_long$Housing_Units_Completed[(l-h.long+1):l] = huc.pred.long

Supply of New Houses

Similar to Housing Units Completed, an ARMA model seems like it would be most appropriate for Supply of New Houses.

x = fed_housing_data$Supply_New_Houses 
plotts.sample.wge(x)

## $xbar
## [1] 6.167172
## 
## $autplt
##  [1]  1.000000000  0.840571774  0.745101265  0.663968698  0.563328441
##  [6]  0.489741871  0.419594057  0.325865642  0.257517222  0.230333020
## [11]  0.176617503  0.124718033  0.057267731  0.015708696  0.002656971
## [16] -0.031243001 -0.045540921 -0.054571142 -0.086637135 -0.094349258
## [21] -0.090844434 -0.085823390 -0.112309986 -0.115799497 -0.138576373
## [26] -0.143176298
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1]   9.5891631   9.4770320   9.2843191   9.0037218   8.6275394   8.1495676
##  [7]   7.5669926   6.8822183   6.1045806   5.2517494   4.3502668   3.4342686
## [13]   2.5414258   1.7061818   0.9524166   0.2890741  -0.2890919  -0.7969233
## [19]  -1.2536889  -1.6795409  -2.0935880  -2.5125122  -2.9490091  -3.4100709
## [25]  -3.8954671  -4.3966342  -4.8959536  -5.3665345  -5.7734357  -6.0784044
## [31]  -6.2499149  -6.2763837  -6.1748891  -5.9881487  -5.7713034  -5.5772650
## [37]  -5.4474826  -5.4089954  -5.4753662  -5.6491352  -5.9245893  -6.2904934
## [43]  -6.7326928  -7.2362671  -7.7864647  -8.3673321  -8.9572359  -9.5218201
## [49] -10.0079681 -10.3467888 -10.4743998 -10.3657394 -10.0546383  -9.6189296
## [55]  -9.1454707  -8.7037197  -8.3370423  -8.0646735  -7.8865272  -7.7876591
## [61]  -7.7424992  -7.7201909  -7.6918762  -7.6388136  -7.5581028  -7.4629285
## [67]  -7.3773173  -7.3285781  -7.3409229  -7.4317827  -7.6103898  -7.8774389
## [73]  -8.2247650  -8.6344430  -9.0774007  -9.5127345  -9.8903916 -10.1604198
## [79] -10.2886844 -10.2715343 -10.1383759  -9.9394841  -9.7281415  -9.5473781
## [85]  -9.4243977  -9.3704536  -9.3833644  -9.4512929  -9.5574867  -9.6856568
## [91]  -9.8248218  -9.9717173 -10.1293502 -10.3020043 -10.4887435 -10.6782054
## [97] -10.8474029 -10.9665648 -11.0097488

d = artrans.wge(x,1)

aic5.wge(x,p=0:4,q=0:1,type='aic') # 1/0 best

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     1    0 -0.1975496
##     1    1 -0.1970961
##     2    0 -0.1970576
##     3    1 -0.1966637
##     4    0 -0.1912306

The highest AIC value was for p = 1 and q = 0.

est = est.ar.wge(x,p=1)

##   
##   
## Coefficients of AR polynomial:  
## 0.8594 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8594B              1.1636               0.8594       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,n.ahead=h.short,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.6951032

## [1] 0.6951032

f = fore.arima.wge(x,phi=est$phi,n.ahead=h.long,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 3.865913

## [1] 3.865913

est = est.arma.wge(x,p=1,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 0.8930 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8930B              1.1198               0.8930       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.1287 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.1287B              7.7707               0.1287       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.4791473

## [1] 0.4791473

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 3.901133

## [1] 3.901133

The short term ASE for the ARMA(1,1) is significantly better, but the long term ASE is a little worse. We will proceed with the ARMA(1,1) model.

x = fed_housing_data$Supply_New_Houses   
est = est.arma.wge(x,p=1,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 0.8930 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8930B              1.1198               0.8930       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.1287 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.1287B              7.7707               0.1287       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

snh.pred.short = f$f
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

snh.pred.long = f$f
fed_housing_data_short$Supply_New_Houses[(l-h.short+1):l] = snh.pred.short
fed_housing_data_long$Supply_New_Houses[(l-h.long+1):l] = snh.pred.long

House price index

x = fed_housing_data$Housing_Price_Index
plotts.sample.wge(x)

## $xbar
## [1] 260.0757
## 
## $autplt
##  [1] 1.0000000 0.9729202 0.9462056 0.9191307 0.8909986 0.8628031 0.8357103
##  [8] 0.8082155 0.7793027 0.7505719 0.7251203 0.7011544 0.6785046 0.6582523
## [15] 0.6404179 0.6234247 0.6071393 0.5914399 0.5759915 0.5608527 0.5458586
## [22] 0.5311570 0.5169824 0.5031950 0.4892727 0.4755910
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1]  12.2782408  11.9222965  11.3282016  10.4956796   9.4267581   8.1292098
##  [7]   6.6225197   4.9470123   3.1742366   1.4094245  -0.2313272  -1.6725572
## [13]  -2.9162225  -4.0138553  -5.0001956  -5.8567808  -6.5364365  -7.0250111
## [19]  -7.3764536  -7.6840015  -8.0258094  -8.4339996  -8.8942563  -9.3642704
## [25]  -9.8011060 -10.1848714 -10.5238940 -10.8395701 -11.1462611 -11.4424491
## [31] -11.7173103 -11.9654010 -12.1970206 -12.4359357 -12.7070093 -13.0231920
## [37] -13.3793254 -13.7548915 -14.1237551 -14.4655588 -14.7716121 -15.0415198
## [43] -15.2744035 -15.4630500 -15.5964283 -15.6686425 -15.6865151 -15.6690104
## [49] -15.6391463 -15.6150166 -15.6056625 -15.6128189 -15.6358467 -15.6760504
## [55] -15.7377682 -15.8261407 -15.9436261 -16.0877799 -16.2516731 -16.4265637
## [61] -16.6050338 -16.7825128 -16.9562814 -17.1229779 -17.2768337 -17.4103793
## [67] -17.5174995 -17.5967749 -17.6525899 -17.6931056 -17.7265509 -17.7583210
## [73] -17.7904616 -17.8233217 -17.8578059 -17.8964986 -17.9429344 -17.9996839
## [79] -18.0666917 -18.1410233 -18.2181651 -18.2939853 -18.3660345 -18.4333635
## [85] -18.4951505 -18.5493235 -18.5923666 -18.6206174 -18.6322323 -18.6284354
## [91] -18.6131454 -18.5912560 -18.5667599 -18.5418862 -18.5175970 -18.4948317
## [97] -18.4754511 -18.4621442 -18.4573749

d = artrans.wge(x,1)

d2 = artrans.wge(d,1)

dev.off()

## null device 
##           1

parzen.wge(d2)

## $freq
##  [1] 0.005102041 0.010204082 0.015306122 0.020408163 0.025510204 0.030612245
##  [7] 0.035714286 0.040816327 0.045918367 0.051020408 0.056122449 0.061224490
## [13] 0.066326531 0.071428571 0.076530612 0.081632653 0.086734694 0.091836735
## [19] 0.096938776 0.102040816 0.107142857 0.112244898 0.117346939 0.122448980
## [25] 0.127551020 0.132653061 0.137755102 0.142857143 0.147959184 0.153061224
## [31] 0.158163265 0.163265306 0.168367347 0.173469388 0.178571429 0.183673469
## [37] 0.188775510 0.193877551 0.198979592 0.204081633 0.209183673 0.214285714
## [43] 0.219387755 0.224489796 0.229591837 0.234693878 0.239795918 0.244897959
## [49] 0.250000000 0.255102041 0.260204082 0.265306122 0.270408163 0.275510204
## [55] 0.280612245 0.285714286 0.290816327 0.295918367 0.301020408 0.306122449
## [61] 0.311224490 0.316326531 0.321428571 0.326530612 0.331632653 0.336734694
## [67] 0.341836735 0.346938776 0.352040816 0.357142857 0.362244898 0.367346939
## [73] 0.372448980 0.377551020 0.382653061 0.387755102 0.392857143 0.397959184
## [79] 0.403061224 0.408163265 0.413265306 0.418367347 0.423469388 0.428571429
## [85] 0.433673469 0.438775510 0.443877551 0.448979592 0.454081633 0.459183673
## [91] 0.464285714 0.469387755 0.474489796 0.479591837 0.484693878 0.489795918
## [97] 0.494897959 0.500000000
## 
## $pzgram
##  [1] -10.34279517  -9.98707527  -9.44865876  -8.78719239  -8.05897430
##  [6]  -7.31041968  -6.57762865  -5.88826262  -5.26349808  -4.71940306
## [11]  -4.26780579  -3.91686547  -3.67148234  -3.53359444  -3.50235713
## [16]  -3.57418301  -3.74261345  -3.99798412  -4.32683216  -4.71099290
## [21]  -5.12641206  -5.54197027  -5.91917652  -6.21425948  -6.38412736
## [26]  -6.39564482  -6.23410223  -5.90544523  -5.43096931  -4.83900922
## [31]  -4.15911471  -3.42028916  -2.65119399  -1.87971474  -1.13094464
## [36]  -0.42426542   0.22929907   0.82857798   1.38205740   1.90556547
## [41]   2.41764521   2.93328115   3.45799834   3.98499129   4.49667567
## [46]   4.96958060   5.37998631   5.70818229   5.94065817   6.07059152
## [51]   6.09727492   6.02494653   5.86123982   5.61535535   5.29612909
## [56]   4.91036143   4.46192749   3.95214761   3.38160525   2.75317137
## [61]   2.07556803   1.36636784   0.65279634  -0.03160561  -0.65535551
## [66]  -1.19994093  -1.66703125  -2.07665971  -2.45748803  -2.83554252
## [71]  -3.22693514  -3.63596757  -4.05714444  -4.47880516  -4.88624518
## [76]  -5.26287221  -5.58931279  -5.84223501  -5.99588123  -6.02835672
## [81]  -5.93096809  -5.71484147  -5.40960546  -5.05481896  -4.68972712
## [86]  -4.34617281  -4.04576320  -3.79996389  -3.61152832  -3.47636384
## [91]  -3.38558547  -3.32778303  -3.29144017  -3.26715430  -3.24907710
## [96]  -3.23509357  -3.22573312  -3.22238869

d3 = artrans.wge(d2,c(0,-1))
dev.off()

## null device 
##           1

acf(d3,lag.max=100)

This plot is more similar to the variable of interest, Medium Sale Price. This means that a linear signal plus noise model could be useful.

However, unlike the other variables, it looked like a single difference wasn’t white noise. Even differencing out the (1-B)^2 still had a 0.25 frequency. Differencing out that frequency as well looked more like white noise. So we will look at a signal plus noise model and a (1-B)^2*(1+B^2) model.

f = fore.sigplusnoise.wge(x,max.p=0,n.ahead=h.short,lastn=TRUE)

aic5.ar.wge(f$resid,p=0:20) # p = 9

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Error in aic calculation at 1 
## Error in aic calculation at 2 
## Error in aic calculation at 3 
## Error in aic calculation at 12 
## Error in aic calculation at 14 
## Error in aic calculation at 17 
## Error in aic calculation at 18 
## Error in aic calculation at 19 
## Error in aic calculation at 20 
## Five Smallest Values of  aic 
## Method= mle 
##     p        aic
##     9   2.299272
##     6   2.299300
##     7   2.305336
##    10   2.305733
##    13   2.311372

The model for the noise with the highest AIC value is p = 9.

f = fore.sigplusnoise.wge(x,max.p=9,n.ahead=h.short,lastn=TRUE)

##   
##   
## Coefficients of AR polynomial:  
## 1.8014 -1.2205 0.8755 -0.2603 -0.5252 0.4232 -0.2115 0.2703 -0.1651 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9514B+0.9573B^2    1.0192+-0.0764i      0.9784       0.0119
## 1+0.0202B+0.8230B^2   -0.0123+-1.1022i      0.9072       0.2518
## 1+0.7906B             -1.2649               0.7906       0.5000
## 1-1.2751B+0.5746B^2    1.1097+-0.7136i      0.7580       0.0910
## 1+0.6143B+0.4614B^2   -0.6656+-1.3130i      0.6793       0.3247
##   
##

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 161.2832

## [1] 161.2832

f = fore.sigplusnoise.wge(x,max.p=9,n.ahead=h.long,lastn=TRUE)

##   
##   
## Coefficients of AR polynomial:  
## 1.8014 -1.2205 0.8755 -0.2603 -0.5252 0.4232 -0.2115 0.2703 -0.1651 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9514B+0.9573B^2    1.0192+-0.0764i      0.9784       0.0119
## 1+0.0202B+0.8230B^2   -0.0123+-1.1022i      0.9072       0.2518
## 1+0.7906B             -1.2649               0.7906       0.5000
## 1-1.2751B+0.5746B^2    1.1097+-0.7136i      0.7580       0.0910
## 1+0.6143B+0.4614B^2   -0.6656+-1.3130i      0.6793       0.3247
##   
##

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 10014.69

## [1] 10014.69

The short term ASE is 161 and the long term ASE is 10,014.

aic5.wge(d3,p=0:8,q=0:6,type='aic') # 4/5 best

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     4    5   2.318766
##     3    5   2.318801
##     5    5   2.331189
##     6    5   2.335669
##     2    5   2.343238

It looks like the model with p = 4 and q = 5 had the best AIC.

est = est.arma.wge(d3,p=4,q=5)

##   
##   
## Coefficients of AR polynomial:  
## -0.2653 0.1543 0.4275 0.2777 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8773B              1.1399               0.8773       0.0000
## 1+0.5373B+0.5229B^2   -0.5138+-1.2839i      0.7231       0.3106
## 1+0.6053B             -1.6520               0.6053       0.5000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.0912 -0.2272 0.4182 0.4609 0.4392 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.0000B              1.0000               1.0000       0.0000
## 1-0.0322B+0.8167B^2    0.0197+-1.1064i      0.9037       0.2472
## 1+1.1234B+0.5378B^2   -1.0444+-0.8767i      0.7334       0.3889
##   
##

m = mult.wge(fac1=est$phi,fac2=c(0,-1))
factor.wge(m$model.coef) # looks good

##   
##   
## Coefficients of AR polynomial:  
## -0.2653 -0.8457 0.1622 0.4320 0.4275 0.2777 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.0000B+1.0000B^2    0.0000+-1.0000i      1.0000       0.2500
## 1-0.8773B              1.1399               0.8773       0.0000
## 1+0.5373B+0.5229B^2   -0.5138+-1.2839i      0.7231       0.3106
## 1+0.6053B             -1.6520               0.6053       0.5000
##   
##

f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 16.01932

## [1] 16.01932

f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 2109.469

## [1] 2109.469

This model did significantly better with a short term ASE of 16 and a long term ASE of 2,109. Testing out some of the other values from aic5, found that p = 6 and q = 5 did even better.

est = est.arma.wge(d3,p=6,q=5)

##   
##   
## Coefficients of AR polynomial:  
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.7230B+0.8346B^2   -1.0323+-0.3642i      0.9136       0.4460
## 1-0.8558B              1.1685               0.8558       0.0000
## 1+0.4123B+0.6580B^2   -0.3132+-1.1923i      0.8112       0.2909
## 1-0.4679B              2.1373               0.4679       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.6419 -0.0609 0.3220 0.7473 0.6335 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.0000B              1.0000               1.0000       0.0000
## 1+0.0687B+0.8449B^2   -0.0407+-1.0872i      0.9192       0.2560
## 1+1.5732B+0.7497B^2   -1.0492+-0.4827i      0.8659       0.4314
##   
##

m = mult.wge(fac1=est$phi,fac2=c(0,-1))
factor.wge(m$model.coef) # looks good

##   
##   
## Coefficients of AR polynomial:  
## -0.8116 -0.7769 -0.2285 0.7481 0.7183 0.3051 0.1352 -0.2199 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.0000B+1.0000B^2    0.0000+-1.0000i      1.0000       0.2500
## 1+1.7230B+0.8346B^2   -1.0323+-0.3642i      0.9136       0.4460
## 1-0.8558B              1.1685               0.8558       0.0000
## 1+0.4123B+0.6580B^2   -0.3132+-1.1923i      0.8112       0.2909
## 1-0.4679B              2.1373               0.4679       0.0000
##   
##

f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 14.86637

## [1] 14.86637

f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 558.2356

## [1] 558.2356

This model had a short term ASE of 15 and a long term ASE of 558. This is the model we will use to get the forecasts.

x = fed_housing_data$Housing_Price_Index 
d = artrans.wge(x,1)

d2 = artrans.wge(d,1)

d3 = artrans.wge(d2,c(0,-1))

est = est.arma.wge(d3,p=6,q=5)

##   
##   
## Coefficients of AR polynomial:  
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.7230B+0.8346B^2   -1.0323+-0.3642i      0.9136       0.4460
## 1-0.8558B              1.1685               0.8558       0.0000
## 1+0.4123B+0.6580B^2   -0.3132+-1.1923i      0.8112       0.2909
## 1-0.4679B              2.1373               0.4679       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.6419 -0.0609 0.3220 0.7473 0.6335 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.0000B              1.0000               1.0000       0.0000
## 1+0.0687B+0.8449B^2   -0.0407+-1.0872i      0.9192       0.2560
## 1+1.5732B+0.7497B^2   -1.0492+-0.4827i      0.8659       0.4314
##   
##

m = mult.wge(fac1=est$phi,fac2=c(0,-1))
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

hpi.pred.short = f$f
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

hpi.pred.long = f$f
fed_housing_data_short$Housing_Price_Index[(l-h.short+1):l] = hpi.pred.short
fed_housing_data_long$Housing_Price_Index[(l-h.long+1):l] = hpi.pred.long

Signal Plus Noise Model

Our domain knowledge tells us that the positive trend of Median House Price is not random and does have an underlying regression line, but we can run a few tests for linear trend to be sure. We will test for linear trend using simple linear regression with a standard t-test, the Cochran Orcutt Test for Linear Trend, and the WBG Test (Woodward, Bottone, and Gray, 1997). All 3 tests have a p-value below 0.05, rejecting the null hypothesis. The residuals are not white noise yet, so we will fit an ARMA model along with the linear signal.

Test for linear Trend

# Testing for Linear Trend
t = 1:168
reg = slr.wge(train$Median_Sales_Price)
summary(reg)

##            Length Class  Mode   
## res        168    -none- numeric
## b0hat        1    -none- numeric
## b1hat        1    -none- numeric
## pvalue       1    -none- numeric
## tstatistic   1    -none- numeric

# Cochran Orcutt Test for Linear Trend
co_test_msp = co.wge(train$Median_Sales_Price)
co_test_msp$pvalue

## [1] 4.411261e-15

# WBG Test
wbg.boot.wge(train$Median_Sales_Price)

## $p
## [1] 5
## 
## $phi
## [1]  0.77778625  0.44429145  0.12173280 -0.08786636 -0.25901843
## 
## $pv
## [1] 0.04010025

# Linear model according to Simple Linear Regression (ignoring correlated errors)
plotts.wge(train$Median_Sales_Price)
fit = reg$b0hat + t*reg$b1hat
points(fit, type = "l")

# Examine Residuals
resid = train$Median_Sales_Price - fit
plot(resid, type = "p")
abline(h=0)

# Quicker way to plot residuals: plot(reg$res, type = "l")

x = fed_housing_data
reg = slr.wge(x$Median_Sales_Price)
plot(reg$res, type = "l")
abline(h=0)

t=1:198
plotts.wge(x$Median_Sales_Price,xlab="Time",ylab="log Median House Price",main="Linear fit to log Median House Price")
fit = reg$b0hat + t*reg$b1hat
points(fit, type = "l")

aic5.ar.wge(reg$res,p=0:10)

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
## Method= mle 
##     p        aic
##     6  -7.203842
##     5  -7.198616
##     7  -7.194018
##     8  -7.188880
##     4  -7.180527

Choosing estimates for the ARMA portion of the Signal Plus Noise Model

After comparing the MLE and Burg estimates on AIC, AICC, and BIC, we determined the MLE estimates were better for all three metrics. All estimates chose an AR(6), with the exception of the MLE estimate using the BIC, which chose an AR(2). We will move forward with the AR(6) model, compare the forecasts visually, and test whether the Average Square Error (ASE) is better for the MLE or the Burg estimates. The MLE estimates have better ASEs, as expected. While there isn’t much difference visually between the two, the MLE lower limit (blue line) does contain the actual values, whereas the Burg lower limit (green line) are slightly more narrow and doesn’t capture one of the actual values. Given the other evidence for using the MLE estimates, we will move forward with MLE estimates for an AR(6).

# Fit signal plus noise models

# MLE Estimates with AIC
spn.mle.aic = aic.ar.wge(reg$res, type = 'aic', p = 0:7) # ar(6), aic -7.269686

# MLE Estimates with AICC
spn.mle.aicc = aic.ar.wge(reg$res, type = 'aicc', p = 0:7) # ar(6), aicc -6.25239

# MLE Estimates with BIC
spn.mle.bic = aic.ar.wge(reg$res, type = 'bic', p = 0:7) # ar(2), bic -7.147245

# Burg Estimates with AIC
spn.b.aic = aic.burg.wge(reg$res, p = 1:7, type = 'aic') # ar(6), aic -7.278742

# Burg Estimates with AICC
spn.b.aicc = aic.burg.wge(reg$res, p = 1:7, type = 'aicc') # ar(6), aicc -6.261446

# Burg Estimates with BIC
spn.b.bic = aic.burg.wge(reg$res, p = 1:7, type = 'bic') # ar(6), bic -7.157681

# MLE estimates are better for all types: aic, aicc, bic - see comparison table

# Fit with MLE estimates, train/test set
fit.mle.sig = fore.sigplusnoise.wge(train$Median_Sales_Price, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 30)

##   
##   
## Coefficients of AR polynomial:  
## 0.6593 0.3962 0.1492 0.0073 -0.0863 -0.1550 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9202B              1.0867               0.9202       0.0000
## 1-0.9024B              1.1082               0.9024       0.0000
## 1+0.0258B+0.4419B^2   -0.0292+-1.5041i      0.6647       0.2531
## 1+1.1375B+0.4224B^2   -1.3465+-0.7446i      0.6499       0.4196
##   
##

# Examine residuals
plot(fit.mle.sig$resid)

# ASE with MLE estimates
ASE = mean((test$Median_Sales_Price - fit.mle.sig$f)^2)
ASEexp = mean((exp(test$Median_Sales_Price) - exp(fit.mle.sig$f))^2) # 826.5M

# Fit with burg estimates, train/test set
fit.b.sig = fore.sigplusnoise.wge(train$Median_Sales_Price, linear = TRUE, method = 'burg', freq = 0, max.p = 6, n.ahead = 30)

##   
##   
## Coefficients of AR polynomial:  
## 0.6479 0.3963 0.1454 0.0081 -0.0849 -0.1570 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.8164B+0.8282B^2    1.0967+-0.0695i      0.9100       0.0101
## 1+0.0238B+0.4439B^2   -0.0268+-1.5007i      0.6663       0.2528
## 1+1.1448B+0.4270B^2   -1.3404+-0.7383i      0.6535       0.4199
##   
##

# Examine residuals
plot(fit.b.sig$resid)

# ASE with Burg estimates
ASE.b = mean((test$Median_Sales_Price - fit.b.sig$f)^2)
ASE.b.exp = mean((exp(test$Median_Sales_Price) - exp(fit.b.sig$f))^2) # 1.11B

# Create table for comparison
labels = c("aic", "aicc", "bic", "ase for AR(6)")
mle.ests = c(spn.mle.aic$value, spn.mle.aicc$value, spn.mle.bic$value, ASE)
burg.ests = c(spn.b.aic$value, spn.b.aicc$value, spn.b.bic$value, ASE.b)
comparison = data.frame(labels,mle.ests,burg.ests)
comparison

##          labels   mle.ests    burg.ests
## 1           aic -7.2038419 -7.212180512
## 2          aicc -6.1898929 -6.198231498
## 3           bic -7.0989717 -7.108212533
## 4 ase for AR(6)  0.0061569  0.008076531

# Different Plot
log.mhp = fed_housing_data$Median_Sales_Price
plot(log.mhp, type = 'l')
lines(seq(169,198,1), fit.mle.sig$f, col = "red")
lines(seq(169,198,1), fit.mle.sig$ll, col = "blue")
lines(seq(169,198,1), fit.mle.sig$ul, col = "blue")
lines(seq(169,198,1), fit.b.sig$f, col = "orange")
lines(seq(169,198,1), fit.b.sig$ll, col = "green")
lines(seq(169,198,1), fit.b.sig$ul, col = "green")

Signal Plus Noise using 1 Yr Horizon

The 1 Year Horizon backcast (for the past year) is pretty close to the actual values, with an ASE (on the non-lagged values) around 50M. The residuals appear to be white noise when looking at both the plot and the ACF, indicating this model has explained most of the noise of the data.

# Making sure log.mhp is the correct data
log.mhp = fed_housing_data$Median_Sales_Price

# Fit with MLE estimates, using all data
fit.mle.sig_h4 = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 4, lastn = TRUE)

##   
##   
## Coefficients of AR polynomial:  
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.8131B+0.8234B^2    1.1010+-0.0480i      0.9074       0.0069
## 1+1.0575B+0.3859B^2   -1.3702+-0.8449i      0.6212       0.4121
## 1+0.0398B+0.3709B^2   -0.0537+-1.6410i      0.6090       0.2552
##   
##

# Different Plot
plot(log.mhp, type = 'l')
lines(seq(195,198,1), fit.mle.sig_h4$f, col = "red")
lines(seq(195,198,1), fit.mle.sig_h4$ll, lty = 3, col = "blue")
lines(seq(195,198,1), fit.mle.sig_h4$ul, lty = 3, col = "blue")

# Zoomed In Plot
plot(seq(150,198,1),log.mhp[150:198], type = 'l', ylim = c(12.3, 13.2), main = "Linear Signal with AR(6) Noise Short Term Forecast", 
     xlab = "Time", ylab = "log Median Housing Sales Price")
lines(seq(195,198,1), fit.mle.sig_h4$f, col = "red")
lines(seq(195,198,1), fit.mle.sig_h4$ll, lty = 3, col = "blue")
lines(seq(195,198,1), fit.mle.sig_h4$ul, lty = 3, col = "blue")

# ASE with MLE estimates
ASE.h4 = mean((log.mhp[195:198] - fit.mle.sig_h4$f)^2)
ASEexp.h4 = mean((exp(log.mhp[195:198]) - exp(fit.mle.sig_h4$f))^2)
ASEexp.h4 # 50.92M

## [1] 50917914

# Examine residuals
plot(fit.mle.sig_h4$resid)

plotts.sample.wge(fit.mle.sig_h4$resid)

## $xbar
## [1] 0.0005583755
## 
## $autplt
##  [1]  1.000000000 -0.030928685 -0.019285044 -0.016430741 -0.018749777
##  [6] -0.011967976 -0.020841820 -0.072191596  0.032269287  0.021997333
## [11] -0.139312067  0.034046387  0.050705330 -0.130392909 -0.017846377
## [16]  0.013239634  0.049386836 -0.032977920 -0.073065205 -0.066452453
## [21]  0.207698279 -0.082585175  0.031084812 -0.062395719 -0.017646344
## [26] -0.005338563
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1] -2.2164539108 -1.9849206395 -1.6524595119 -1.2753123609 -0.9026883503
##  [6] -0.5697330186 -0.2973356993 -0.0947071844  0.0378192323  0.1066356847
## [11]  0.1232846159  0.1030012479  0.0628682718  0.0193305301 -0.0146466493
## [16] -0.0318985733 -0.0318047089 -0.0189600869 -0.0003052964  0.0179951969
## [21]  0.0326096059  0.0432898408  0.0513817478  0.0574210279  0.0591293252
## [26]  0.0510549508  0.0263798048 -0.0196075733 -0.0856570020 -0.1619594895
## [31] -0.2305057758 -0.2696686264 -0.2626013516 -0.2060711169 -0.1144768534
## [36] -0.0160724930  0.0565573644  0.0770660068  0.0337627161 -0.0648105292
## [41] -0.1875268450 -0.2840930541 -0.2984854839 -0.1921574926  0.0349986465
## [46]  0.3425768179  0.6678254499  0.9482788033  1.1354639949  1.1985632117
## [51]  1.1230390343  0.9085430614  0.5680860846  0.1287458457 -0.3670491750
## [56] -0.8619714586 -1.2893954797 -1.5863119898 -1.7101006697 -1.6495832635
## [61] -1.4233038891 -1.0686030283 -0.6305291543 -0.1551810623  0.3136053611
## [66]  0.7364264052  1.0801353372  1.3192114335  1.4366475909  1.4246924028
## [71]  1.2859157508  1.0348495407  0.6998155913  0.3233817577 -0.0414560620
## [76] -0.3419837227 -0.5414473463 -0.6307151786 -0.6265856294 -0.5579564343
## [81] -0.4506893185 -0.3202405118 -0.1733955011 -0.0151343742  0.1447352341
## [86]  0.2892056076  0.3969424807  0.4485239121  0.4332923725  0.3548702321
## [91]  0.2340288625  0.1070212601  0.0168377124 -0.0026877158  0.0568993323
## [96]  0.1734383141  0.3043620452  0.4046463365  0.4419284082

acf(fit.mle.sig_h4$resid)

ljung.wge(fit.mle.sig_h4$resid, p = 6, q = 0, K = 48)

## Obs -0.03092868 -0.01928504 -0.01643074 -0.01874978 -0.01196798 -0.02084182 -0.0721916 0.03226929 0.02199733 -0.1393121 0.03404639 0.05070533 -0.1303929 -0.01784638 0.01323963 0.04938684 -0.03297792 -0.07306521 -0.06645245 0.2076983 -0.08258517 0.03108481 -0.06239572 -0.01764634 -0.005338563 0.01017477 -0.03592154 0.01086301 0.07515347 -0.1090446 -0.02897116 0.1183135 -0.06710514 0.1362477 0.02578356 0.04932395 0.02279133 -0.02142985 -0.0590818 0.079141 -0.02309291 2.742465e-05 -0.0007788745 -0.01091636 0.02298131 0.07171739 0.008512692 0.03126207

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 44.69122
## 
## $df
## [1] 42
## 
## $pval
## [1] 0.3594121

Signal Plus Noise using 5 Yr Horizon

The 5 Year Horizon backcast (for the past 5 years) is pretty close to the actual values, with an ASE (on the non-lagged values) around 1.1B. The residuals appear to be white noise when looking at both the plot and the ACF, indicating this model has explained most of the noise of the data.

# Making sure log.mhp is the correct data
log.mhp = fed_housing_data$Median_Sales_Price

# Fit with MLE estimates, using all data
fit.mle.sig_h20 = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 20, lastn = TRUE)

##   
##   
## Coefficients of AR polynomial:  
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.8131B+0.8234B^2    1.1010+-0.0480i      0.9074       0.0069
## 1+1.0575B+0.3859B^2   -1.3702+-0.8449i      0.6212       0.4121
## 1+0.0398B+0.3709B^2   -0.0537+-1.6410i      0.6090       0.2552
##   
##

# Different Plot
plot(log.mhp, type = 'l')
lines(seq(179,198,1), fit.mle.sig_h20$f, col = "red")
lines(seq(179,198,1), fit.mle.sig_h20$ll, lty = 3, col = "blue")
lines(seq(179,198,1), fit.mle.sig_h20$ul, lty = 3, col = "blue")

# Zoomed In
plot(seq(150,198,1),log.mhp[150:198], type = 'l', ylim = c(12.3, 13.2), main = "Linear Signal with AR(6) Noise Long Term Forecast", 
     xlab = "Time", ylab = "log Median Housing Sales Price")
lines(seq(179,198,1), fit.mle.sig_h20$f, col = "red")
lines(seq(179,198,1), fit.mle.sig_h20$ll, lty = 3, col = "blue")
lines(seq(179,198,1), fit.mle.sig_h20$ul, lty = 3, col = "blue")

# ASE with MLE estimates
ASE.h20 = mean((log.mhp[179:198] - fit.mle.sig_h20$f)^2)
ASEexp.h20 = mean((exp(log.mhp[179:198]) - exp(fit.mle.sig_h20$f))^2)
ASEexp.h20 # 1.1B

## [1] 1104897278

# Examine residuals
plot(fit.mle.sig_h20$resid)

plotts.sample.wge(fit.mle.sig_h20$resid)

## $xbar
## [1] 0.0005583755
## 
## $autplt
##  [1]  1.000000000 -0.030928685 -0.019285044 -0.016430741 -0.018749777
##  [6] -0.011967976 -0.020841820 -0.072191596  0.032269287  0.021997333
## [11] -0.139312067  0.034046387  0.050705330 -0.130392909 -0.017846377
## [16]  0.013239634  0.049386836 -0.032977920 -0.073065205 -0.066452453
## [21]  0.207698279 -0.082585175  0.031084812 -0.062395719 -0.017646344
## [26] -0.005338563
## 
## $freq
##  [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
##  [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
## 
## $dbz
##  [1] -2.2164539108 -1.9849206395 -1.6524595119 -1.2753123609 -0.9026883503
##  [6] -0.5697330186 -0.2973356993 -0.0947071844  0.0378192323  0.1066356847
## [11]  0.1232846159  0.1030012479  0.0628682718  0.0193305301 -0.0146466493
## [16] -0.0318985733 -0.0318047089 -0.0189600869 -0.0003052964  0.0179951969
## [21]  0.0326096059  0.0432898408  0.0513817478  0.0574210279  0.0591293252
## [26]  0.0510549508  0.0263798048 -0.0196075733 -0.0856570020 -0.1619594895
## [31] -0.2305057758 -0.2696686264 -0.2626013516 -0.2060711169 -0.1144768534
## [36] -0.0160724930  0.0565573644  0.0770660068  0.0337627161 -0.0648105292
## [41] -0.1875268450 -0.2840930541 -0.2984854839 -0.1921574926  0.0349986465
## [46]  0.3425768179  0.6678254499  0.9482788033  1.1354639949  1.1985632117
## [51]  1.1230390343  0.9085430614  0.5680860846  0.1287458457 -0.3670491750
## [56] -0.8619714586 -1.2893954797 -1.5863119898 -1.7101006697 -1.6495832635
## [61] -1.4233038891 -1.0686030283 -0.6305291543 -0.1551810623  0.3136053611
## [66]  0.7364264052  1.0801353372  1.3192114335  1.4366475909  1.4246924028
## [71]  1.2859157508  1.0348495407  0.6998155913  0.3233817577 -0.0414560620
## [76] -0.3419837227 -0.5414473463 -0.6307151786 -0.6265856294 -0.5579564343
## [81] -0.4506893185 -0.3202405118 -0.1733955011 -0.0151343742  0.1447352341
## [86]  0.2892056076  0.3969424807  0.4485239121  0.4332923725  0.3548702321
## [91]  0.2340288625  0.1070212601  0.0168377124 -0.0026877158  0.0568993323
## [96]  0.1734383141  0.3043620452  0.4046463365  0.4419284082

acf(fit.mle.sig_h20$resid)

ljung.wge(fit.mle.sig_h20$resid, p = 6, q = 0, K = 48)

## Obs -0.03092868 -0.01928504 -0.01643074 -0.01874978 -0.01196798 -0.02084182 -0.0721916 0.03226929 0.02199733 -0.1393121 0.03404639 0.05070533 -0.1303929 -0.01784638 0.01323963 0.04938684 -0.03297792 -0.07306521 -0.06645245 0.2076983 -0.08258517 0.03108481 -0.06239572 -0.01764634 -0.005338563 0.01017477 -0.03592154 0.01086301 0.07515347 -0.1090446 -0.02897116 0.1183135 -0.06710514 0.1362477 0.02578356 0.04932395 0.02279133 -0.02142985 -0.0590818 0.079141 -0.02309291 2.742465e-05 -0.0007788745 -0.01091636 0.02298131 0.07171739 0.008512692 0.03126207

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 44.69122
## 
## $df
## [1] 42
## 
## $pval
## [1] 0.3594121

Rolling Window RMSE for SPN

To compare this model with other models, we will use code that Aaron has written to create a rolling window that measures RMSE for the model. We will use the MLE estimates for p values of 1-6 to assure ourselves that an AR(6) is the best AR model to accompany the signal. The average RMSE for all 1 year windows (4 quarters) is 0.0326, and the average RMSE for all 5 year windows (20 quarters) is 0.07595.

# Aaron's Rolling Window RMSE Code

# If you wanted to use all the fore.sigplusnoise.wge parameters, it would look something like this:
series = log.mhp
horizon = 12
linear = TRUE
method = "mle"
freq=0
max.p=5

# Rolling Window for 1 Year Horizon with MLE
horizon = 4
method = "mle"
source("functions_Aaron.R")
mle.p1h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=1) # 0.03564353

mle.p2h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=2) # 0.03547613

mle.p3h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=3) # 0.03495764

mle.p4h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=4) # 0.03326752

mle.p5h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=5) # 0.03261551

mle.p6h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=6) # 0.03259507
mle.p7h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=7) # 0.03259507

# Rolling Window for 5 Year Horizon  with MLE
horizon = 20
source("functions_Aaron.R")
mle.p1h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=1) # 0.08107048

mle.p2h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=2) # 0.08065467

mle.p3h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=3) # 0.07973718

mle.p4h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=4) # 0.07797091

mle.p5h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=5) # 0.07669397

mle.p6h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=6) # 0.0759512
mle.p7h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=7) # 0.0759512

Evaluating Models

We will evaluate the model by comparing the ACFs and the Spectral Densities for the same model on generated signal plus noise data. The loop given in class for Unit 11 does not work with signal plus noise generated realizations because the output includes the AR portion of the model regardless of whether plot = TRUE or FALSE. Thus, we will generate 10 different realizations and then take the ACFs and Spectral Densities of all 10 objects. The

# Compare Spectral Densities
sims = 10
SpecDen = parzen.wge(log.mhp, plot = FALSE)
plot(SpecDen$freq,SpecDen$pzgram, type = "l", lwd = 3,xlab="Frequency",ylab="dB",main="True Spectral Density vs Generated Data")

for( i in 1: sims)
{
   SpecDen2 = parzen.wge(ten.generated[[i]], plot = FALSE)
   lines(SpecDen2$freq,SpecDen2$pzgram, lwd = 2, col = "red")
}

#Compare ACFs
sims = 10
ACF = acf(log.mhp, plot = "FALSE")
plot(ACF$lag ,ACF$acf , type = "l", lwd = 4,xlab="Lag",ylab="ACF",main="True ACF vs Generated Data")

for( i in 1: sims)
{
   ACF2 = acf(ten.generated[[i]], plot = "FALSE")
   lines(ACF2$lag ,ACF2$acf, lwd = 1, col = "red")
}

# Residuals
plot(fit.mle.sig$resid, xlab="Time", ylab="", main="Residuals",type="b",col = "black", pch = 19)

acf(fit.mle.sig$resid,main="ACF of Residuals",lag.max=100)

Model Choice

The MLE estimates for forecasting had better ASE and better rolling window RMSE for all three forecasts (1 yr, 3 yr, and 5 yr)

Our final Signal Plus Noise Model is Xt = 10.871 + .011t + zt, where (1 - .716B - .367B2 - 0.137B3 + 0.062B4 + .076B5 + .118B6)Zt with sigma2 = 0.0006929526

Wording:

In 1 year, we are 95% confident that the median home sale price will be between $387,759 (e^12.86814) and $468,321 (e^13.05691). Our best estimate is $426,142 (e^12.96253).

In 5 years, we are 95% confident that the median home sale price will be between $419,048 (e^12.94574) and $669,482 (e^13.41426). Our best estimate is $529,665 (e^13.18).

log.mhp = fed_housing_data$Median_Sales_Price
x = fed_housing_data$Median_Sales_Price
l = length(log.mhp)
h.short = 4
h.long = 20

# Fit with MLE estimates, using all data, forecasting ahead 1 year
fit.mle.sig_h4_ahead = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 4, lastn = FALSE)

##   
##   
## Coefficients of AR polynomial:  
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.8131B+0.8234B^2    1.1010+-0.0480i      0.9074       0.0069
## 1+1.0575B+0.3859B^2   -1.3702+-0.8449i      0.6212       0.4121
## 1+0.0398B+0.3709B^2   -0.0537+-1.6410i      0.6090       0.2552
##   
##

# Fit with MLE estimates, using all data, forecasting ahead 5 years
fit.mle.sig_h20_ahead = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 20, lastn = FALSE)

##   
##   
## Coefficients of AR polynomial:  
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.8131B+0.8234B^2    1.1010+-0.0480i      0.9074       0.0069
## 1+1.0575B+0.3859B^2   -1.3702+-0.8449i      0.6212       0.4121
## 1+0.0398B+0.3709B^2   -0.0537+-1.6410i      0.6090       0.2552
##   
##

# Checking if models all have same intercept, slope, and phis
fit.mle.sig_h4$b0hat == fit.mle.sig_h4_ahead$b0hat

## (Intercept) 
##        TRUE

fit.mle.sig_h4$b1hat == fit.mle.sig_h4_ahead$b1hat

## time 
## TRUE

fit.mle.sig_h4$phi.z == fit.mle.sig_h4_ahead$phi.z

## [1] TRUE TRUE TRUE TRUE TRUE TRUE

# Model Coefficients
fit.mle.sig_h4$phi.z

## [1]  0.71575088  0.36727152  0.13737786 -0.06190173 -0.07612962 -0.11786143

fit.mle.sig_h4$b0hat

## (Intercept) 
##    10.87039

fit.mle.sig_h4$b1hat

##       time 
## 0.01084782

fit.mle.sig_h4$wnv

## [1] 0.0006929526

# Confidence Intervals

# Find forecasted Values
fit.mle.sig_h4_ahead$f[4] # 12.96253 ($426,142.89)

## [1] 12.96253

fit.mle.sig_h4_ahead$ll[4] # 12.86814 ($387,759.27)

## [1] 12.86814

fit.mle.sig_h4_ahead$ul[4] # 13.05691 ($468,321.36)

## [1] 13.05691

fit.mle.sig_h20_ahead$f[20] # 13.18 ($529,665.00)

## [1] 13.18

fit.mle.sig_h20_ahead$ll[20] # 12.94574 ($419,047.69)

## [1] 12.94574

fit.mle.sig_h20_ahead$ul[20] # 13.41426 ($669,482.30)

## [1] 13.41426

# Forecasting next year (Zoomed In)
plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",
     xlab="Time", ylab = "log Median Housing Sales Price",
     main="Linear Signal with AR(6) Noise Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$f,col="red")
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$ul,col="blue",lty=3)

# Forecasting next 5 years (Zoomed In)
plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",
     xlab="Time", ylab = "log Median Housing Sales Price",
     main="Linear Signal with AR(6) Noise Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$f,col="red")
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$ul,col="blue",lty=3)

Multi-variate Models

We will try out an MLR model with lag and a VAR model that predicts all variables at the same time. The exogenous predictions we made previously will be used for the MLR Model.

MLR Model

We will first start by looking at the lag plots for the four exogenous variables.

x = fed_housing_data
l = ccf(x$Median_Sales_Price,x$Ownership_Rate,lag.max=80)

which.max(l$acf)

## [1] 131

l$acf[20] # 0.3090685

## [1] -0.03980345

l$lag[20] # 0

## [1] -61

l = ccf(x$Median_Sales_Price,x$Housing_Units_Completed,lag.max=80)

which.max(l$acf)

## [1] 150

l$acf[150] # 0.263895

## [1] 0.263895

l$lag[150] # 69, 21 seemed alright

## [1] 69

ccf(x$Median_Sales_Price,x$Supply_New_Houses,main="Median Sales Price & Supply of New Houses")

l = ccf(x$Median_Sales_Price,x$Supply_New_Houses,lag.max=60)

which.min(l$acf)

## [1] 70

l$acf[70] # -0.2592145

## [1] -0.2592145

l$lag[70] # 9

## [1] 9

l = ccf(x$Median_Sales_Price,x$Housing_Price_Index,lag.max=60)

which.max(l$acf)

## [1] 61

l$acf[61] # 0.933195

## [1] 0.933195

l$lag[61] # 0

## [1] 0

It looks like Ownership Rate has a lag of 0, Housing Units Completed has a lag of 21, Supply of New Houses has a lag of 9, and Housing Price Index has a lag of 0.

What does a model with these 4 lagged variables look like.

x.short = fed_housing_data_short
x.long = fed_housing_data_long
x$Year_Quarter = c()
x.short$Year_Quarter = c()
x.long$Year_Quarter = c()
l = length(x$Median_Sales_Price)
t=1:l
t.train.short= 1:(l-h.short)
t.test.short=(l-h.short+1):l
t.train.long= 1:(l-h.long)
t.test.long=(l-h.long+1):l
x$Housing_Units_Completed_l21 = dplyr::lag(x$Housing_Units_Completed,21)
x$Supply_New_Houses_l9 = dplyr::lag(x$Supply_New_Houses,9)
x.short$Housing_Units_Completed_l21 = dplyr::lag(x.short$Housing_Units_Completed,21)
x.short$Supply_New_Houses_l9 = dplyr::lag(x.short$Supply_New_Houses,9)
x.long$Housing_Units_Completed_l21 = dplyr::lag(x.long$Housing_Units_Completed,21)
x.long$Supply_New_Houses_l9 = dplyr::lag(x.long$Supply_New_Houses,9)
ksfit = lm(x$Median_Sales_Price~x$Ownership_Rate+x$Housing_Units_Completed_l21+x$Supply_New_Houses_l9+x$Housing_Price_Index+t)
summary(ksfit)

## 
## Call:
## lm(formula = x$Median_Sales_Price ~ x$Ownership_Rate + x$Housing_Units_Completed_l21 + 
##     x$Supply_New_Houses_l9 + x$Housing_Price_Index + t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.161704 -0.039905  0.000082  0.037073  0.169569 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    1.121e+01  2.627e-01  42.655  < 2e-16 ***
## x$Ownership_Rate              -2.029e-03  4.165e-03  -0.487  0.62676    
## x$Housing_Units_Completed_l21  5.980e-05  2.174e-05   2.751  0.00658 ** 
## x$Supply_New_Houses_l9        -2.926e-02  3.955e-03  -7.400 5.88e-12 ***
## x$Housing_Price_Index          2.695e-04  1.058e-04   2.548  0.01172 *  
## t                              9.371e-03  2.996e-04  31.282  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06558 on 171 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.9847, Adjusted R-squared:  0.9842 
## F-statistic:  2195 on 5 and 171 DF,  p-value: < 2.2e-16

AIC(ksfit) # -454.2937

## [1] -454.2937

The Ownership rate variable didn’t have a very high p value, so it wasn’t significant in the model. Let’s see what happens when it is removed.

ksfit = lm(x$Median_Sales_Price~x$Housing_Units_Completed_l21+x$Supply_New_Houses_l9+x$Housing_Price_Index+t)
summary(ksfit)

## 
## Call:
## lm(formula = x$Median_Sales_Price ~ x$Housing_Units_Completed_l21 + 
##     x$Supply_New_Houses_l9 + x$Housing_Price_Index + t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.162411 -0.041527 -0.001268  0.037562  0.172413 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    1.108e+01  3.039e-02 364.541  < 2e-16 ***
## x$Housing_Units_Completed_l21  5.310e-05  1.679e-05   3.163  0.00185 ** 
## x$Supply_New_Houses_l9        -2.821e-02  3.303e-03  -8.541 6.84e-15 ***
## x$Housing_Price_Index          2.695e-04  1.055e-04   2.554  0.01151 *  
## t                              9.341e-03  2.925e-04  31.935  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06543 on 172 degrees of freedom
##   (21 observations deleted due to missingness)
## Multiple R-squared:  0.9846, Adjusted R-squared:  0.9843 
## F-statistic:  2756 on 4 and 172 DF,  p-value: < 2.2e-16

AIC(ksfit) # -456.0482

## [1] -456.0482

In this model, all the variables are significant. The AIC value is slightly lower as well. We will now determine how to model the residuals using an ARMA model.

aic5.wge(ksfit$residuals,p=0:4,q=0:2,type='aic') # best 2/0

## ---------WORKING... PLEASE WAIT... 
## 
## 
## Five Smallest Values of  aic 
##     p    q        aic
##     2    0  -6.610677
##     1    1  -6.608485
##     3    0  -6.599501
##     2    1  -6.599466
##     1    2  -6.595973

The model with the best AIC is an AR(2).

fit=arima(x.short$Median_Sales_Price[t.train.short],order=c(2,0,0),xreg=cbind(t.train.short,x.short$Housing_Units_Completed_l21[t.train.short],x.short$Supply_New_Houses_l9[t.train.short],x.short$Housing_Price_Index[t.train.short]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.short$Housing_Units_Completed_l21[t.test.short],Supply_New_Houses_l9=x.short$Supply_New_Houses_l9[t.test.short],Housing_Price_Index=x.short$Housing_Price_Index[t.test.short]))
ase = mean((fed_housing_data_NL$Median_Sales_Price[t.test.short]-exp(preds$pred))^2)/1e6
ase # 588.675

## [1] 588.675

plot(seq(1,l,1),x.short$Median_Sales_Price,type="b")
points(seq((l-h.short+1),l,1),preds$pred,type="b",pch=15,col="blue")

fit=arima(x.long$Median_Sales_Price[t.train.long],order=c(2,0,0),xreg=cbind(t.train.long,x$Housing_Units_Completed_l21[t.train.long],x$Supply_New_Houses_l9[t.train.long],x$Housing_Price_Index[t.train.long]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.long$Housing_Units_Completed_l21[t.test.long],Supply_New_Houses_l9=x.long$Supply_New_Houses_l9[t.test.long],Housing_Price_Index=x.long$Housing_Price_Index[t.test.long]))
ase = mean((fed_housing_data_NL$Median_Sales_Price[t.test.long]-exp(preds$pred))^2)/1e6
ase # 1431.873

## [1] 1431.873

plot(seq(1,l,1),x.long$Median_Sales_Price,type="b")
points(seq((l-h.long+1),l,1),preds$pred,type="b",pch=15,col="blue")

fit=arima(x.short$Median_Sales_Price[t.train.short],order=c(2,0,0),xreg=cbind(t.train.short,x.short$Housing_Units_Completed_l21[t.train.short],x.short$Supply_New_Houses_l9[t.train.short],x.short$Housing_Price_Index[t.train.short]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.short$Housing_Units_Completed_l21[t.test.short],Supply_New_Houses_l9=x.short$Supply_New_Houses_l9[t.test.short],Housing_Price_Index=x.short$Housing_Price_Index[t.test.short]))
plot(seq(xmin_plot,l,1),x.short$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),preds$pred,col="red")

fit=arima(x.long$Median_Sales_Price[t.train.long],order=c(2,0,0),xreg=cbind(t.train.long,x$Housing_Units_Completed_l21[t.train.long],x$Supply_New_Houses_l9[t.train.long],x$Housing_Price_Index[t.train.long]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.long$Housing_Units_Completed_l21[t.test.long],Supply_New_Houses_l9=x.long$Supply_New_Houses_l9[t.test.long],Housing_Price_Index=x.long$Housing_Price_Index[t.test.long]))
plot(seq(xmin_plot,l,1),x.long$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),preds$pred,col="red")

The short term ASE is 589 M and the long term ASE is 1.4 B.

In order to forecast out past the current date, we had to make forecasts for the exogenous variables as well.

x = fed_housing_data
x$Year_Quarter = c()
fore_df <- as.data.frame(matrix(0, nrow = h.long, ncol = ncol(x)))
colnames(fore_df) <- colnames(x)

# Ownership Rate
x = fed_housing_data$Ownership_Rate 
est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.9534B+0.9571B^2    1.0204+-0.0589i      0.9783       0.0092
## 1-1.3559B+0.7452B^2    0.9098+-0.7171i      0.8632       0.1062
## 1+0.8617B             -1.1605               0.8617       0.5000
## 1-0.0469B+0.7004B^2    0.0335+-1.1944i      0.8369       0.2455
## 1+0.8177B+0.6059B^2   -0.6748+-1.0932i      0.7784       0.3380
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.7795 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.7795B              1.2828               0.7795       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)

## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6

fore_df$Ownership_Rate[1:h.long] = f$f

# Housing Units Complete
x = fed_housing_data$Housing_Units_Completed  
est = est.arma.wge(x,p=9,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.7319B+0.7686B^2    1.1267+-0.1778i      0.8767       0.0249
## 1-0.8710B              1.1481               0.8710       0.0000
## 1+1.5502B+0.7380B^2   -1.0502+-0.5020i      0.8591       0.4290
## 1+0.5020B+0.6999B^2   -0.3586+-1.1402i      0.8366       0.2985
## 1-0.8558B+0.6584B^2    0.6499+-1.0471i      0.8114       0.1616
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.5043 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.5043B              1.9829               0.5043       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)

## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659

fore_df$Housing_Units_Completed[1:h.long] = f$f

# Housing Price Index
x = fed_housing_data$Housing_Price_Index 
d = artrans.wge(x,1)

d2 = artrans.wge(d,1)

d3 = artrans.wge(d2,c(0,-1))

dev.off()

## null device 
##           1

est = est.arma.wge(d3,p=6,q=5)

##   
##   
## Coefficients of AR polynomial:  
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.7230B+0.8346B^2   -1.0323+-0.3642i      0.9136       0.4460
## 1-0.8558B              1.1685               0.8558       0.0000
## 1+0.4123B+0.6580B^2   -0.3132+-1.1923i      0.8112       0.2909
## 1-0.4679B              2.1373               0.4679       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.6419 -0.0609 0.3220 0.7473 0.6335 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.0000B              1.0000               1.0000       0.0000
## 1+0.0687B+0.8449B^2   -0.0407+-1.0872i      0.9192       0.2560
## 1+1.5732B+0.7497B^2   -1.0492+-0.4827i      0.8659       0.4314
##   
##

m = mult.wge(fac1=est$phi,fac2=c(0,-1))
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=FALSE, plot=FALSE)

## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31

fore_df$Housing_Price_Index[1:h.long] = f$f

# Supply of New Houses
x = fed_housing_data$Supply_New_Houses   
est = est.arma.wge(x,p=1,q=1)

##   
##   
## Coefficients of AR polynomial:  
## 0.8930 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.8930B              1.1198               0.8930       0.0000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## 0.1287 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.1287B              7.7707               0.1287       0.0000
##   
##

f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE, plot=FALSE)

## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6

fore_df$Supply_New_Houses[1:h.long] = f$f

# Forecast df
x = fed_housing_data
x$Year_Quarter = c()
fed_housing_data_forecast <- rbind(x, fore_df)

x = fed_housing_data_forecast
x$Housing_Units_Completed_l21 = dplyr::lag(x$Housing_Units_Completed,21)
x$Supply_New_Houses_l9 = dplyr::lag(x$Supply_New_Houses,9)
t=1:l
t.fore.short = (l+1):(l+h.short)
t.fore.long = (l+1):(l+h.long)
fit=arima(x$Median_Sales_Price[t],order=c(2,0,0),xreg=cbind(t,x$Housing_Units_Completed_l21[t],x$Supply_New_Houses_l9[t],x$Housing_Price_Index[t]))
preds = predict(fit,newxreg = data.frame(t=t.fore.short,Housing_Units_Completed_l21=x$Housing_Units_Completed_l21[t.fore.short],Supply_New_Houses_l9=x$Supply_New_Houses_l9[t.fore.short],Housing_Price_Index=x$Housing_Price_Index[t.fore.short]))
plot(seq(xmin_plot,l+h.short,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),preds$pred,col="red")

preds = predict(fit,newxreg = data.frame(t=t.fore.long,Housing_Units_Completed_l21=x$Housing_Units_Completed_l21[t.fore.long],Supply_New_Houses_l9=x$Supply_New_Houses_l9[t.fore.long],Housing_Price_Index=x$Housing_Price_Index[t.fore.long]))
plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),preds$pred,col="red")

The MLR forecasts continued to increase with time.

VAR Model

x = fed_housing_data
x$Year_Quarter = c()
VARselect(x,lag.max=16,type="both",season=NULL,exogen=NULL) # lag = 4

## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      4      2      1      4 
## 
## $criteria
##                1        2        3        4        5        6        7        8
## AIC(n)  2.330754 1.933777 1.831970 1.690855 1.754028 1.829078 1.939386 1.901505
## HQ(n)   2.580534 2.361972 2.438579 2.475879 2.717466 2.970931 3.259654 3.400187
## SC(n)   2.946909 2.990043 3.328346 3.627343 4.130626 4.645787 5.196206 5.598435
## FPE(n) 10.287646 6.922207 6.263258 5.456431 5.841904 6.344193 7.157132 6.985458
##               9       10       11       12       13       14       15        16
## AIC(n) 1.737782 1.757358 1.775183 1.828460 1.877812 1.886325 1.936906  1.981409
## HQ(n)  3.414879 3.612869 3.809109 4.040800 4.268567 4.455494 4.684489  4.907407
## SC(n)  5.874824 6.334511 6.792446 7.285834 7.775297 8.223921 8.714612  9.199226
## FPE(n) 6.035151 6.291181 6.580194 7.172464 7.838893 8.285860 9.211707 10.275979

The model with lag = 4 was chosen.

fit = VAR(x,p=4,type='both') 
summary(fit) # trend and const were significant, but only lag up to 2 for variable of interest, huc not very predictive

## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: Ownership_Rate, Housing_Units_Completed, Supply_New_Houses, Housing_Price_Index, Median_Sales_Price 
## Deterministic variables: both 
## Sample size: 194 
## Log Likelihood: -1409.069 
## Roots of the characteristic polynomial:
## 1.019 0.9661 0.944 0.944 0.8338 0.8338 0.827 0.827 0.6592 0.6592 0.5734 0.5734 0.5261 0.5261 0.4336 0.4336 0.3419 0.3419 0.3146 0.09555
## Call:
## VAR(y = x, p = 4, type = "both")
## 
## 
## Estimation results for equation Ownership_Rate: 
## =============================================== 
## Ownership_Rate = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## Ownership_Rate.l1           0.8802438  0.0772448  11.396   <2e-16 ***
## Housing_Units_Completed.l1  0.0003522  0.0003466   1.016   0.3110    
## Supply_New_Houses.l1        0.0178765  0.0320110   0.558   0.5773    
## Housing_Price_Index.l1      0.0006370  0.0080791   0.079   0.9372    
## Median_Sales_Price.l1      -1.2365449  1.1566036  -1.069   0.2865    
## Ownership_Rate.l2          -0.1675348  0.1006641  -1.664   0.0979 .  
## Housing_Units_Completed.l2  0.0001776  0.0004052   0.438   0.6617    
## Supply_New_Houses.l2       -0.0620023  0.0390373  -1.588   0.1141    
## Housing_Price_Index.l2      0.0013218  0.0153120   0.086   0.9313    
## Median_Sales_Price.l2       0.7609832  1.2429600   0.612   0.5412    
## Ownership_Rate.l3           0.2373131  0.0992393   2.391   0.0179 *  
## Housing_Units_Completed.l3 -0.0002194  0.0003990  -0.550   0.5831    
## Supply_New_Houses.l3        0.0633595  0.0399637   1.585   0.1147    
## Housing_Price_Index.l3     -0.0003650  0.0158064  -0.023   0.9816    
## Median_Sales_Price.l3       0.6246737  1.2399144   0.504   0.6150    
## Ownership_Rate.l4           0.0110775  0.0763513   0.145   0.8848    
## Housing_Units_Completed.l4  0.0001236  0.0003175   0.389   0.6976    
## Supply_New_Houses.l4       -0.0003583  0.0358984  -0.010   0.9920    
## Housing_Price_Index.l4     -0.0034601  0.0096862  -0.357   0.7214    
## Median_Sales_Price.l4      -0.4423237  1.0989918  -0.402   0.6878    
## const                       4.9666621  3.7849625   1.312   0.1912    
## trend                       0.0084872  0.0054763   1.550   0.1230    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.3583 on 172 degrees of freedom
## Multiple R-Squared: 0.9552,  Adjusted R-squared: 0.9497 
## F-statistic: 174.7 on 21 and 172 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Housing_Units_Completed: 
## ======================================================== 
## Housing_Units_Completed = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## Ownership_Rate.l1             1.21161   16.47348   0.074  0.94145    
## Housing_Units_Completed.l1    0.62130    0.07391   8.406 1.55e-14 ***
## Supply_New_Houses.l1        -12.72643    6.82677  -1.864  0.06400 .  
## Housing_Price_Index.l1       -1.27929    1.72297  -0.742  0.45880    
## Median_Sales_Price.l1        -3.72119  246.66115  -0.015  0.98798    
## Ownership_Rate.l2            16.99057   21.46795   0.791  0.42978    
## Housing_Units_Completed.l2    0.19786    0.08642   2.290  0.02326 *  
## Supply_New_Houses.l2        -33.79267    8.32521  -4.059 7.47e-05 ***
## Housing_Price_Index.l2       -0.63782    3.26549  -0.195  0.84537    
## Median_Sales_Price.l2      -415.41469  265.07780  -1.567  0.11892    
## Ownership_Rate.l3           -20.99121   21.16410  -0.992  0.32267    
## Housing_Units_Completed.l3    0.11180    0.08509   1.314  0.19063    
## Supply_New_Houses.l3         -9.99171    8.52278  -1.172  0.24268    
## Housing_Price_Index.l3        8.58651    3.37092   2.547  0.01173 *  
## Median_Sales_Price.l3        33.54969  264.42828   0.127  0.89919    
## Ownership_Rate.l4            -0.91091   16.28293  -0.056  0.95545    
## Housing_Units_Completed.l4   -0.08470    0.06771  -1.251  0.21267    
## Supply_New_Houses.l4         14.11716    7.65581   1.844  0.06691 .  
## Housing_Price_Index.l4       -5.63593    2.06570  -2.728  0.00703 ** 
## Median_Sales_Price.l4       309.08091  234.37467   1.319  0.18901    
## const                      1582.57402  807.19375   1.961  0.05154 .  
## trend                        -2.14430    1.16790  -1.836  0.06808 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 76.4 on 172 degrees of freedom
## Multiple R-Squared: 0.9572,  Adjusted R-squared: 0.9519 
## F-statistic: 183.1 on 21 and 172 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Supply_New_Houses: 
## ================================================== 
## Supply_New_Houses = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## Ownership_Rate.l1          -0.2024886  0.1845156  -1.097   0.2740    
## Housing_Units_Completed.l1 -0.0004697  0.0008279  -0.567   0.5712    
## Supply_New_Houses.l1        0.6855507  0.0764650   8.966 5.07e-16 ***
## Housing_Price_Index.l1      0.0214770  0.0192986   1.113   0.2673    
## Median_Sales_Price.l1       1.5817461  2.7627931   0.573   0.5677    
## Ownership_Rate.l2          -0.1408463  0.2404574  -0.586   0.5588    
## Housing_Units_Completed.l2 -0.0009644  0.0009679  -0.996   0.3205    
## Supply_New_Houses.l2        0.1186182  0.0932488   1.272   0.2051    
## Housing_Price_Index.l2     -0.0333143  0.0365760  -0.911   0.3637    
## Median_Sales_Price.l2       0.6123029  2.9690737   0.206   0.8369    
## Ownership_Rate.l3           0.3641744  0.2370540   1.536   0.1263    
## Housing_Units_Completed.l3 -0.0003113  0.0009531  -0.327   0.7443    
## Supply_New_Houses.l3        0.0635468  0.0954617   0.666   0.5065    
## Housing_Price_Index.l3     -0.0060500  0.0377569  -0.160   0.8729    
## Median_Sales_Price.l3       3.2160054  2.9617986   1.086   0.2791    
## Ownership_Rate.l4          -0.0840977  0.1823813  -0.461   0.6453    
## Housing_Units_Completed.l4  0.0016403  0.0007584   2.163   0.0319 *  
## Supply_New_Houses.l4       -0.1508597  0.0857509  -1.759   0.0803 .  
## Housing_Price_Index.l4      0.0254085  0.0231374   1.098   0.2737    
## Median_Sales_Price.l4      -4.5626989  2.6251752  -1.738   0.0840 .  
## const                      -3.4228974  9.0411862  -0.379   0.7055    
## trend                      -0.0259743  0.0130814  -1.986   0.0487 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.8558 on 172 degrees of freedom
## Multiple R-Squared: 0.7811,  Adjusted R-squared: 0.7543 
## F-statistic: 29.22 on 21 and 172 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Housing_Price_Index: 
## ==================================================== 
## Housing_Price_Index = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## Ownership_Rate.l1           -0.985346   0.650593  -1.515  0.13173    
## Housing_Units_Completed.l1   0.001248   0.002919   0.427  0.66957    
## Supply_New_Houses.l1        -0.756803   0.269612  -2.807  0.00558 ** 
## Housing_Price_Index.l1       1.679492   0.068046  24.682  < 2e-16 ***
## Median_Sales_Price.l1        3.452220   9.741473   0.354  0.72349    
## Ownership_Rate.l2           -0.287772   0.847841  -0.339  0.73471    
## Housing_Units_Completed.l2  -0.004677   0.003413  -1.370  0.17234    
## Supply_New_Houses.l2         0.036430   0.328791   0.111  0.91190    
## Housing_Price_Index.l2      -1.206842   0.128965  -9.358  < 2e-16 ***
## Median_Sales_Price.l2       10.364575  10.468808   0.990  0.32354    
## Ownership_Rate.l3           -0.036252   0.835841  -0.043  0.96546    
## Housing_Units_Completed.l3   0.004608   0.003361   1.371  0.17211    
## Supply_New_Houses.l3        -0.082369   0.336593  -0.245  0.80697    
## Housing_Price_Index.l3       1.032985   0.133129   7.759 7.24e-13 ***
## Median_Sales_Price.l3      -14.997317  10.443156  -1.436  0.15279    
## Ownership_Rate.l4            0.967596   0.643067   1.505  0.13425    
## Housing_Units_Completed.l4   0.001041   0.002674   0.389  0.69745    
## Supply_New_Houses.l4         0.115456   0.302354   0.382  0.70304    
## Housing_Price_Index.l4      -0.495141   0.081581  -6.069 7.97e-09 ***
## Median_Sales_Price.l4       -5.006937   9.256239  -0.541  0.58926    
## const                       89.325044  31.878778   2.802  0.00566 ** 
## trend                        0.063292   0.046124   1.372  0.17179    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 3.017 on 172 degrees of freedom
## Multiple R-Squared: 0.9996,  Adjusted R-squared: 0.9996 
## F-statistic: 2.097e+04 on 21 and 172 DF,  p-value: < 2.2e-16 
## 
## 
## Estimation results for equation Median_Sales_Price: 
## =================================================== 
## Median_Sales_Price = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend 
## 
##                              Estimate Std. Error t value Pr(>|t|)    
## Ownership_Rate.l1          -6.003e-03  5.113e-03  -1.174 0.241960    
## Housing_Units_Completed.l1  1.114e-05  2.294e-05   0.486 0.627737    
## Supply_New_Houses.l1       -7.269e-03  2.119e-03  -3.431 0.000753 ***
## Housing_Price_Index.l1      1.695e-03  5.347e-04   3.170 0.001806 ** 
## Median_Sales_Price.l1       4.816e-01  7.655e-02   6.291 2.52e-09 ***
## Ownership_Rate.l2           5.273e-03  6.663e-03   0.791 0.429799    
## Housing_Units_Completed.l2 -2.239e-05  2.682e-05  -0.835 0.404910    
## Supply_New_Houses.l2        4.012e-03  2.584e-03   1.553 0.122329    
## Housing_Price_Index.l2     -3.055e-04  1.013e-03  -0.301 0.763406    
## Median_Sales_Price.l2       3.289e-01  8.227e-02   3.998 9.46e-05 ***
## Ownership_Rate.l3          -3.138e-04  6.568e-03  -0.048 0.961956    
## Housing_Units_Completed.l3  4.574e-05  2.641e-05   1.732 0.085056 .  
## Supply_New_Houses.l3       -1.031e-03  2.645e-03  -0.390 0.697110    
## Housing_Price_Index.l3     -1.645e-03  1.046e-03  -1.573 0.117607    
## Median_Sales_Price.l3       1.478e-01  8.207e-02   1.801 0.073454 .  
## Ownership_Rate.l4           2.436e-03  5.053e-03   0.482 0.630377    
## Housing_Units_Completed.l4 -2.301e-05  2.101e-05  -1.095 0.275009    
## Supply_New_Houses.l4        3.184e-03  2.376e-03   1.340 0.181943    
## Housing_Price_Index.l4      9.110e-05  6.411e-04   0.142 0.887170    
## Median_Sales_Price.l4      -3.937e-02  7.274e-02  -0.541 0.589017    
## const                       8.113e-01  2.505e-01   3.239 0.001441 ** 
## trend                       1.077e-03  3.625e-04   2.972 0.003381 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.02371 on 172 degrees of freedom
## Multiple R-Squared: 0.9986,  Adjusted R-squared: 0.9985 
## F-statistic:  5986 on 21 and 172 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##                         Ownership_Rate Housing_Units_Completed
## Ownership_Rate                0.128346                -0.32121
## Housing_Units_Completed      -0.321209              5837.34662
## Supply_New_Houses             0.006495                -9.27767
## Housing_Price_Index          -0.123702                -5.77346
## Median_Sales_Price           -0.001310                 0.01227
##                         Supply_New_Houses Housing_Price_Index
## Ownership_Rate                   0.006495           -0.123702
## Housing_Units_Completed         -9.277673           -5.773455
## Supply_New_Houses                0.732337           -0.448006
## Housing_Price_Index             -0.448006            9.104649
## Median_Sales_Price              -0.001032            0.003326
##                         Median_Sales_Price
## Ownership_Rate                  -0.0013105
## Housing_Units_Completed          0.0122743
## Supply_New_Houses               -0.0010317
## Housing_Price_Index              0.0033263
## Median_Sales_Price               0.0005622
## 
## Correlation matrix of residuals:
##                         Ownership_Rate Housing_Units_Completed
## Ownership_Rate                 1.00000               -0.011735
## Housing_Units_Completed       -0.01174                1.000000
## Supply_New_Houses              0.02118               -0.141898
## Housing_Price_Index           -0.11443               -0.025044
## Median_Sales_Price            -0.15427                0.006775
##                         Supply_New_Houses Housing_Price_Index
## Ownership_Rate                    0.02118            -0.11443
## Housing_Units_Completed          -0.14190            -0.02504
## Supply_New_Houses                 1.00000            -0.17350
## Housing_Price_Index              -0.17350             1.00000
## Median_Sales_Price               -0.05085             0.04649
##                         Median_Sales_Price
## Ownership_Rate                   -0.154270
## Housing_Units_Completed           0.006775
## Supply_New_Houses                -0.050846
## Housing_Price_Index               0.046491
## Median_Sales_Price                1.000000

fit = VAR(x[1:(l-h.short),],p=4,type='both') 
preds=predict(fit,n.ahead=h.short)
ase = mean((fed_housing_data_NL$Median_Sales_Price[(l-h.short+1):l]-exp(preds$fcst$Median_Sales_Price[,1]))^2)/1e6
ase # 43.63274

## [1] 43.63274

plot(seq(1,l,1),x$Median_Sales_Price,type="b")
points(seq(l-h.short+1,l,1),preds$fcst$Median_Sales_Price[1:h.short,1],type="b",pch=15,col="blue")

fanchart(preds)

fit = VAR(x[1:(l-h.long),],p=4,type='both') 
preds=predict(fit,n.ahead=h.long)
ase = mean((fed_housing_data_NL$Median_Sales_Price[(l-h.long+1):l]-exp(preds$fcst$Median_Sales_Price[,1]))^2)/1e6
ase # 3009.65

## [1] 3009.65

plot(seq(1,l,1),x$Median_Sales_Price,type="b")
points(seq(l-h.long+1,l,1),preds$fcst$Median_Sales_Price[1:h.long,1],type="b",pch=15,col="blue")

fanchart(preds)

x = fed_housing_data
x$Year_Quarter = c()
fit = VAR(x[1:(l-h.short),],p=4,type='both') 
preds=predict(fit,n.ahead=h.short)
plot(seq(xmin_plot,l,1),x$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3) # 2nd column is lower bound
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3) # 3rd column is upper bound

fit = VAR(x[1:(l-h.long),],p=4,type='both')
preds=predict(fit,n.ahead=h.long)
plot(seq(xmin_plot,l,1),x$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3)
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3)

The VAR model has a very small short term ASE of 46.4 M. This is even smaller than the signal plus noise model. However, its long term ASE is larger than the MLR model.

We forcasted out the VAR model for 1 year and 5 years.

x = fed_housing_data
x$Year_Quarter = c()
fit = VAR(x,p=4,type='both') 
preds=predict(fit,n.ahead=h.short)
plot(seq(xmin_plot,l+h.short,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3) # 2nd column is lower bound
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3) # 3rd column is upper bound

fit = VAR(x,p=4,type='both')
preds=predict(fit,n.ahead=h.long)
plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3)
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3)

plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),preds$pred,col="red")

Neural Networks

We investigated 3 Neural Nets for predictions: Multilayer Perceptron (MLP), Long Short-Term Memory(LSTM), and Temporal Fusion Transformer (TFT).

Multilayer Perceptron

Four different univariate MLP models were fit to the data for both horizons, and the ASE and RMSE were best in both cases using MLP’s default parameters. For the 1 year horizon, the function chose 5 hidden nodes and a Difference of 1. The univariate lags were 1, 2, 3, and 4. For the 5 year horizon, the function chose 5 hidden nodes and a Difference of 1 as well, with lags at 1, 3, and 4. The univariate MLP was used as a baseline for the Multivariate MLP.

# Univariate
log.mhp = fed_housing_data$Median_Sales_Price
msp.194 = ts(log.mhp[1:194])
msp.178 = ts(log.mhp[1:178])

# 1 Year Horizons
mspFit.4 = mlp(msp.194, comb = 'median')
mspFit.d0.4 = mlp(msp.194, comb = 'median', difforder = 0)
mspFit.d1.4 = mlp(msp.194, comb = 'median', difforder = 1)
mspFit.d2.4 = mlp(msp.194, comb = 'median', difforder = 2)

plot(mspFit.4)

plot(mspFit.4$fitted)

f.4 = forecast(mspFit.4, h = 4)
f.d0.4 = forecast(mspFit.d0.4, h = 4)
f.d1.4 = forecast(mspFit.d1.4, h = 4)
f.d2.4 = forecast(mspFit.d2.4, h = 4)

# Defaults
ASE.mlp.exp.h4 = mean((exp(log.mhp[195:198]) - exp(f.4$mean))^2) # 60.95M
rwfit.mlp4 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.4) # 0.029

## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002734 0.015904 0.023128 0.029275 0.039717 0.110918 
## [1] "The Rolling Window RMSE is:  0.029"

# Difforder = 0
ASE.mlp.exp.d0.h4 = mean((exp(log.mhp[195:198]) - exp(f.d0.4$mean))^2) # 99.75M
rwfit.mlp4.d0 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d0.4) # 0.033

## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002723 0.016662 0.025692 0.032713 0.042055 0.113279 
## [1] "The Rolling Window RMSE is:  0.033"

# Difforder = 1
ASE.mlp.exp.d1.h4 = mean((exp(log.mhp[195:198]) - exp(f.d1.4$mean))^2) # 178.5M
rwfit.mlp4.d1 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d1.4) # 0.029

## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005039 0.015694 0.023916 0.028928 0.037541 0.098347 
## [1] "The Rolling Window RMSE is:  0.029"

# Difforder = 2
ASE.mlp.exp.d2.h4 = mean((exp(log.mhp[195:198]) - exp(f.d2.4$mean))^2) # 205.4M
rwfit.mlp4.d2 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d2.4) # 0.031

## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005211 0.018303 0.024713 0.030557 0.039818 0.098140 
## [1] "The Rolling Window RMSE is:  0.031"

# 1 Year Horizon
plot(log.mhp[195:198], type = 'l', ylim = c(12.9, 13))
lines(seq(1,4), f.4$mean, col = "blue")
lines(seq(1,4), f.d0.4$mean, col = "red")
lines(seq(1,4), f.d1.4$mean, col = "orange")
lines(seq(1,4), f.d2.4$mean, col = "green")

# 5 Year Horizon
mspFit.20 = mlp(msp.178, comb = 'median')
plot(mspFit.20)

mspFit.d0.20 = mlp(msp.178, comb = 'median', difforder = 0)
mspFit.d1.20 = mlp(msp.178, comb = 'median', difforder = 1)
mspFit.d2.20 = mlp(msp.178, comb = 'median', difforder = 2)

f.20 = forecast(mspFit.20, h = 20)
plot(f.20)

f.d0.20 = forecast(mspFit.d0.20, h = 20)
f.d1.20 = forecast(mspFit.d1.20, h = 20)
f.d2.20 = forecast(mspFit.d2.20, h = 20)

# Defaults
ASE.mlp.exp.h20 = mean((exp(log.mhp[179:198]) - exp(f.20$mean))^2) # 1.72B
rwfit.mlp20 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.20) # 0.072

## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01490 0.03531 0.06311 0.07289 0.09689 0.23675 
## [1] "The Rolling Window RMSE is:  0.073"

# Difforder = 0
ASE.mlp.exp.d0.h20 = mean((exp(log.mhp[179:198]) - exp(f.d0.20$mean))^2) # 7.24B
rwfit.mlp20.d0 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d0.20) # 0.076

## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01315 0.03614 0.06175 0.07512 0.09523 0.23384 
## [1] "The Rolling Window RMSE is:  0.075"

# Difforder = 1
ASE.mlp.exp.d1.h20 = mean((exp(log.mhp[179:198]) - exp(f.d1.20$mean))^2) # 2.02B
rwfit.mlp20.d1 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d1.20) # 0.073

## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01498 0.03505 0.06517 0.07272 0.09145 0.22887 
## [1] "The Rolling Window RMSE is:  0.073"

# Difforder = 2
ASE.mlp.exp.d2.h20 = mean((exp(log.mhp[179:198]) - exp(f.d2.20$mean))^2) # 2.61B
rwfit.mlp20.d2 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d2.20) # 0.077

## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."

## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01288 0.03846 0.06654 0.07606 0.10646 0.22748 
## [1] "The Rolling Window RMSE is:  0.076"

# Create table for comparison
labels = c("Defaults", "DiffOrder = 0", "DiffOrder = 1", "DiffOrder = 2")
ASE_4 = c(ASE.mlp.exp.h4, ASE.mlp.exp.d0.h4, ASE.mlp.exp.d1.h4, ASE.mlp.exp.d2.h4)
ASE_20 = c(ASE.mlp.exp.h20, ASE.mlp.exp.d0.h20, ASE.mlp.exp.d1.h20, ASE.mlp.exp.d2.h20)
RMSE_4 = c(rwfit.mlp4$rwRMSE,rwfit.mlp4.d0$rwRMSE,rwfit.mlp4.d1$rwRMSE,rwfit.mlp4.d2$rwRMSE) 
RMSE_20 = c(rwfit.mlp20$rwRMSE,rwfit.mlp20.d0$rwRMSE,rwfit.mlp20.d1$rwRMSE,rwfit.mlp20.d2$rwRMSE) 
comparison = data.frame(Metrics = labels, ASE_1_yr = ASE_4, ASE_5_Yr = ASE_20, RMSE_1_Yr = RMSE_4, RMSE_5_Yr = RMSE_20)
comparison

##         Metrics  ASE_1_yr   ASE_5_Yr  RMSE_1_Yr  RMSE_5_Yr
## 1      Defaults  56113947 2360549683 0.02927483 0.07288691
## 2 DiffOrder = 0  71331704 7377710156 0.03271345 0.07511880
## 3 DiffOrder = 1  91170472 2050048407 0.02892788 0.07271617
## 4 DiffOrder = 2 225962073 2323174956 0.03055707 0.07605920

# Comparing 5 Year Forecasts
plot(log.mhp[179:198], type = 'l', ylim = c(12.5, 13))
lines(seq(1,20), f.20$mean, col = "blue")
lines(seq(1,20), f.d0.20$mean, col = "red")
lines(seq(1,20), f.d1.20$mean, col = "orange")
lines(seq(1,20), f.d2.20$mean, col = "green")

# Forecasts
plot(log.mhp, type = 'l', lwd = 1, main = 'Univariate Forecasts, 1 and 5 Years')
points(seq(195,198), f.4$mean, type = 'l', col = 'red')
points(seq(179,198), f.20$mean, type = 'l', col = 'red')

# Zoomed In, 1 yr
plot(seq(150,198,1),log.mhp[150:198], type = 'l', lwd = 1, main = 'Univariate MLP Forecast, 1 Year')
points(seq(195,198,1), f.4$mean, type = 'l', col = 'red')

# Zoomed In, 5 yr
plot(seq(150,198,1), log.mhp[150:198], type = 'l', lwd = 1, main = 'Univariate MLP Forecast, 5 Years')
points(seq(179,198,1), f.20$mean, type = 'l', col = 'red')

Confidence Intervals for Univariate Model

We were able to create confidence intervals for the univariate model by taking the 5th and 95th percentiles of bootstrapped residuals. These confidence intervals did contain the actual values for both backcasts.

################################################################################

# Trying what Dr Sadler did, but for an mlp model

# xt = gen.arma.wge(100,phi = .9)
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)
mhp_1975 = mhp[49:246,]
xt = log(mhp_1975$MSPUS)

# Horizon = 4
h.short = 4
l = length(xt)
xtTrain = xt[1:(l-h.short)]

# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted

# Are residuals white noise?
plotts.sample.wge(res)

## $xbar
## [1] 1.744812e-05
## 
## $autplt
##  [1]  1.000000000 -0.036871716 -0.030558554  0.001573962  0.048845593
##  [6]  0.012174237 -0.075906809 -0.011477166  0.043462683 -0.014153964
## [11] -0.149881096  0.013599918  0.053422387 -0.142806880 -0.086030066
## [16]  0.047973796  0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21]  0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
## 
## $freq
##  [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
##  [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
## 
## $dbz
##  [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
##  [6]  0.085349888  0.272652115  0.361801013  0.360223739  0.282220036
## [11]  0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484  0.012476221  0.419856106  0.884930351
## [46]  1.333003310  1.701732802  1.948434446  2.048685019  1.992600708
## [51]  1.782086937  1.430042768  0.961445003  0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226  0.008382131  0.263182077
## [66]  0.435070212  0.512334692  0.494937033  0.395382225  0.238913052
## [71]  0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783  0.048036443  0.136964928  0.223984419
## [81]  0.315192773  0.412483461  0.508696665  0.587734345  0.629338738
## [86]  0.616060741  0.539807287  0.406419980  0.237274360  0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96]  0.000482310

acf(res,lag.max=100)

ljung.wge(res,K=24) # p = 0.3804433, white noise

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 29.63275
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.1972232

ljung.wge(res,K=48) # p = 0.6786805, white

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 47.31109
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.5009818

hist(res) # look normally distributed

# BSSim = function(ts,res,phi,horizon) {
num_back = l - lf
BSSim = function(ts,res,fit,num_back,horizon) {
  origin = length(ts)
  ForHolder = numeric(horizon)
  for(i in (origin+1):(origin+horizon)){
    # ts[i] = ts[i-1]*phi + sample(res,1)
    # print(i)
    needed_to_predict = ts[(i-num_back):(i-1)]
    fore=forecast(fit,y=needed_to_predict,h=1)
    ts[i]=fore$mean+sample(res,1)
    ForHolder[(i-origin)]=ts[i]
  }
  return(ForHolder)
}

# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less

# holder = matrix(nrow = BSSamples,ncol = 50)
holder = matrix(nrow = BSSamples,ncol = h.short)

for(i in 1:BSSamples)
{
  # xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
  if ((i %% 100) == 0){
    print(i)
  }
  xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.short)
  holder[i,] = xtNewFor
}

## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000

# Calculate percentiles for each column
percentiles_per_column <- apply(holder, 2, function(column) {
  quantile(column, probs = c(0.025, 0.975))
})

# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)

# Print the result
print(percentiles_per_column)

##          2.5%    97.5%
## [1,] 12.91162 13.01384
## [2,] 12.89698 13.01408
## [3,] 12.87999 13.03489
## [4,] 12.86808 13.06548

xtNewForBS = colMeans(holder)

result = fit.mlp$fitted

# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), )
#lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.short,1),xt[(l-h.short+1):l],col = "red")

# Alex edited to plot black line as the actual values and predicted as red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
plot(xt[6:198], type = 'l', 
     main = 'Univariate MLP Short Term Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(x),ymax_future))
lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.short,1),xtNewForBS, col = "red")

# Zoomed In
plot(seq(150,198,1), xt[150:198], type = 'l', 
     main = 'Univariate MLP Short Term Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(195,198,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(195,198,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(195,198,1),xtNewForBS,col = "red")

# Horizon = 20  
h.long = 20
l = length(xt)
xtTrain = xt[1:(l-h.long)]

# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted

# Are residuals white noise?
plotts.sample.wge(res)

## $xbar
## [1] 1.744812e-05
## 
## $autplt
##  [1]  1.000000000 -0.036871716 -0.030558554  0.001573962  0.048845593
##  [6]  0.012174237 -0.075906809 -0.011477166  0.043462683 -0.014153964
## [11] -0.149881096  0.013599918  0.053422387 -0.142806880 -0.086030066
## [16]  0.047973796  0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21]  0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
## 
## $freq
##  [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
##  [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
## 
## $dbz
##  [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
##  [6]  0.085349888  0.272652115  0.361801013  0.360223739  0.282220036
## [11]  0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484  0.012476221  0.419856106  0.884930351
## [46]  1.333003310  1.701732802  1.948434446  2.048685019  1.992600708
## [51]  1.782086937  1.430042768  0.961445003  0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226  0.008382131  0.263182077
## [66]  0.435070212  0.512334692  0.494937033  0.395382225  0.238913052
## [71]  0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783  0.048036443  0.136964928  0.223984419
## [81]  0.315192773  0.412483461  0.508696665  0.587734345  0.629338738
## [86]  0.616060741  0.539807287  0.406419980  0.237274360  0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96]  0.000482310

acf(res,lag.max=100)

ljung.wge(res,K=24) # p = 0.3804433, white noise

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 29.63275
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.1972232

ljung.wge(res,K=48) # p = 0.6786805, white

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 47.31109
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.5009818

hist(res) # look normally distributed

# BSSim = function(ts,res,phi,horizon) {
num_back = l - lf
BSSim = function(ts,res,fit,num_back,horizon) {
  origin = length(ts)
  ForHolder = numeric(horizon)
  for(i in (origin+1):(origin+horizon)){
    # ts[i] = ts[i-1]*phi + sample(res,1)
    # print(i)
    needed_to_predict = ts[(i-num_back):(i-1)]
    fore=forecast(fit,y=needed_to_predict,h=1)
    ts[i]=fore$mean+sample(res,1)
    ForHolder[(i-origin)]=ts[i]
  }
  return(ForHolder)
}

# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less

# holder = matrix(nrow = BSSamples,ncol = 50)
holder = matrix(nrow = BSSamples,ncol = h.long)

for(i in 1:BSSamples)
{
  # xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
  if ((i %% 100) == 0){
    print(i)
  }
  xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.long)
  holder[i,] = xtNewFor
}

## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000

# Calculate percentiles for each column
percentiles_per_column <- apply(holder, 2, function(column) {
  quantile(column, probs = c(0.025, 0.975))
})

# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)

# Print the result
print(percentiles_per_column)

##           2.5%    97.5%
##  [1,] 12.61870 12.72615
##  [2,] 12.61532 12.73625
##  [3,] 12.60530 12.75608
##  [4,] 12.59535 12.78286
##  [5,] 12.59227 12.81404
##  [6,] 12.58460 12.83386
##  [7,] 12.58620 12.86215
##  [8,] 12.58660 12.88227
##  [9,] 12.58345 12.90117
## [10,] 12.58453 12.92735
## [11,] 12.58455 12.94401
## [12,] 12.56501 12.97066
## [13,] 12.57141 12.98767
## [14,] 12.57361 13.00649
## [15,] 12.56880 13.02081
## [16,] 12.56941 13.04938
## [17,] 12.57778 13.06957
## [18,] 12.57743 13.08411
## [19,] 12.57157 13.10765
## [20,] 12.58245 13.14049

xtNewForBS = colMeans(holder)

# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), col = 'red')
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.long,1),xt[(l-h.long+1):l],col = "black")

# Alex edited to plot black line as the actual values and predicted as red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
plot(xt[6:198], type = 'l', 
     main = 'Univariate MLP Long Term Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.long,1),xtNewForBS,col = "red")

# Zoomed In
plot(seq(150,198,1),xt[150:198], type = 'l', 
     main = 'Univariate MLP Long Term Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(179,198,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(179,198,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(179,198,1),xtNewForBS, col = "red")

Future Forecast

log.mhp = ts(fed_housing_data$Median_Sales_Price)

# Univariate MLP Forecasting into the Future
future.mspFit.4 = mlp(log.mhp, comb = 'median')
future.f.4 = forecast(future.mspFit.4, h = 4)

future.mspFit.20 = mlp(log.mhp, comb = 'median')
future.f.20 = forecast(future.mspFit.20, h = 20)

future.f.4$mean[4] # 12.96544

##      t+4 
## 12.94831

future.f.20$mean[20] # 13.10206

##     t+20 
## 13.16013

exp(future.f.4$mean[4]) # $427,384.77

##      t+4 
## 420125.9

exp(future.f.20$mean[20]) # $489,950.67

##     t+20 
## 519243.2

h.short = 4
h.long = 20
x = fed_housing_data$Median_Sales_Price
l = length(x)

# Forecasting next year (Zoomed In)
plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",
     xlab="Time",ylab="log Median Housing Sales Price",
     main="Univariate MLP Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),future.f.4$mean,col="red")

# Forecasting next 5 years (Zoomed In)
plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",
     xlab="Time",ylab="log Median Housing Sales Price",
     main="Univariate MLP Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),future.f.20$mean,col="red")

CI for Future Forecast

Wording:

In 1 year, we are 95% confident that the median home sale price will be between $386,583 (e^12.8651) and $463,006 (e^13.04549). Our best estimate is $422,090 (e^12.95297).

In 5 years, we are 95% confident that the median home sale price will be between $375,203 (e^12.83522) and $654,866 (e^13.39219). Our best estimate is $496,983 (e^13.11631).

################################################################################

# Trying what Dr Sadler did, but for an mlp model

# xt = gen.arma.wge(100,phi = .9)
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)
mhp_1975 = mhp[49:246,]
xt = log(mhp_1975$MSPUS)

# Horizon = 4
h.short = 4
l = length(xt)
xtTrain = xt

# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted

# Are residuals white noise?
plotts.sample.wge(res)

## $xbar
## [1] 1.744812e-05
## 
## $autplt
##  [1]  1.000000000 -0.036871716 -0.030558554  0.001573962  0.048845593
##  [6]  0.012174237 -0.075906809 -0.011477166  0.043462683 -0.014153964
## [11] -0.149881096  0.013599918  0.053422387 -0.142806880 -0.086030066
## [16]  0.047973796  0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21]  0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
## 
## $freq
##  [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
##  [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
## 
## $dbz
##  [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
##  [6]  0.085349888  0.272652115  0.361801013  0.360223739  0.282220036
## [11]  0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484  0.012476221  0.419856106  0.884930351
## [46]  1.333003310  1.701732802  1.948434446  2.048685019  1.992600708
## [51]  1.782086937  1.430042768  0.961445003  0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226  0.008382131  0.263182077
## [66]  0.435070212  0.512334692  0.494937033  0.395382225  0.238913052
## [71]  0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783  0.048036443  0.136964928  0.223984419
## [81]  0.315192773  0.412483461  0.508696665  0.587734345  0.629338738
## [86]  0.616060741  0.539807287  0.406419980  0.237274360  0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96]  0.000482310

acf(res,lag.max=100)

ljung.wge(res,K=24) # p = 0.3804433, white noise

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 29.63275
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.1972232

ljung.wge(res,K=48) # p = 0.6786805, white

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 47.31109
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.5009818

hist(res) # look normally distributed

# BSSim = function(ts,res,phi,horizon) {
num_back = l - lf
BSSim = function(ts,res,fit,num_back,horizon) {
  origin = length(ts)
  ForHolder = numeric(horizon)
  for(i in (origin+1):(origin+horizon)){
    # ts[i] = ts[i-1]*phi + sample(res,1)
    # print(i)
    needed_to_predict = ts[(i-num_back):(i-1)]
    fore=forecast(fit,y=needed_to_predict,h=1)
    ts[i]=fore$mean+sample(res,1)
    ForHolder[(i-origin)]=ts[i]
  }
  return(ForHolder)
}

# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less

# holder = matrix(nrow = BSSamples,ncol = 50)
holder = matrix(nrow = BSSamples,ncol = h.short)

for(i in 1:BSSamples)
{
  # xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
  if ((i %% 100) == 0){
    print(i)
  }
  xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.short)
  holder[i,] = xtNewFor
}

## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000

# Calculate percentiles for each column
percentiles_per_column <- apply(holder, 2, function(column) {
  quantile(column, probs = c(0.025, 0.975))
})

# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)

# Print the result
print(percentiles_per_column)

##          2.5%    97.5%
## [1,] 12.89311 12.99534
## [2,] 12.88840 12.99354
## [3,] 12.88019 13.01974
## [4,] 12.86510 13.04549

xtNewForBS = colMeans(holder)

# Forecast 1 year, log scale
xtNewForBS[4] # 12.95297

## [1] 12.95297

percentiles_per_column[4,1] # 12.8651

##    2.5% 
## 12.8651

percentiles_per_column[4,2] # 13.04549

##    97.5% 
## 13.04549

# Forecast 1 year, exp
exp(xtNewForBS[4]) # $422,089.9

## [1] 422089.9

exp(percentiles_per_column[4,1]) # $386,583.3

##     2.5% 
## 386583.3

exp(percentiles_per_column[4,2]) # $463,005.6

##    97.5% 
## 463005.6

# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), )
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.long,1),xt[(l-h.long+1):l],col = "red")

# Alex hard coded seq and changed prediction line to red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
plot(seq(1,l+h.short,1),xt[1:(l+h.short)], type = 'l', 
     main = 'Univariate MLP Short Term Future Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(199,202,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,202,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,202,1),xtNewForBS, col = "red")

# Zoomed In
plot(seq(150,202,1), xt[xmin_plot:(l+h.short)], type = 'l', 
     main = 'Univariate MLP Short Term Future Forecast with Confidence Intervals', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(199,202,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,202,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,202,1),xtNewForBS,ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1),col = "red")

# Horizon = 20  
h.long = 20
l = length(xt)
xtTrain = xt

# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted

# Are residuals white noise?
plotts.sample.wge(res)

## $xbar
## [1] 1.744812e-05
## 
## $autplt
##  [1]  1.000000000 -0.036871716 -0.030558554  0.001573962  0.048845593
##  [6]  0.012174237 -0.075906809 -0.011477166  0.043462683 -0.014153964
## [11] -0.149881096  0.013599918  0.053422387 -0.142806880 -0.086030066
## [16]  0.047973796  0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21]  0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
## 
## $freq
##  [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
##  [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
## 
## $dbz
##  [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
##  [6]  0.085349888  0.272652115  0.361801013  0.360223739  0.282220036
## [11]  0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484  0.012476221  0.419856106  0.884930351
## [46]  1.333003310  1.701732802  1.948434446  2.048685019  1.992600708
## [51]  1.782086937  1.430042768  0.961445003  0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226  0.008382131  0.263182077
## [66]  0.435070212  0.512334692  0.494937033  0.395382225  0.238913052
## [71]  0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783  0.048036443  0.136964928  0.223984419
## [81]  0.315192773  0.412483461  0.508696665  0.587734345  0.629338738
## [86]  0.616060741  0.539807287  0.406419980  0.237274360  0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96]  0.000482310

acf(res,lag.max=100)

ljung.wge(res,K=24) # p = 0.3804433, white noise

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 29.63275
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.1972232

ljung.wge(res,K=48) # p = 0.6786805, white

## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311

## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 47.31109
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.5009818

hist(res) # look normally distributed

# BSSim = function(ts,res,phi,horizon) {
num_back = l - lf
BSSim = function(ts,res,fit,num_back,horizon) {
  origin = length(ts)
  ForHolder = numeric(horizon)
  for(i in (origin+1):(origin+horizon)){
    # ts[i] = ts[i-1]*phi + sample(res,1)
    # print(i)
    needed_to_predict = ts[(i-num_back):(i-1)]
    fore=forecast(fit,y=needed_to_predict,h=1)
    ts[i]=fore$mean+sample(res,1)
    ForHolder[(i-origin)]=ts[i]
  }
  return(ForHolder)
}

# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less

# holder = matrix(nrow = BSSamples,ncol = 50)
holder = matrix(nrow = BSSamples,ncol = h.long)

for(i in 1:BSSamples)
{
  # xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
  if ((i %% 100) == 0){
    print(i)
  }
  xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.long)
  holder[i,] = xtNewFor
}

## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000

# Calculate percentiles for each column
percentiles_per_column <- apply(holder, 2, function(column) {
  quantile(column, probs = c(0.025, 0.975))
})

# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)

# Print the result
print(percentiles_per_column)

##           2.5%    97.5%
##  [1,] 12.89311 13.00056
##  [2,] 12.88194 13.00451
##  [3,] 12.87148 13.02123
##  [4,] 12.85401 13.04448
##  [5,] 12.85060 13.06950
##  [6,] 12.84008 13.08704
##  [7,] 12.83404 13.11585
##  [8,] 12.83586 13.13375
##  [9,] 12.83169 13.15603
## [10,] 12.83207 13.17937
## [11,] 12.83095 13.20530
## [12,] 12.81948 13.21947
## [13,] 12.82531 13.24356
## [14,] 12.82483 13.26146
## [15,] 12.82571 13.27875
## [16,] 12.82621 13.30590
## [17,] 12.83141 13.33041
## [18,] 12.82385 13.34815
## [19,] 12.82350 13.36700
## [20,] 12.83522 13.39219

xtNewForBS = colMeans(holder)

# Forecast 5 years, log scale
xtNewForBS[20] # 13.11631

## [1] 13.11631

percentiles_per_column[20,1] # 12.83522

##     2.5% 
## 12.83522

percentiles_per_column[20,2] # 13.39219

##    97.5% 
## 13.39219

# Forecast 5 years, exp
exp(xtNewForBS[20]) # 496982.7

## [1] 496982.7

exp(percentiles_per_column[20,1]) # 375202.5

##     2.5% 
## 375202.5

exp(percentiles_per_column[20,2]) # 654865.7

##    97.5% 
## 654865.7

# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), )
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.long,1),xt[(l-h.long+1):l],col = "red")

# Alex hard coded seq and changed prediction line to red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
plot(seq(1,l+h.long,1),xt[1:(l+h.long)], type = 'l', 
     main = 'Multivariate MLP Long Term Future Forecast', 
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(199,218,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,218,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,218,1),xtNewForBS,col = "red")

# Zoomed In
plot(seq(150,218,1),xt[xmin_plot:(l+h.long)], type = 'l',  
     main = 'Multivariate MLP Long Term Future Forecast',
     xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(199,218,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,218,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,218,1),xtNewForBS, col = "red")

LSTM

LSTMs are a type of Recurrent Neural Network (RNN) that have shown success with Time Series data.

The LSTM was created in Python. The code is below.

import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import random

class TimeSeriesDataset(Dataset):
    def __init__(self, data, sequence_length):
        self.data = torch.FloatTensor(data)
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        return (
            self.data[idx:idx+self.sequence_length],
            self.data[idx+self.sequence_length]
        )

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True
        )
        
        self.fc = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # Initialize cell state
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.unsqueeze(-1) # Ensure batch_x is 3-D: (batch_size, sequence_length, input_size)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

# After evaluation
def save_results_to_csv(predictions, actuals, ase, horizon_type="short"):
    results_df = pd.DataFrame({
        'Prediction': predictions,
        'Actual': actuals,
        'Error': [pred - act for pred, act in zip(predictions, actuals)]
    })
    
    # Add summary statistics
    summary_df = pd.DataFrame({
        'Metric': ['ASE'],
        'Value': [ase]
    })
    
    # Save to CSV files
    results_df.to_csv(f'predictions_{horizon_type}_term.csv', index=True)
    summary_df.to_csv(f'metrics_{horizon_type}_term.csv', index=False)

# Set seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Load and preprocess your data
df = pd.read_csv('https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv')
# import pdb
#pdb.set_trace()
data = df['MSPUS'].values[49:246]  # Select rows 49 through 245 inclusive

# After loading data but before normalization
data = np.log(data)  # Apply log transformation

# After loading data but before creating datasets
# Initialize the scaler
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data.reshape(-1, 1)).flatten()

# Define parameters
# sequence_length = 10
sequence_length = 8
# sequence_length = 5
hidden_size = 64
# hidden_size = 128
num_layers = 2
# num_layers = 3
batch_size = 32
# batch_size = 16
num_epochs = 100
learning_rate = 0.01
# learning_rate = 0.005
h_short = 4
h_long = 20

# Create dataset and dataloader
# dataset = TimeSeriesDataset(data, sequence_length)
# train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Split data into train and test
train_data = data_normalized[:-h_short]  # All data except last h_short entries
test_data = data_normalized[-sequence_length-h_short:]  # Last sequence_length + h_short entries

# Create datasets and dataloaders
train_dataset = TimeSeriesDataset(train_data, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TimeSeriesDataset(test_data, sequence_length)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Initialize model, criterion, and optimizer
model = LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers)
# model = LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers, dropout=0.2)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs)

# Add evaluation mode
def evaluate_model(model, test_loader):
    model.eval()
    predictions = []
    actuals = []
    total_se = 0  # Initialize before the loop
    n = 0  # Initialize before the loop
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.unsqueeze(-1)  # Add this line to ensure 3D input
            output = model(batch_x)
            # Denormalize predictions and actuals
            pred = scaler.inverse_transform(output.numpy())[0][0]
            actual = scaler.inverse_transform(batch_y.numpy().reshape(-1, 1))[0][0]
            predictions.append(pred)
            actuals.append(actual)
            # predictions.append(output.item())
            # actuals.append(batch_y.item())

            # Then un-log transform
            pred = np.exp(pred)
            actual = np.exp(actual)

            # Calculate squared error for this prediction
            se = (pred - actual) ** 2
            total_se += se
            n += 1
            
    ase = total_se / 1e6 / n if n > 0 else 0
    return predictions, actuals, ase
    # return predictions, actuals

# Evaluate on test data
# predictions, actuals = evaluate_model(model, test_loader)
predictions, actuals, ase = evaluate_model(model, test_loader)
print("\nTest Results for short term prediction:")
for i, (pred, actual) in enumerate(zip(predictions, actuals)):
    print(f"Prediction {i+1}: {pred:.2f}, Actual: {actual:.2f}")
print(f"\nAverage Squared Error: {ase:,.2f}")

# Save results
save_results_to_csv(predictions, actuals, ase, "short")

# Also do it with the long term evaluation
train_data = data_normalized[:-h_long]  # All data except last h_long entries
test_data = data_normalized[-sequence_length-h_long:]  # Last sequence_length + h_long entries
train_dataset = TimeSeriesDataset(train_data, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TimeSeriesDataset(test_data, sequence_length)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
train_model(model, train_loader, criterion, optimizer, num_epochs)
predictions, actuals, ase = evaluate_model(model, test_loader)
print("\nTest Results for long term prediction:")
for i, (pred, actual) in enumerate(zip(predictions, actuals)):
    print(f"Prediction {i+1}: {pred:.2f}, Actual: {actual:.2f}")
print(f"\nAverage Squared Error: {ase:,.2f}")
save_results_to_csv(predictions, actuals, ase, "long")

The short term prediction was the following:

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred

##   X Prediction   Actual        Error
## 1 0   12.97172 12.98402 -0.012302399
## 2 1   12.97577 12.95560  0.020169258
## 3 2   12.97100 12.96407  0.006934166
## 4 3   12.96965 12.93483  0.034823418

x = fed_housing_data$Median_Sales_Price
l = length(x)
plot(seq(1,l,1),x,type="b")
points(seq(l-h.short+1,l,1),pred$Prediction,type="b",pch=15,col="blue")

The short term ASE was 81.8 M. This was slightly better than the ARIMA model. Note that it is like our other ASEs, where it compares to the unlogged, original Medium Sales Prices.

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_metrics_short_term.csv"
ase <- read.csv(file_path, header = TRUE)
ase

##   Metric    Value
## 1    ASE 81.81499

The long term prediction was the following:

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred

##     X Prediction   Actual        Error
## 1   0   12.70576 12.67106  0.034692764
## 2   1   12.70608 12.69802  0.008055687
## 3   2   12.72127 12.70381  0.017460823
## 4   3   12.73490 12.66697  0.067928314
## 5   4   12.72078 12.70046  0.020320892
## 6   5   12.72533 12.73257 -0.007248879
## 7   6   12.74889 12.77987 -0.030979156
## 8   7   12.79208 12.81530 -0.023219109
## 9   8   12.83588 12.88715 -0.051267624
## 10  9   12.89563 12.93362 -0.037988663
## 11 10   12.94953 12.93241  0.017118454
## 12 11   12.96874 12.98929 -0.020545006
## 13 12   12.99935 12.98997  0.009381294
## 14 13   13.01107 13.00042  0.010648727
## 15 14   13.01676 12.96921  0.047543526
## 16 15   12.99711 12.94443  0.052679062
## 17 16   12.96769 12.98402 -0.016334534
## 18 17   12.97404 12.95560  0.018435478
## 19 18   12.96841 12.96407  0.004336357
## 20 19   12.97093 12.93483  0.036101340

plot(seq(1,l,1),x,type="b")
points(seq(l-h.long+1,l,1),pred$Prediction,type="b",pch=15,col="blue")

The long term ASE was 149 M. This was an order of magnitude better than the Signal Plus Noise model, which had the best long term ASE so far. And the plot of the predictions (of the logged data) vs the actual values seems to line up almost exactly.

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_metrics_long_term.csv"
ase <- read.csv(file_path, header = TRUE)
ase

##   Metric    Value
## 1    ASE 149.8804

x = fed_housing_data$Median_Sales_Price
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="LSTM Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),pred$Prediction,col="red")

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="LSTM Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),pred$Prediction,col="red")

TFT

TFT (Temporal Fusion Transformer) is an algorithm that uses Transformers and Attention to forecast Time Series data. The model is known for being able to do very well with non-linear data, when it has tens of thousands or even hundreds of thousands of training examples. We have less than 1000, so it is unlikely to do well on this dataset.

The TFT was created in Python. The code is below.

# https://unit8co.github.io/darts/generated_api/darts.models.forecasting.tft_model.html#darts.models.forecasting.tft_model.TFTModel

# from darts.datasets import WeatherDataset
from darts.models import TFTModel
from darts import TimeSeries
from darts.explainability.tft_explainer import TFTExplainer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# series = WeatherDataset().load()
series = pd.read_csv('https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/fed_housing_data.csv')

# Convert 'Year_Quarter' to datetime
# import pdb
# pdb.set_trace()
# series['Year_Quarter'] = pd.to_datetime(series['Year_Quarter'], format='%Y-Q%q')  # Adjust format as needed
series['Year_Quarter'] = pd.to_datetime(series['Year_Quarter'])

# Convert pandas Series/DataFrames to Darts TimeSeries
# Assuming 'Year_Quarter' is your time index
# First set the index to Year_Quarter
series = series.set_index('Year_Quarter')

# predicting atmospheric pressure
# target = series['p (mbar)'][:100]
h_short = 4
h_long = 20
# import pdb
# pdb.set_trace()
target = TimeSeries.from_dataframe(series[['Median_Sales_Price']][:-h_short])  # All data except last h_short entries

# optionally, past observed rainfall (pretending to be unknown beyond index 100)
# import pdb
# pdb.set_trace()
past_cov = TimeSeries.from_dataframe(series[['Ownership_Rate','Housing_Units_Completed','Supply_New_Houses','Housing_Price_Index']][:-h_short])

# future temperatures (pretending this component is a forecast)
# future_cov = series['T (degC)'][:106]

# by default, TFTModel is trained using a `QuantileRegression` making it a probabilistic forecasting model
# input_chunk_length = 12
input_chunk_length = 8
# n_epochs = 5
# n_epochs = 200
n_epochs = 20
random_state = 42
model = TFTModel(
    # input_chunk_length=8,
    input_chunk_length=input_chunk_length,
    output_chunk_length=h_short,
    hidden_size=128,              # Larger hidden size for more capacity
    lstm_layers=2,                # More LSTM layers
    num_attention_heads=4,        # More attention heads
    dropout=0.1,                  # Some dropout for regularization
    batch_size=16,                # Smaller batch size since we have limited data
    n_epochs=n_epochs,
    # learning_rate=0.001,          # Standard learning rate, ValueError: Invalid model creation parameters. Model `TFTModel` has no args/kwargs `['learning_rate']`
    random_state=random_state
)

# future_covariates are mandatory for `TFTModel`
# import pdb
# pdb.set_trace()
# future_cov = TimeSeries.from_dataframe(series[['Year_Quarter']])
# future_cov = TimeSeries.from_dataframe(series.iloc[:, 0])
future_cov = TimeSeries.from_dataframe(series.iloc[:, [0]])
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)

# TFTModel is probabilistic by definition; using `num_samples >> 1` to generate probabilistic forecasts
# pred = model.predict(h_short, num_samples=1000)

# Get mean prediction
# mean_pred = pred.mean()

# Get quantiles if you want confidence intervals
# quantiles_pred = pred.quantile([0.025, 0.975])  # 95% confidence interval
# import pdb
# pdb.set_trace()
# quantiles_pred = pred.quantile([0.025, 0.975])  # 95% confidence interval
# quantiles_pred_1 = pred.quantile(0.025)
# quantiles_pred_2 = pred.quantile(0.975)

def evaluate_model(model, h):
    """
    Evaluate the TFT model using multiple metrics
    """
    # Make predictions
    # predictions = model.predict(n=h_short, num_samples=100)
    predictions = model.predict(n=h, num_samples=1000)
    mean_predictions = predictions.mean()

    # Get confidence intervals
    # confidence_intervals = predictions.quantiles([0.025, 0.975])
    lower_bound = predictions.quantile(0.025)  # 2.5th percentile
    upper_bound = predictions.quantile(0.975)  # 97.5th percentile
    
    # Get actual values (last h_short values)
    # actuals = target[-h_short:]
    actuals = TimeSeries.from_dataframe(series[['Median_Sales_Price']][-h:])
    
    # Convert to numpy arrays for calculations
    pred_values = mean_predictions.values()
    actual_values = actuals.values()
    lower_values = lower_bound.values()
    upper_values = upper_bound.values()
    
    # Calculate metrics
    total_se = 0
    n = len(pred_values)
    predictions_list = []
    actuals_list = []
    lower_bounds_list = []
    upper_bounds_list = []
    
    for i in range(n):
        pred = pred_values[i][0]  # Assuming single-variable predictions
        actual = actual_values[i][0]
        lower = lower_values[i][0]
        upper = upper_values[i][0]
        
        # Store predictions and actuals
        predictions_list.append(pred)
        actuals_list.append(actual)
        lower_bounds_list.append(lower)
        upper_bounds_list.append(upper)

        # Then un-log transform
        pred = np.exp(pred)
        actual = np.exp(actual)
        
        # Calculate squared error
        se = (pred - actual) ** 2
        total_se += se
    
    # Calculate average squared error
    ase = total_se / 1e6 /n if n > 0 else 0
    
    return predictions_list, actuals_list, lower_bounds_list, upper_bounds_list, ase

# Use the evaluation function
preds, acts, lower_bounds, upper_bounds, ase = evaluate_model(model, h_short)

# Print results
print(f"Average Squared Error for short term prediction: {ase:,.2f}")
print("\nPredictions vs Actuals with 95% Confidence Intervals:")
for i, (p, a, l, u) in enumerate(zip(preds, acts, lower_bounds, upper_bounds)):
    print(f"Period {i+1}:")
    print(f"  Predicted = {p:,.2f} [{l:,.2f}, {u:,.2f}]")
    print(f"  Actual    = {a:,.2f}")
    print(f"  Diff      = {abs(p-a):,.2f}")
    print(f"  Width CI  = {u-l:,.2f}")

# Optional: Calculate additional metrics
# mape_score = mape(target[-h_short:], model.predict(n=h_short).mean())
# print(f"\nMAPE: {mape_score:.2%}")

# After training the model
# static_covariates = None  # If you have any static covariates, include them here

def save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="short_term"):
    # Predictions DataFrame
    results_df = pd.DataFrame({
        'Predicted': preds,
        'Actual': acts,
        'Lower_Bound': lower_bounds,
        'Upper_Bound': upper_bounds,
        'Absolute_Error': np.abs(np.array(preds) - np.array(acts)),
        'Squared_Error': (np.array(preds) - np.array(acts))**2,
        'CI_Width': np.array(upper_bounds) - np.array(lower_bounds),
        'Within_CI': (np.array(acts) >= np.array(lower_bounds)) & 
                    (np.array(acts) <= np.array(upper_bounds))
    })
    
    # Calculate summary metrics
    metrics_df = pd.DataFrame({
        'Metric': ['ASE', 'MAE', 'RMSE', 'Mean_CI_Width', 'Coverage_Rate'],
        'Value': [
            ase,
            np.mean(np.abs(np.array(preds) - np.array(acts))),
            np.sqrt(np.mean((np.array(preds) - np.array(acts))**2)),
            np.mean(np.array(upper_bounds) - np.array(lower_bounds)),
            np.mean((np.array(acts) >= np.array(lower_bounds)) & 
                   (np.array(acts) <= np.array(upper_bounds)))
        ]
    })
    
    # Save both files
    results_df.to_csv(f'predictions_detailed_{model_name}.csv', index=True)
    metrics_df.to_csv(f'metrics_detailed_{model_name}.csv', index=False)
    
    print(f"Detailed results saved to predictions_detailed_{model_name}.csv and metrics_detailed_{model_name}.csv")

save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="TFT_short_term")

# Get interpretability components
# interpretability = model.interpret_output(
# import pdb
# pdb.set_trace()
# interpretability = model.interpret(target,past_covariates=past_cov,future_covariates=future_cov,static_covariates=static_covariates)
explainer = TFTExplainer(model)
results = explainer.explain()

# Save the variable selection plot
# plt.figure(figsize=(12, 8))
explainer.plot_variable_selection(results)
# plt.savefig('variable_importance_short.png')  # Save to file
# plt.close()  # Close the plot to free memory

# Try with long term predictions as well
# import pdb
# pdb.set_trace()
target = TimeSeries.from_dataframe(series[['Median_Sales_Price']][:-h_long])
past_cov = TimeSeries.from_dataframe(series[['Ownership_Rate','Housing_Units_Completed','Supply_New_Houses','Housing_Price_Index']][:-h_long])
model = TFTModel(
    # input_chunk_length=8,
    input_chunk_length=input_chunk_length,
    output_chunk_length=h_long,
    hidden_size=128,              # Larger hidden size for more capacity
    lstm_layers=2,                # More LSTM layers
    num_attention_heads=4,        # More attention heads
    dropout=0.1,                  # Some dropout for regularization
    batch_size=16,                # Smaller batch size since we have limited data
    n_epochs=n_epochs,
    # learning_rate=0.001,        # Standard learning rate, ValueError: Invalid model creation parameters. Model `TFTModel` has no args/kwargs `['learning_rate']`
    random_state=random_state
)
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
preds, acts, lower_bounds, upper_bounds, ase = evaluate_model(model, h_long)
print(f"Average Squared Error for long term prediction: {ase:,.2f}")
print("\nPredictions vs Actuals with 95% Confidence Intervals:")
for i, (p, a, l, u) in enumerate(zip(preds, acts, lower_bounds, upper_bounds)):
    print(f"Period {i+1}:")
    print(f"  Predicted = {p:,.2f} [{l:,.2f}, {u:,.2f}]")
    print(f"  Actual    = {a:,.2f}")
    print(f"  Diff      = {abs(p-a):,.2f}")
    print(f"  Width CI  = {u-l:,.2f}")
save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="TFT_long_term")
explainer = TFTExplainer(model)
results = explainer.explain()
explainer.plot_variable_selection(results)

The short term prediction was the following:

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/predictions_detailed_TFT_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred

##   X Predicted   Actual Lower_Bound Upper_Bound Absolute_Error Squared_Error
## 1 0  12.66963 12.98402    11.89675    13.02597      0.3143873    0.09883936
## 2 1  12.66474 12.95560    11.81557    13.04598      0.2908556    0.08459696
## 3 2  12.66228 12.96407    11.70568    13.03513      0.3017937    0.09107942
## 4 3  12.68728 12.93483    11.96067    13.03658      0.2475483    0.06128015
##   CI_Width Within_CI
## 1 1.129227      True
## 2 1.230405      True
## 3 1.329448      True
## 4 1.075911      True

h.short = 4
x = fed_housing_data$Median_Sales_Price
l = length(x)
plot(seq(1,l,1),x,type="b")
points(seq(l-h.short+1,l,1),pred$Predicted,type="b",pch=15,col="blue")
lines(seq(l-h.short+1,l,1),pred$Lower_Bound,lty=2, col="red", lwd=2)
lines(seq(l-h.short+1,l,1),pred$Upper_Bound,lty=2, col="red", lwd=2)

The short term ASE was 11.5 B. This was significantly worse than other models. The poor results aren’t unexpected though, as there were so few data points to train on.

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/metrics_detailed_TFT_short_term.csv"
metrics <- read.csv(file_path, header = TRUE)
metrics # 1146

##          Metric        Value
## 1           ASE 1.145680e+04
## 2           MAE 2.886462e-01
## 3          RMSE 2.897395e-01
## 4 Mean_CI_Width 1.191248e+00
## 5 Coverage_Rate 1.000000e+00

The long term prediction was the following:

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/predictions_detailed_TFT_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred

##     X Predicted   Actual Lower_Bound Upper_Bound Absolute_Error Squared_Error
## 1   0  12.43832 12.67106    12.21089    12.75497      0.2327429    0.05416924
## 2   1  12.47469 12.69802    12.23497    12.76391      0.2233344    0.04987824
## 3   2  12.49115 12.70381    12.24657    12.83910      0.2126641    0.04522604
## 4   3  12.48697 12.66697    12.24163    12.80761      0.1800009    0.03240032
## 5   4  12.49683 12.70046    12.24852    12.83739      0.2036384    0.04146860
## 6   5  12.49251 12.73257    12.24345    12.83757      0.2400679    0.05763260
## 7   6  12.49833 12.77987    12.25592    12.81032      0.2815470    0.07926873
## 8   7  12.50507 12.81529    12.25759    12.82373      0.3102269    0.09624075
## 9   8  12.49876 12.88715    12.25309    12.81391      0.3883918    0.15084819
## 10  9  12.50641 12.93362    12.27013    12.77429      0.4272105    0.18250879
## 11 10  12.51155 12.93241    12.26884    12.82441      0.4208647    0.17712707
## 12 11  12.51987 12.98929    12.26915    12.78959      0.4694197    0.22035481
## 13 12  12.53635 12.98997    12.27686    12.85769      0.4536235    0.20577428
## 14 13  12.53622 13.00042    12.27722    12.84829      0.4642035    0.21548488
## 15 14  12.54577 12.96921    12.28666    12.87724      0.4234453    0.17930589
## 16 15  12.54831 12.94443    12.28880    12.87036      0.3961252    0.15691519
## 17 16  12.54871 12.98402    12.28873    12.83241      0.4353107    0.18949542
## 18 17  12.55493 12.95560    12.29005    12.86720      0.4006676    0.16053455
## 19 18  12.55411 12.96407    12.29579    12.89488      0.4099561    0.16806402
## 20 19  12.55270 12.93483    12.29220    12.84341      0.3821264    0.14602056
##     CI_Width Within_CI
## 1  0.5440809      True
## 2  0.5289398      True
## 3  0.5925264      True
## 4  0.5659723      True
## 5  0.5888688      True
## 6  0.5941170      True
## 7  0.5544005      True
## 8  0.5661477      True
## 9  0.5608154     False
## 10 0.5041576     False
## 11 0.5555715     False
## 12 0.5204439     False
## 13 0.5808367     False
## 14 0.5710670     False
## 15 0.5905744     False
## 16 0.5815662     False
## 17 0.5436749     False
## 18 0.5771557     False
## 19 0.5990881     False
## 20 0.5512100     False

h.long=20
x = fed_housing_data$Median_Sales_Price
l = length(x)
plot(seq(1,l,1),x,type="b")
points(seq(l-h.long+1,l,1),pred$Predicted,type="b",pch=15,col="blue")
lines(seq(l-h.long+1,l,1),pred$Lower_Bound,lty=2, col="red", lwd=2)
lines(seq(l-h.long+1,l,1),pred$Upper_Bound,lty=2, col="red", lwd=2)

The long term ASE was 15.0 B. Similar to the short term predictions, this model did very poorly compared to the other models.

file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/metrics_detailed_TFT_long_term.csv"
metrics <- read.csv(file_path, header = TRUE)
metrics # 1146

##          Metric        Value
## 1           ASE 1.498351e+04
## 2           MAE 3.477784e-01
## 3          RMSE 3.611591e-01
## 4 Mean_CI_Width 5.635607e-01
## 5 Coverage_Rate 4.000000e-01

Time Series Final Project

Time Series Project

Aaron Abromowitz and Alex Thibeaux

2024-11-16

Libraries

Load Data

EDA

Median Housing Sale Price

Home Ownership Rate

Housing Units Completed

Supply of New Houses

House Price Index

AR(I)MA Model Investigation

First ARIMA Model

ARMA Model

ARIMA(p,2,q) model

Exogenous Variable Forecasts

Home Ownership Rate

Housing Units Completed

Supply of New Houses

House price index

Signal Plus Noise Model

Test for linear Trend

Choosing estimates for the ARMA portion of the Signal Plus Noise Model

Signal Plus Noise using 1 Yr Horizon

Signal Plus Noise using 5 Yr Horizon

Rolling Window RMSE for SPN

Evaluating Models

Model Choice

Multi-variate Models

MLR Model

VAR Model

Neural Networks

Multilayer Perceptron

Confidence Intervals for Univariate Model

Future Forecast

CI for Future Forecast

LSTM

TFT