Time Series Final Project
Project investigating Housing Price Data
<!DOCTYPE html>
Time Series Project
Aaron Abromowitz and Alex Thibeaux
2024-11-16
Libraries
# Load required packages. NOTE: the original script loaded vars twice;
# the duplicate call has been removed.
library(tidyverse)  # data manipulation (dplyr, %>%) and plotting
library(tswge)      # time series tools (aic5.wge, est.arma.wge, fore.arima.wge, ...)
library(vars)       # vector autoregression models
library(lubridate)  # date handling (month())
library(nnfor)      # neural network time series forecasting
library(caret)      # train/test utilities
Load Data
# Pull the five FRED series mirrored in the project GitHub repository.

# Median sales price of houses sold (variable of interest) - quarterly from 1/1/63
file_path <- "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)

# Home ownership rate - quarterly from 1/1/65
file_path <- "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/RHORUSQ156N.csv"
hor <- read.csv(file_path, header = TRUE)

# Housing units completed - monthly from 1/1/1968
file_path <- "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/COMPUTSA.csv"
huc <- read.csv(file_path, header = TRUE)

# Supply of new houses - monthly from 1/1/1963
file_path <- "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSACSR.csv"
snh <- read.csv(file_path, header = TRUE)

# House price index - quarterly from 1/1/1975
file_path <- "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/USSTHPI.csv"
hpi <- read.csv(file_path, header = TRUE)
# Converting Monthly Data to Quarterly Data
# Keep untouched copies of the two monthly series before adding helper columns.
snh_monthly <- snh
huc_monthly <- huc
# Supply of New Houses: parse the date strings and tag each row with its
# calendar month (used below to keep only the first month of each quarter).
snh <- snh %>%
  mutate(DATE = as.Date(DATE), month = month(DATE))
head(snh)
## DATE MSACSR month
## 1 1963-01-01 4.7 1
## 2 1963-02-01 6.6 2
## 3 1963-03-01 6.4 3
## 4 1963-04-01 5.3 4
## 5 1963-05-01 5.1 5
## 6 1963-06-01 6.0 6
# Keep only the first month of each quarter (Jan, Apr, Jul, Oct) so the
# monthly series lines up with the quarterly series.
# FIX: refer to the column directly inside filter() instead of snh$month
# (using $ inside dplyr verbs bypasses data masking and is error prone).
snh_quarterly <- snh %>%
  filter(month %in% c(1, 4, 7, 10))
summary(snh_quarterly)
## DATE MSACSR month
## Min. :1963-01-01 Min. : 3.300 Min. : 1.000
## 1st Qu.:1978-05-16 1st Qu.: 4.900 1st Qu.: 2.500
## Median :1993-10-01 Median : 5.800 Median : 4.000
## Mean :1993-09-30 Mean : 6.117 Mean : 5.482
## 3rd Qu.:2009-02-15 3rd Qu.: 6.950 3rd Qu.: 7.000
## Max. :2024-07-01 Max. :12.200 Max. :10.000
# Housing Units Completed Variable
# Parse the date strings and record each row's calendar month for the
# quarterly filtering step below.
huc <- huc %>%
  mutate(DATE = as.Date(DATE), month = month(DATE))
head(huc)
## DATE COMPUTSA month
## 1 1968-01-01 1257 1
## 2 1968-02-01 1174 2
## 3 1968-03-01 1323 3
## 4 1968-04-01 1328 4
## 5 1968-05-01 1367 5
## 6 1968-06-01 1184 6
# Keep only the first month of each quarter (Jan, Apr, Jul, Oct).
# FIX: refer to the column directly inside filter() instead of huc$month
# (using $ inside dplyr verbs bypasses data masking and is error prone).
huc_quarterly <- huc %>%
  filter(month %in% c(1, 4, 7, 10))
summary(huc_quarterly)
## DATE COMPUTSA month
## Min. :1968-01-01 Min. : 520 Min. : 1.00
## 1st Qu.:1982-02-15 1st Qu.:1196 1st Qu.: 2.50
## Median :1996-04-01 Median :1389 Median : 4.00
## Mean :1996-03-31 Mean :1394 Mean : 5.48
## 3rd Qu.:2010-05-16 3rd Qu.:1632 3rd Qu.: 7.00
## Max. :2024-07-01 Max. :2195 Max. :10.00
# Using same time frames, which would be starting at 1975 Q1 and ending at 2024 Q2 (due to hpi data)
# hor observation 41 is 1975 Q1 (40 quarters = 10 years after the 1965 Q1 start);
# row 238 is 2024 Q2, giving the common 198-quarter window.
hor_1975 = hor[41:238,]
hor_1975$DATE <- as.Date(hor_1975$DATE)
summary(hor_1975)
## DATE RHORUSQ156N
## Min. :1975-01-01 Min. :62.90
## 1st Qu.:1987-04-23 1st Qu.:64.20
## Median :1999-08-16 Median :65.15
## Mean :1999-08-16 Mean :65.53
## 3rd Qu.:2011-12-09 3rd Qu.:66.58
## Max. :2024-04-01 Max. :69.20
# huc_quarterly observation 29 is 1975 Q1 (28 quarters = 7 years after the
# 1968 Q1 start); row 226 is 2024 Q2 (198 quarters).
huc_1975 = huc_quarterly[29:226,]
summary(huc_1975)
## DATE COMPUTSA month
## Min. :1975-01-01 Min. : 520 Min. : 1.00
## 1st Qu.:1987-04-23 1st Qu.:1136 1st Qu.: 1.75
## Median :1999-08-16 Median :1378 Median : 4.00
## Mean :1999-08-16 Mean :1355 Mean : 5.47
## 3rd Qu.:2011-12-09 3rd Qu.:1599 3rd Qu.: 7.00
## Max. :2024-04-01 Max. :2071 Max. :10.00
# mhp observation 49 is 1975 Q1 (48 quarters = 12 years after the 1963 Q1
# start); row 246 is 2024 Q2 (198 quarters).
mhp_1975 = mhp[49:246,]
mhp_1975$DATE <- as.Date(mhp_1975$DATE)
summary(mhp_1975)
## DATE MSPUS
## Min. :1975-01-01 Min. : 38100
## 1st Qu.:1987-04-23 1st Qu.:104050
## Median :1999-08-16 Median :161150
## Mean :1999-08-16 Mean :185330
## 3rd Qu.:2011-12-09 3rd Qu.:247350
## Max. :2024-04-01 Max. :442600
# snh_quarterly observation 49 is 1975 Q1 (48 quarters = 12 years after the
# 1963 Q1 start); row 246 is 2024 Q2 (198 quarters).
snh_1975 = snh_quarterly[49:246,]
summary(snh_1975)
## DATE MSACSR month
## Min. :1975-01-01 Min. : 3.300 Min. : 1.00
## 1st Qu.:1987-04-23 1st Qu.: 4.900 1st Qu.: 1.75
## Median :1999-08-16 Median : 6.000 Median : 4.00
## Mean :1999-08-16 Mean : 6.167 Mean : 5.47
## 3rd Qu.:2011-12-09 3rd Qu.: 7.075 3rd Qu.: 7.00
## Max. :2024-04-01 Max. :12.200 Max. :10.00
# Housing Price Index already spans 1975 Q1 - 2024 Q2, so no row trimming is
# needed; only the date column must be parsed.
hpi$DATE <- as.Date(hpi$DATE)
summary(hpi)
## DATE USSTHPI
## Min. :1975-01-01 Min. : 60.03
## 1st Qu.:1987-04-23 1st Qu.:145.09
## Median :1999-08-16 Median :224.69
## Mean :1999-08-16 Mean :260.08
## 3rd Qu.:2011-12-09 3rd Qu.:351.31
## Max. :2024-04-01 Max. :682.18
# Create Dataframe
# Combined (train/test) set, response NOT logged.
fed_housing_data_NL <- data.frame(
  Year_Quarter            = mhp_1975$DATE,
  Ownership_Rate          = hor_1975$RHORUSQ156N,
  Housing_Units_Completed = huc_1975$COMPUTSA,
  Supply_New_Houses       = snh_1975$MSACSR,
  Housing_Price_Index     = hpi$USSTHPI,
  Median_Sales_Price      = mhp_1975$MSPUS
)
# Combined (train/test) set with the logged response.
fed_housing_data <- data.frame(
  Year_Quarter            = as.Date(mhp_1975$DATE),
  Ownership_Rate          = hor_1975$RHORUSQ156N,
  Housing_Units_Completed = huc_1975$COMPUTSA,
  Supply_New_Houses       = snh_1975$MSACSR,
  Housing_Price_Index     = hpi$USSTHPI,
  Median_Sales_Price      = log(mhp_1975$MSPUS)
)
# Train on the first 168 quarters; hold out the final 30 quarters for testing.
train <- fed_housing_data[1:168, ]
test <- fed_housing_data[169:198, ]
summary(fed_housing_data)
## Year_Quarter Ownership_Rate Housing_Units_Completed Supply_New_Houses
## Min. :1975-01-01 Min. :62.90 Min. : 520 Min. : 3.300
## 1st Qu.:1987-04-23 1st Qu.:64.20 1st Qu.:1136 1st Qu.: 4.900
## Median :1999-08-16 Median :65.15 Median :1378 Median : 6.000
## Mean :1999-08-16 Mean :65.53 Mean :1355 Mean : 6.167
## 3rd Qu.:2011-12-09 3rd Qu.:66.58 3rd Qu.:1599 3rd Qu.: 7.075
## Max. :2024-04-01 Max. :69.20 Max. :2071 Max. :12.200
## Housing_Price_Index Median_Sales_Price
## Min. : 60.03 Min. :10.55
## 1st Qu.:145.09 1st Qu.:11.55
## Median :224.69 Median :11.99
## Mean :260.08 Mean :11.95
## 3rd Qu.:351.31 3rd Qu.:12.42
## Max. :682.18 Max. :13.00
# Plotting constants used by the zoomed forecast plots later in the report:
# starting time index on the x axis and y-axis limits on the log-price scale.
xmin_plot = 150
ymin_plot = 12.3
ymax_plot = 13.1
ymax_future = 13.4  # extra headroom when plotting forecasts beyond the sample
EDA
Median Housing Sale Price
Our variable of interest is the Median Housing Sale Price. This was quarterly data starting in 1963.
# NOTE: mhp is overwritten here, collapsing from a data.frame to the raw
# MSPUS price vector.
mhp <- mhp$MSPUS
# FIX: the MSPUS series is quarterly starting 1963 Q1 (see the load comment,
# and observation 49 being 1975 Q1), so the ts start is 1963, not 1965.
plot(ts(mhp / 1000, frequency = 4, start = c(1963, 1)),
     xlab = 'Year', ylab = 'Housing price (in thousands of dollars)')
title(main = 'Median US Housing Price from 1963')
acf(mhp, ylab = '', main = 'ACF')
x <- parzen.wge(mhp)
The realization looks like it is increasing linearly. This data could be modeled with a signal-plus-noise model, with a linear signal.
The realization appears to be non-stationary. There is evidence that the data is increasing over time. In addition, the variation appears much higher in later years than in earlier years, showing that variance is increasing over time as well. When modeling, it may be useful to take the logarithm of the data before modeling.
The ACF shows very slowly dampening autocorrelations. The Parzen window also shows a very low frequency. This could point to a (1-B) term, which could be removed with a high-pass difference filter.
To fix the problem with the increasing variance, we decided to take the log of the Median Housing Price.
# FIX: same series on the log scale; it also starts 1963 Q1, not 1965.
plot(ts(log(mhp), frequency = 4, start = c(1963, 1)),
     xlab = 'Year', ylab = 'Log Housing price')
title(main = 'Log Median US Housing Price from 1963')
Home Ownership Rate
We used 4 exogenous variables. The first was home ownership rate. Like the variable of interest, this was quarterly data starting from 1965.
# hor collapses from a data.frame to the plain ownership-rate vector.
hor <- hor$RHORUSQ156N
# Quarterly series starting 1965 Q1 (RHORUSQ156N begins 1/1/65).
plot(ts(hor, frequency=4,start=c(1965,1)),xlab='Year',ylab='Percentage')
title(main='Home Ownership Rate from 1965')
acf(hor,ylab='',main='ACF')
x = parzen.wge(hor)
Unlike the median house price, the home ownership rate appears to be stationary. There isn’t a linear trend to the data, and the variance seems to be constant over time.
The ACF does show that there are dampening autocorrelations, but not quite at the rate as for median house price. This could point to an ARMA model being appropriate for the home ownership rate.
Similar to median house price, the Parzen Window shows that 0 is a prominent frequency.
Housing Units Completed
The next exogenous variable is Housing Units Completed. This was a monthly variable, starting from 1968.
# huc collapses from a data.frame to the plain completions vector.
huc <- huc$COMPUTSA
# Monthly series starting 1968 (COMPUTSA begins 1/1/1968), hence frequency = 12.
plot(ts(huc, frequency=12,start=c(1968,1)),xlab='Year',ylab='')
title(main='Housing Units Completed from 1968')
acf(huc,ylab='',main='ACF')
x = parzen.wge(huc)
This has very similar characteristics to home ownership rate. Its autocorrelations dampen a little more slowly, but the ACF and Parzen Window look very similar. This variable likewise shows evidence for being stationary.
Supply of New Houses
# snh collapses from a data.frame to the plain supply vector.
snh <- snh$MSACSR
# Monthly series starting 1963 (MSACSR begins 1/1/1963), hence frequency = 12.
plot(ts(snh, frequency=12,start=c(1963,1)),xlab='Year',ylab='')
title(main='Supply of New Houses from 1963')
acf(snh,ylab='',main='ACF')
x = parzen.wge(snh)
Similar to home ownership rate as well, but with even more quickly dampening autocorrelations. This variable likewise shows evidence for being stationary.
House Price Index
The next exogenous variable is House Price Index. This was a quarterly variable, starting from 1975.
# hpi collapses from a data.frame to the plain index vector.
hpi <- hpi$USSTHPI
# Quarterly series starting 1975 Q1 (USSTHPI begins 1/1/1975).
plot(ts(hpi, frequency=4,start=c(1975,1)),xlab='Year',ylab='')
title(main='House Price Index from 1975')
acf(hpi,ylab='',main='ACF')
x = parzen.wge(hpi)
The realization has a similar upward trend to the Housing Sale Price. It seems very smooth though, very little variance. Because of the upward trend though, it shows evidence against stationarity.
The ACF and Parzen Window are also similar to those of Housing Sale Price.
Not all the data matches up by date range or by frequency of observation (monthly vs quarterly). To make everything consistent, we will only use quarterly data going from Q1 1975 to Q2 2024.
AR(I)MA Model Investigation
First ARIMA Model
The first model we looked at was an ARIMA model. From the EDA, we saw that because of the increasing variance, we should look at the log of the Housing Sales Price. And since the Parzen Window had a prominent peak at 0 and very slowly dampening autocorrelations, we should take the first difference.
# Logged response (Median_Sales_Price was stored on the log scale when
# fed_housing_data was built).
log.mhp = fed_housing_data$Median_Sales_Price
# Back-transform to the original dollar scale for the ASE calculations below.
mhp = exp(log.mhp)
# First difference of the log series, i.e. the (1-B) transform.
d.log.mhp = artrans.wge(log.mhp,1)
The difference looks more like white noise without an obvious pattern to the ACF. Next we will try modeling the differenced data as an ARMA model.
aic5.wge(d.log.mhp,p=0:6,q=0:2,type='aic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 1 2 -7.188559
## 2 2 -7.180263
## 5 2 -7.174131
## 3 2 -7.170135
## 6 2 -7.167230
aic5.wge(d.log.mhp,p=0:6,q=0:2,type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 1 2 -7.121895
## 2 2 -7.096933
## 0 2 -7.092570
## 3 0 -7.087997
## 1 0 -7.087827
Both the AIC and BIC selection choose p = 1 and q = 2.
We will evaluate our models with a short term horizon of 1 year (4 quarters) and a long term horizon of 5 years (20 quarters). We calculated a Rolling Window RMSE for both of those horizons (using the logged data) and calculated an ASE for both of those horizons as well (taking the exponential to get back to the original data).
# Evaluation horizons: short term = 1 year (4 quarters),
# long term = 5 years (20 quarters).
h.short = 4
h.long = 20
l = length(mhp)
# Fit ARMA(1,2) to the differenced log series; with the d = 1 difference this
# is the ARIMA(1,1,2) model overall.
est = est.arma.wge(d.log.mhp,p=1,q=2)
##
##
## Coefficients of AR polynomial:
## 0.6901
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.6901B 1.4490 0.6901 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.9720 -0.4007
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.9720B+0.4007B^2 1.2128+-1.0122i 0.6330 0.1107
##
##
f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 84.37902
## [1] 84.37902
f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 7091.032
## [1] 7091.032
r = roll.win.rmse.wge(log.mhp,h.short,d=1,phi=est$phi,theta=est$theta) # 0.036501
r$rwRMSE
## [1] 0.03650146
r = roll.win.rmse.wge(log.mhp,h.long,d=1,phi=est$phi,theta=est$theta) # 0.13269
r$rwRMSE
## [1] 0.1326872
For the ARIMA(1,1,2) model we get a short term Rolling Window RMSE of 0.037, a long term Rolling Window RMSE of 0.133, a short term ASE of 84.4 million and a long term ASE of 7.09 billion.
ARMA Model
As a comparison, we wanted to look a simpler ARMA model.
aic5.wge(d.log.mhp,p=0:6,q=0:4,type='aic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 1 2 -7.188559
## 2 2 -7.180263
## 5 2 -7.174131
## 3 2 -7.170135
## 3 4 -7.168648
aic5.wge(d.log.mhp,p=0:6,q=0:4,type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 1 2 -7.121895
## 0 3 -7.098442
## 2 2 -7.096933
## 0 2 -7.092570
## 3 0 -7.087997
Both the AIC and BIC selections rank p = 2 and q = 2 among their top models (the single best by both criteria is p = 1, q = 2), so we fit an ARMA(2,2).
est = est.arma.wge(log.mhp,p=2,q=2)
##
##
## Coefficients of AR polynomial:
## 1.8974 -0.8977
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9972B 1.0028 0.9972 0.0000
## 1-0.9002B 1.1109 0.9002 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 1.1064 -0.4030
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-1.1064B+0.4030B^2 1.3725+-0.7728i 0.6349 0.0816
##
##
f = fore.arma.wge(log.mhp,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 112.4314
## [1] 112.4314
f = fore.arma.wge(log.mhp,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 9167.59
## [1] 9167.59
r = roll.win.rmse.wge(log.mhp,h.short,phi=est$phi,theta=est$theta) # 0.044488
r$rwRMSE
## [1] 0.04448757
r = roll.win.rmse.wge(log.mhp,h.long,phi=est$phi,theta=est$theta) # 0.12779
r$rwRMSE
## [1] 0.1277888
For the ARMA(2,2) model we get a short term Rolling Window RMSE of 0.044, a long term Rolling Window RMSE of 0.128, a short term ASE of 112.4 million and a long term ASE of 9.17 billion. Even though this model has a better long term Rolling Window RMSE than the ARIMA model, it did worse in the other metrics.
ARIMA(p,2,q) model
Looking at the overfit factor table, we can see two prominent 0 frequency roots.
est = est.ar.wge(log.mhp,p=12)
##
##
## Coefficients of AR polynomial:
## 0.7873 0.3941 0.1201 -0.0790 -0.0881 -0.1315 -0.0324 0.0971 0.0282 -0.1868 0.0902 0.0005
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9992B 1.0008 0.9992 0.0000
## 1-0.9128B 1.0955 0.9128 0.0000
## 1+1.0481B+0.7252B^2 -0.7226+-0.9256i 0.8516 0.3555
## 1-0.0214B+0.7099B^2 0.0151+-1.1868i 0.8426 0.2480
## 1+1.5907B+0.6978B^2 -1.1398+-0.3660i 0.8353 0.4505
## 1-0.9244B+0.4855B^2 0.9519+-1.0740i 0.6968 0.1346
## 1-0.5740B 1.7420 0.5740 0.0000
## 1+0.0058B -173.5007 0.0058 0.5000
##
##
factor.wge(phi=est$phi)
##
##
## Coefficients of AR polynomial:
## 0.7873 0.3941 0.1201 -0.0790 -0.0881 -0.1315 -0.0324 0.0971 0.0282 -0.1868 0.0902 0.0005
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9992B 1.0008 0.9992 0.0000
## 1-0.9128B 1.0955 0.9128 0.0000
## 1+1.0481B+0.7252B^2 -0.7226+-0.9256i 0.8516 0.3555
## 1-0.0214B+0.7099B^2 0.0151+-1.1868i 0.8426 0.2480
## 1+1.5907B+0.6978B^2 -1.1398+-0.3660i 0.8353 0.4505
## 1-0.9244B+0.4855B^2 0.9519+-1.0740i 0.6968 0.1346
## 1-0.5740B 1.7420 0.5740 0.0000
## 1+0.0058B -173.5007 0.0058 0.5000
##
##
Even though the differenced data didn’t have slowly dampening autocorrelations and looked like white noise, the original data had a pretty clear upward trend. We thought that this warranted investigation into an ARIMA(p,2,q) model.
# Second difference of the log series: the (1-B)^2 transform.
d2.log.mhp = artrans.wge(d.log.mhp,1)
# BUG FIX: model selection for the ARIMA(p,2,q) candidate must run on the
# twice-differenced series; the original code re-ran aic5 on d.log.mhp.
aic5.wge(d2.log.mhp,p=0:4,q=0:2,type='aic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 1 2 -7.188559
## 2 2 -7.180263
## 3 2 -7.170135
## 3 1 -7.166652
## 4 0 -7.164881
aic5.wge(d.log.mhp,p=0:4,q=0:2,type='bic')
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 1 2 -7.121895
## 2 2 -7.096933
## 0 2 -7.092570
## 3 0 -7.087997
## 1 0 -7.087827
We proceed with p = 1 and q = 1 for the twice-differenced series (note that the selection output shown above was generated from the once-differenced data, whose best model by both criteria is p = 1, q = 2).
est = est.arma.wge(d2.log.mhp,p=1,q=1)
##
##
## Coefficients of AR polynomial:
## -0.3365
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+0.3365B -2.9722 0.3365 0.5000
##
##
##
##
## Coefficients of MA polynomial:
## 0.8508
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.8508B 1.1753 0.8508 0.0000
##
##
f = fore.arima.wge(log.mhp,d=2,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma -0.02848876 0.06515941 -0.02191816 -0.00591316 -0.02767201 0.01995818 -0.007043221 0.0372058 -0.05668252 0.05783844 -0.02902112 0.01571075 -0.02811809 0.03603873 -0.02364418 0.01366843 -0.01538544 -0.05803636 0.05041521 -0.01272076 0.00926602 0.008884892 -0.01684341 0.03217776 -0.04106979 0.02007841 -0.07568861 0.1055637 -0.05138717 0.03696983 -0.009184633 -0.001872253 0.01123961 -0.05240299 0.04942306 0.001615965 -0.02775835 -0.01738388 0.04932551 -0.01769841 -0.03108832 0.05549379 -0.02862908 0.03180794 -0.03581358 0.01155285 0.008792259 0.02458875 -0.02982428 0.02575136 -0.06412972 0.01354423 0.04445176 -0.05406302 0.04497501 -0.02776557 0.00161076 0.03001177 -0.04645838 0.03037392 -0.1035734 0.1181774 -0.05016285 0.01158884 0.001667361 -0.0008336807 -0.004175371 0.008350743 -0.004175371 0.04879016 -0.05675833 0.02384152 -0.01587335 0 0.02334736 -0.02334736 -0.002310359 0.01988819 -0.0328453 0.04482627 -0.04385013 0.05874309 -0.05172452 0.02821972 -0.02023241 0.02815054 -0.02263884 -0.0007241623 -0.01100415 -3.044042e-05 0.05952674 -0.07189327 0.04104058 -0.02314153 0.02835241 -0.02012712 -0.005707984 0.03571176 -0.03822907 -0.01278556 0.0465237 -0.009739329 -0.04209093 0.07085665 -0.0897531 0.02883951 0.1060593 -0.1058912 -0.04185149 0.1150374 -0.08700863 0.05251006 -0.03018525 0.03480365 0.03225854 -0.04480764 -0.04179748 0.08823311 -0.05316946 -0.01089395 0.006339027 0.01851521 -0.01331145 -0.02235883 -0.03874682 0.08516893 0.006987682 -0.1086092 0.05921457 -0.01250811 -0.004895259 0.02502388 -0.04408397 0.02029851 -0.04764995 0.1237186 -0.08858401 0.05202804 -0.004043317 -0.03302252 0.03611116 -0.01984809 0.01063289 -0.006250213 -0.02564745 0.009576389 0.08613117 -0.07407725 0.04018423 -0.02985329 0.01468233 0.0105804 -0.04923649 0.04507758 -0.02686144 0.03963145 -0.07006818 0.08636021 -0.09474498 0.03264474 0.02325674 -0.0005131741 -0.03136342 0.02943521 -0.02768503 0.03031724 -0.01605042 0.009106176 -0.008955327 0.04566556 -0.07108533 -0.03183917 
0.09739741 -0.0721239 -0.006046467 0.06072969 -0.04269463 0.03975216 -0.02116566 -0.04263239 0.07033207 -0.001380716 0.01518753 -0.0118768 0.03643113 -0.02537865 -0.04768247 0.05808469 -0.05619106 0.009762351 -0.04165703 0.00642948 0.06436829 -0.06800853 0.03689091 -0.03771318
ase = mean((mhp[(l-h.short+1):l]-exp(f$f))^2)/1e6
ase # 162.1383
## [1] 162.1383
f = fore.arima.wge(log.mhp,d=2,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma -0.02848876 0.06515941 -0.02191816 -0.00591316 -0.02767201 0.01995818 -0.007043221 0.0372058 -0.05668252 0.05783844 -0.02902112 0.01571075 -0.02811809 0.03603873 -0.02364418 0.01366843 -0.01538544 -0.05803636 0.05041521 -0.01272076 0.00926602 0.008884892 -0.01684341 0.03217776 -0.04106979 0.02007841 -0.07568861 0.1055637 -0.05138717 0.03696983 -0.009184633 -0.001872253 0.01123961 -0.05240299 0.04942306 0.001615965 -0.02775835 -0.01738388 0.04932551 -0.01769841 -0.03108832 0.05549379 -0.02862908 0.03180794 -0.03581358 0.01155285 0.008792259 0.02458875 -0.02982428 0.02575136 -0.06412972 0.01354423 0.04445176 -0.05406302 0.04497501 -0.02776557 0.00161076 0.03001177 -0.04645838 0.03037392 -0.1035734 0.1181774 -0.05016285 0.01158884 0.001667361 -0.0008336807 -0.004175371 0.008350743 -0.004175371 0.04879016 -0.05675833 0.02384152 -0.01587335 0 0.02334736 -0.02334736 -0.002310359 0.01988819 -0.0328453 0.04482627 -0.04385013 0.05874309 -0.05172452 0.02821972 -0.02023241 0.02815054 -0.02263884 -0.0007241623 -0.01100415 -3.044042e-05 0.05952674 -0.07189327 0.04104058 -0.02314153 0.02835241 -0.02012712 -0.005707984 0.03571176 -0.03822907 -0.01278556 0.0465237 -0.009739329 -0.04209093 0.07085665 -0.0897531 0.02883951 0.1060593 -0.1058912 -0.04185149 0.1150374 -0.08700863 0.05251006 -0.03018525 0.03480365 0.03225854 -0.04480764 -0.04179748 0.08823311 -0.05316946 -0.01089395 0.006339027 0.01851521 -0.01331145 -0.02235883 -0.03874682 0.08516893 0.006987682 -0.1086092 0.05921457 -0.01250811 -0.004895259 0.02502388 -0.04408397 0.02029851 -0.04764995 0.1237186 -0.08858401 0.05202804 -0.004043317 -0.03302252 0.03611116 -0.01984809 0.01063289 -0.006250213 -0.02564745 0.009576389 0.08613117 -0.07407725 0.04018423 -0.02985329 0.01468233 0.0105804 -0.04923649 0.04507758 -0.02686144 0.03963145 -0.07006818 0.08636021 -0.09474498 0.03264474 0.02325674 -0.0005131741 -0.03136342 0.02943521 -0.02768503 0.03031724 -0.01605042 0.009106176 -0.008955327 0.04566556 -0.07108533 -0.03183917 
0.09739741 -0.0721239 -0.006046467 0.06072969 -0.04269463 0.03975216 -0.02116566 -0.04263239 0.07033207 -0.001380716 0.01518753 -0.0118768 0.03643113 -0.02537865 -0.04768247 0.05808469 -0.05619106 0.009762351 -0.04165703 0.00642948 0.06436829 -0.06800853 0.03689091 -0.03771318
ase = mean((mhp[(l-h.long+1):l]-exp(f$f))^2)/1e6
ase # 6263.556
## [1] 6263.556
r = roll.win.rmse.wge(log.mhp,h.short,d=2,phi=est$phi,theta=est$theta) # 0.049161
r$rwRMSE
## [1] 0.0491605
r = roll.win.rmse.wge(log.mhp,h.long,d=2,phi=est$phi,theta=est$theta) # 0.1809277
r$rwRMSE
## [1] 0.1809277
For the ARIMA(1,2,1) model we get a short term Rolling Window RMSE of 0.049, a long term Rolling Window RMSE of 0.181, a short term ASE of 162.1 million and a long term ASE of 6.26 billion. Even though this model had the best long term ASE so far, it did worse in the other metrics.
Comparing all three ARMA/ARIMA models, we will choose the ARIMA(1,1,2). Here are what the short and long term forecasts look like zoomed in.
# Re-fit the chosen ARIMA(1,1,2) on the logged response to produce the
# zoomed short- and long-term forecast plots.
h.short = 4
h.long = 20
x = fed_housing_data$Median_Sales_Price
l = length(x)
# First difference (1-B) of the logged series.
d = artrans.wge(x,1)
# Close the current graphics device so the next plot starts fresh.
dev.off()
## null device
## 1
est = est.arma.wge(d,p=1,q=2)
##
##
## Coefficients of AR polynomial:
## 0.6901
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.6901B 1.4490 0.6901 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.9720 -0.4007
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.9720B+0.4007B^2 1.2128+-1.0122i 0.6330 0.1107
##
##
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE,plot=FALSE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),f$f,col="red")
lines(seq((l-h.short+1),l,1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l-h.short+1),l,1),f$ul,col="blue",lty=3)
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE,plot=FALSE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
# Overlay the ARIMA(1,1,2) long-term back-forecast on the observed log series.
# NOTE(review): f here comes from an earlier fore.arima.wge() call with
# n.ahead = h.long and lastn = TRUE -- confirm it has not been overwritten.
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
# Forecast values in red over the last h.long time points
lines(seq((l-h.long+1),l,1),f$f,col="red")
# Forecast limits as dotted blue lines
lines(seq((l-h.long+1),l,1),f$ll,col="blue",lty=3)
lines(seq((l-h.long+1),l,1),f$ul,col="blue",lty=3)
# Re-estimate the ARMA(1,2) on the differenced log series for residual analysis
est = est.arma.wge(d.log.mhp,p=1,q=2)
##
##
## Coefficients of AR polynomial:
## 0.6901
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.6901B 1.4490 0.6901 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.9720 -0.4007
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.9720B+0.4007B^2 1.2128+-1.0122i 0.6330 0.1107
##
##
# Back-forecast the final h.short points of log.mhp with the fitted ARIMA(1,1,2)
# (d = 1 applies one difference; lastn = TRUE forecasts the last observed values)
f = fore.arima.wge(log.mhp,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE,plot=FALSE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
## 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
# Model diagnostics pulled from the estimate and forecast objects
aic = est$aic
aic # -7.188559
## [1] -7.188559
# Forecast residuals; used below for the white-noise checks
resid = f$resid
# Sample mean of the differenced log series
xbar=est$xbar
xbar # 0.01211603
## [1] 0.01211603
# Estimated white-noise variance
vara = est$avar
vara # 0.0007251235
## [1] 0.0007251235
# Fitted AR and MA coefficients
est$phi # 0.6901326
## [1] 0.6901326
est$theta # 0.9720137 -0.4007360
## [1] 0.9720137 -0.4007360
The model (for the log data) can be written as: \((1-B)(1-0.6901B)(X_t-0.0121) = (1-0.9720B+0.4007B^2)a_t\), with white-noise variance \(\hat{\sigma}_a^2 = 0.0007\).
Now we will look at residuals to make sure that they are white noise.
# Visual white-noise checks on the residuals
plotts.wge(resid)
title(main="Residuals")
# Sample ACF out to lag 100 -- should mostly stay inside the confidence band
acf(resid,lag.max=100,main="ACF of Residuals")
# Parzen spectral density -- should be roughly flat for white noise
parzen.wge(resid)
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $pzgram
## [1] -0.797004813 -0.607243200 -0.344198158 -0.063232781 0.188382589
## [6] 0.378720094 0.491494531 0.523513299 0.481787408 0.381099823
## [11] 0.241868424 0.087774592 -0.057313938 -0.173121432 -0.246654172
## [16] -0.274416037 -0.262023406 -0.221275590 -0.165987055 -0.108279277
## [21] -0.056499886 -0.015078241 0.014084469 0.029552351 0.028689360
## [26] 0.007329816 -0.038795564 -0.111192859 -0.205284085 -0.308113669
## [31] -0.398868158 -0.453706774 -0.454596339 -0.398504316 -0.301238090
## [36] -0.192849981 -0.107095065 -0.070124261 -0.091740248 -0.159921479
## [41] -0.239308598 -0.276760569 -0.217958210 -0.032276536 0.269132798
## [46] 0.637082772 1.005808966 1.314459108 1.517957072 1.588609565
## [51] 1.513852867 1.293927215 0.941005071 0.479917398 -0.050304363
## [56] -0.595941123 -1.092179747 -1.473475908 -1.689279121 -1.717433678
## [61] -1.566927949 -1.270016865 -0.871193575 -0.418870627 0.039687761
## [66] 0.461726243 0.811445715 1.061599835 1.194392265 1.202139668
## [71] 1.088221025 0.868516713 0.572791063 0.244252201 -0.065594414
## [76] -0.308704934 -0.455642963 -0.504776005 -0.478004996 -0.406270007
## [81] -0.315364003 -0.219440713 -0.122757896 -0.026233531 0.065593538
## [86] 0.140919811 0.181970578 0.170235493 0.093240274 -0.049266474
## [91] -0.241155492 -0.449545009 -0.630933666 -0.745400980 -0.774350119
## [96] -0.730371530 -0.650938653 -0.580545801 -0.552964472
They appear to be white noise, and the ACF has very few values above 0.2. We will run a Ljung-Box test to be sure.
# Ljung-Box test at K = 24 lags; p = 1, q = 2 adjust the degrees of freedom
ljung.wge(resid,K=24,p=1,q=2) # p = 0.2272667, white
## Obs 0.00435293 -0.0244108 0.01796347 0.005240047 0.02127357 -0.03888973 -0.03903064 0.05468715 0.02530515 -0.142211 0.02934427 0.04507912 -0.1378715 -0.04501699 0.006830995 0.0376424 -0.03877854 -0.09888204 -0.07092551 0.1855184 -0.08752274 0.009655451 -0.06740214 -0.01970965
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 25.47283
##
## $df
## [1] 21
##
## $pval
## [1] 0.2272667
# Repeat the Ljung-Box test at K = 48 lags for a longer-horizon check
ljung.wge(resid,K=48,p=1,q=2) # p = 0.5584003, white
## Obs 0.00435293 -0.0244108 0.01796347 0.005240047 0.02127357 -0.03888973 -0.03903064 0.05468715 0.02530515 -0.142211 0.02934427 0.04507912 -0.1378715 -0.04501699 0.006830995 0.0376424 -0.03877854 -0.09888204 -0.07092551 0.1855184 -0.08752274 0.009655451 -0.06740214 -0.01970965 0.0001776949 -0.00201369 -0.02954499 0.006651707 0.08148863 -0.1088068 -0.02321975 0.1202522 -0.05252142 0.1307315 0.03517719 0.05725037 0.03895821 -0.02151136 -0.04091389 0.08019348 -0.01208333 -0.0003453521 0.006175633 -0.01274082 0.02231108 0.05047774 0.009971913 0.01279219
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 42.96777
##
## $df
## [1] 45
##
## $pval
## [1] 0.5584003
The test gives more evidence that the residuals are white noise. We can also check if they are normally distributed with a histogram.
# Histogram of the residuals as an informal normality check
hist(resid)
We also want to make sure that the ACFs and Parzen Windows generated from ARIMA(1,1,2) models look similar to those of our original data.
# Multiple ACFs: compare the sample ACF of the observed (log) series against
# ACFs of simulated realizations from the fitted ARIMA(1,1,2) model.
set.seed(2)
sims <- 10
# Defect fix: pass the logical FALSE, not the string "FALSE", to `plot`
# (the string only worked through implicit coercion inside if())
ACF <- acf(log.mhp, plot = FALSE)
plot(ACF$lag, ACF$acf, type = "l", lwd = 6,
     xlab = "Lag", ylab = "ACF", main = "True ACF vs Generated Data")
for (i in seq_len(sims)) {
  # One simulated realization of length l from the fitted model
  sim <- gen.arima.wge(l, phi = est$phi, theta = est$theta, d = 1, plot = FALSE)
  ACF2 <- acf(sim, plot = FALSE)
  lines(ACF2$lag, ACF2$acf, lwd = 2, col = "red")
}
# Multiple Parzen windows: compare the spectral density of the observed (log)
# series against spectra of simulated realizations from the fitted model.
set.seed(3)
sims <- 10
# Defect fix: pass the logical FALSE, not the string "FALSE", to `plot`
SpecDen <- parzen.wge(log.mhp, plot = FALSE)
plot(SpecDen$freq, SpecDen$pzgram, type = "l", lwd = 6,
     xlab = "Frequency", ylab = "dB",
     main = "True Spectral Density vs Generated Data")
for (i in seq_len(sims)) {
  # gen.aruma.wge with d = 1 and no seasonal factor is the same ARIMA model
  # used for the ACF comparison above
  sim <- gen.aruma.wge(l, phi = est$phi, theta = est$theta, d = 1, plot = FALSE)
  SpecDen2 <- parzen.wge(sim, plot = FALSE)
  lines(SpecDen2$freq, SpecDen2$pzgram, lwd = 2, col = "red")
}
The ACFs don’t match up exactly, but have the same slowly dampening behavior. The Parzen Windows are very close to the original.
We will look at the short and long term forecasts of future data as well.
# Forecast horizons: 4 quarters (1 year) short term, 20 quarters (5 years) long term
h.short = 4
h.long = 20
# NOTE(review): later plots label the y-axis "log Median Housing Sales Price" --
# confirm Median_Sales_Price is already log-transformed in fed_housing_data
x = fed_housing_data$Median_Sales_Price
l = length(x)
# First difference (1-B)x_t
d = artrans.wge(x,1)
# Close the graphics device (artrans.wge produced plots)
dev.off()
## null device
## 1
# Re-fit the ARMA(1,2) to the differenced series
est = est.arma.wge(d,p=1,q=2)
##
##
## Coefficients of AR polynomial:
## 0.6901
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.6901B 1.4490 0.6901 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.9720 -0.4007
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.9720B+0.4007B^2 1.2128+-1.0122i 0.6330 0.1107
##
##
# Forecast h.short steps BEYOND the end of the series (lastn = FALSE)
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=FALSE,plot=FALSE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
## 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
# Plot the observed series plus the short-term future forecast
# (indexing x past its length yields NA, which extends the x-axis without drawing)
plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Short Term Forecast",ylim=c(ymin_plot,ymax_future))
# Forecasts in red beyond the last observation
lines(seq((l+1),(l+h.short),1),f$f,col="red")
lines(seq((l+1),(l+h.short),1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.short),1),f$ul,col="blue",lty=3)
# Long-term (20-quarter) forecast beyond the end of the series
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)
## y.arma 0.02334736 -0.0051414 0.06001801 0.03809985 0.03218669 0.00451468 0.02447286 0.01742964 0.05463544 -0.002047084 0.05579136 0.02677024 0.04248099 0.0143629 0.05040163 0.02675745 0.04042588 0.02504043 -0.03299592 0.01741928 0.004698521 0.01396454 0.02284943 0.006006024 0.03818379 -0.002886005 0.0171924 -0.05849621 0.04706751 -0.004319661 0.03265017 0.02346553 0.02159328 0.03283289 -0.0195701 0.02985296 0.03146893 0.003710579 -0.0136733 0.03565221 0.0179538 -0.01313452 0.04235927 0.01373019 0.04553813 0.00972455 0.0212774 0.03006966 0.05465841 0.02483413 0.0505855 -0.01354423 0 0.04445176 -0.009611258 0.03536375 0.007598179 0.009208939 0.03922071 -0.007237667 0.02313625 -0.08043711 0.03774033 -0.01242252 -0.0008336807 0.0008336807 0 -0.004175371 0.004175371 0 0.04879016 -0.00796817 0.01587335 0 0 0.02334736 0 -0.002310359 0.01757783 -0.01526747 0.0295588 -0.01429133 0.04445176 -0.007272759 0.02094696 0.0007145409 0.02886508 0.006226239 0.005502077 -0.005502077 -0.005532518 0.05399422 -0.01789905 0.02314153 0 0.02835241 0.008225292 0.002517308 0.03822907 0 -0.01278556 0.03373814 0.02399881 -0.01809212 0.05276453 -0.03698857 -0.008149056 0.09791027 -0.007980888 -0.04983237 0.06520506 -0.02180358 0.03070649 0.0005212406 0.03532489 0.06758343 0.02277579 -0.01902168 0.06921143 0.01604197 0.005148017 0.01148704 0.03000225 0.0166908 -0.005668031 -0.04441485 0.04075408 0.04774176 -0.06086746 -0.001652893 -0.014161 -0.01905626 0.005967622 -0.03811635 -0.01781784 -0.06546779 0.0582508 -0.03033321 0.02169483 0.01765151 -0.01537101 0.02074015 0.0008920607 0.01152495 0.005274738 -0.02037272 -0.01079633 0.07533485 0.001257598 0.04144183 0.01158854 0.02627087 0.03685127 -0.01238522 0.03269236 0.00583092 0.04546237 -0.02460581 0.0617544 -0.03299058 -0.0003458413 0.0229109 0.02239773 -0.008965692 0.02046952 -0.007215512 0.02310173 0.007051311 0.01615749 0.00720216 0.05286772 -0.01821762 -0.05005679 0.04734063 -0.02478328 -0.03082975 0.02989995 -0.01279468 0.02695748 
## 0.005791816 -0.03684057 0.0334915 0.03211079 0.04729832 0.03542152 0.07185265 0.04647401 -0.001208459 0.05687623 0.0006851662 0.01044752 -0.03120951 -0.02478003 0.03958826 -0.02842027 0.008470639 -0.02924254
# Plot the observed series plus the long-term future forecast
plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="ARIMA(1,1,2) Long Term Forecast",ylim=c(ymin_plot,ymax_future))
# Forecasts in red, limits as dotted blue lines
lines(seq((l+1),(l+h.long),1),f$f,col="red")
lines(seq((l+1),(l+h.long),1),f$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.long),1),f$ul,col="blue",lty=3)
Unsurprisingly, the future forecasts didn’t increase with time. This is similar to the forecasts we used for predictions.
Exogenous Variable Forecasts
In order to use the Exogenous variables in either the MLR or Multi-Variate MLP, it is more realistic to make predictions for them first.
For error metrics, we will focus on the ASE for short and long term predictions. This is because we are specifically using these short and long term predictions in future models.
Home Ownership Rate
The Home Ownership Rate looked like either an ARMA or an ARIMA(p,1,q) might be appropriate.
# Home Ownership Rate: realization, sample autocorrelations, and spectral density
x = fed_housing_data$Ownership_Rate
plotts.sample.wge(x)
## $xbar
## [1] 65.53333
##
## $autplt
## [1] 1.0000000 0.9722424 0.9483443 0.9359713 0.9199870 0.8933705 0.8751577
## [8] 0.8587103 0.8420414 0.8126124 0.7809415 0.7543250 0.7246543 0.6847273
## [15] 0.6445049 0.6104712 0.5736989 0.5329127 0.4938515 0.4583814 0.4201056
## [22] 0.3795946 0.3399360 0.3068421 0.2726943 0.2332908
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] 12.40084982 12.05208269 11.46667385 10.63837479 9.55841432
## [6] 8.21574644 6.59803371 4.69464371 2.50495993 0.06013993
## [11] -2.52569134 -4.96980287 -6.81825314 -7.77998095 -8.04843399
## [16] -8.00240687 -7.87914699 -7.78191376 -7.76265554 -7.85818084
## [21] -8.09312523 -8.47433777 -8.98713908 -9.59570117 -10.24851659
## [26] -10.88985551 -11.47485848 -11.98084976 -12.40803253 -12.77134027
## [31] -13.09179721 -13.39253528 -13.69753110 -14.02860060 -14.39874975
## [36] -14.80330620 -15.21201130 -15.56672034 -15.79120455 -15.81670210
## [41] -15.61309625 -15.20280732 -14.64701792 -14.01978302 -13.38927072
## [46] -12.81052548 -12.32491905 -11.96174963 -11.74003104 -11.66991253
## [51] -11.75354537 -11.98525285 -12.35089436 -12.82647847 -13.37653746
## [56] -13.95369967 -14.50206148 -14.96692894 -15.30987861 -15.52161538
## [61] -15.62332516 -15.65545084 -15.66236442 -15.68149000 -15.73910254
## [66] -15.85059016 -16.02243062 -16.25427422 -16.54062535 -16.87225502
## [71] -17.23773120 -17.62538046 -18.02559859 -18.43284892 -18.84631210
## [76] -19.26840545 -19.70126488 -20.14225382 -20.58010206 -20.99328730
## [81] -21.35180613 -21.62236172 -21.77529821 -21.79055554 -21.66095243
## [86] -21.39326846 -21.00804855 -20.53758741 -20.02117993 -19.49852922
## [91] -19.00371958 -18.56160453 -18.18687293 -17.88502998 -17.65440333
## [96] -17.48858032 -17.37895990 -17.31720434 -17.29734042
# First difference for the candidate ARIMA(p,1,q) model
d = artrans.wge(x,1)
We will start with the ARIMA model.
# Model identification on the differenced series; AIC favors ARMA(8,1)
aic5.wge(d,p=0:10,q=0:4,type='aic') # 8/1 best
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 6 4
## Five Smallest Values of aic
## p q aic
## 8 1 -2.113107
## 8 0 -2.111537
## 9 0 -2.108892
## 8 2 -2.105050
## 8 3 -2.103671
The model with the best AIC is p = 8, q = 1.
# Fit the ARMA(8,1) chosen by AIC to the differenced series
est = est.arma.wge(d,p=8,q=1)
##
##
## Coefficients of AR polynomial:
## 0.5294 -0.2041 0.1820 0.1108 -0.2424 0.1690 -0.1057 0.2697
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9134B 1.0948 0.9134 0.0000
## 1+0.8712B -1.1478 0.8712 0.5000
## 1-1.3170B+0.7430B^2 0.8862+-0.7486i 0.8620 0.1116
## 1-0.0293B+0.7263B^2 0.0202+-1.1733i 0.8522 0.2473
## 1+0.8591B+0.6281B^2 -0.6839+-1.0604i 0.7925 0.3412
##
##
##
##
## Coefficients of MA polynomial:
## 0.5818
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.5818B 1.7189 0.5818 0.0000
##
##
# Short-term back-forecast (last h.short quarters) from the ARIMA(8,1,1) model
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 0.5 -0.3 -0.1 0.1 0 0.3 -0.1 0 -0.3 0.5 -0.1 -0.1 -0.4 0.8 0.2 -0.6 0.1 0.9 -0.4 0.1 0 0.3 -0.3 0.1 -0.3 0.3 -0.4 -0.4 0.1 0 -0.4 0.2 0 0.1 -0.4 0.2 0 0 -0.5 0 0 -0.2 -0.4 0.1 0.2 0 0.1 -0.1 0 0.4 -0.1 -0.4 0 0.3 -0.2 0.1 -0.1 0.3 -0.3 0.2 -0.3 0.3 0.1 -0.2 0 0.3 0 -0.2 -0.1 0.4 0.1 -0.2 0.2 0.3 -0.1 -0.8 0 0.3 0.1 0 0.5 0.3 0.1 0 0.3 0.2 -0.2 0 0.3 0.3 -0.3 0.2 0.1 0.8 -0.4 0.3 -0.1 0.4 -0.1 0.2 0.1 0.5 -0.2 0 0.2 0.4 -0.1 -0.2 -0.2 0.4 0.3 -0.3 0 0.4 0.2 0 0.6 -0.2 0.2 -0.1 -0.5 0.2 0.2 -0.5 0.2 0.3 -0.1 -0.5 -0.2 0 -0.4 0 0.3 -0.2 -0.4 -0.2 0.1 0.2 -0.4 -0.1 -0.2 0 -0.4 -0.1 -0.5 0.4 -0.3 -0.6 0.1 0 -0.1 -0.4 0 0.3 -0.1 -0.4 -0.1 -0.3 -0.4 -0.3 -0.3 0.3 0.1 -0.3 -0.6 0.6 0.2 -0.1 0.1 0.2 0.3 0 0.1 0.1 0.4 -0.6 -0.1 0.7 0.3 0.2 2.6 -0.5 -1.6 -0.2 -0.2 0 0.1 -0.1 0.4 0.2 -0.1 0.1 -0.1 0.1 -0.3 -0.1 0
# ASE over the final h.short observations
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.03083697
## [1] 0.03083697
# Long-term back-forecast (last h.long quarters)
f = fore.arima.wge(x,d=1,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 0.5 -0.3 -0.1 0.1 0 0.3 -0.1 0 -0.3 0.5 -0.1 -0.1 -0.4 0.8 0.2 -0.6 0.1 0.9 -0.4 0.1 0 0.3 -0.3 0.1 -0.3 0.3 -0.4 -0.4 0.1 0 -0.4 0.2 0 0.1 -0.4 0.2 0 0 -0.5 0 0 -0.2 -0.4 0.1 0.2 0 0.1 -0.1 0 0.4 -0.1 -0.4 0 0.3 -0.2 0.1 -0.1 0.3 -0.3 0.2 -0.3 0.3 0.1 -0.2 0 0.3 0 -0.2 -0.1 0.4 0.1 -0.2 0.2 0.3 -0.1 -0.8 0 0.3 0.1 0 0.5 0.3 0.1 0 0.3 0.2 -0.2 0 0.3 0.3 -0.3 0.2 0.1 0.8 -0.4 0.3 -0.1 0.4 -0.1 0.2 0.1 0.5 -0.2 0 0.2 0.4 -0.1 -0.2 -0.2 0.4 0.3 -0.3 0 0.4 0.2 0 0.6 -0.2 0.2 -0.1 -0.5 0.2 0.2 -0.5 0.2 0.3 -0.1 -0.5 -0.2 0 -0.4 0 0.3 -0.2 -0.4 -0.2 0.1 0.2 -0.4 -0.1 -0.2 0 -0.4 -0.1 -0.5 0.4 -0.3 -0.6 0.1 0 -0.1 -0.4 0 0.3 -0.1 -0.4 -0.1 -0.3 -0.4 -0.3 -0.3 0.3 0.1 -0.3 -0.6 0.6 0.2 -0.1 0.1 0.2 0.3 0 0.1 0.1 0.4 -0.6 -0.1 0.7 0.3 0.2 2.6 -0.5 -1.6 -0.2 -0.2 0 0.1 -0.1 0.4 0.2 -0.1 0.1 -0.1 0.1 -0.3 -0.1 0
# ASE over the final h.long observations
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 2.077416
## [1] 2.077416
This model has a short term ASE of 0.031 and a long term ASE of 2.08.
Next will compare this to an ARMA model.
# Model identification on the ORIGINAL series for an ARMA alternative; AIC favors ARMA(9,1)
aic5.wge(x,p=0:12,q=0:2,type='aic') # 9/1 best
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 9 1 -2.144284
## 9 2 -2.134332
## 10 0 -2.123071
## 9 0 -2.122279
## 12 2 -2.116436
The model it chose had p = 9 and q = 1.
# Fit the ARMA(9,1) chosen by AIC on the undifferenced series
est = est.arma.wge(x,p=9,q=1)
##
##
## Coefficients of AR polynomial:
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.9534B+0.9571B^2 1.0204+-0.0589i 0.9783 0.0092
## 1-1.3559B+0.7452B^2 0.9098+-0.7171i 0.8632 0.1062
## 1+0.8617B -1.1605 0.8617 0.5000
## 1-0.0469B+0.7004B^2 0.0335+-1.1944i 0.8369 0.2455
## 1+0.8177B+0.6059B^2 -0.6748+-1.0932i 0.7784 0.3380
##
##
##
##
## Coefficients of MA polynomial:
## 0.7795
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.7795B 1.2828 0.7795 0.0000
##
##
# Short-term back-forecast; no d argument, so no differencing is applied
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6
# Short-term ASE for the ARMA(9,1) model
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.02468854
## [1] 0.02468854
# Long-term back-forecast with the ARMA(9,1)
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6
# Long-term ASE for the ARMA(9,1) model
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 1.108741
## [1] 1.108741
This model had better ASE values with 0.025 for short term and 1.11 for long term. This will be the predictions we use for our MLR and MLP models.
# Copies of the dataset whose exogenous columns will be overwritten with
# forecasted values for the short- and long-horizon downstream models
fed_housing_data_short = fed_housing_data
fed_housing_data_long = fed_housing_data
x = fed_housing_data$Ownership_Rate
# Re-fit the chosen ARMA(9,1) model for the Home Ownership Rate
est = est.arma.wge(x,p=9,q=1)
##
##
## Coefficients of AR polynomial:
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.9534B+0.9571B^2 1.0204+-0.0589i 0.9783 0.0092
## 1-1.3559B+0.7452B^2 0.9098+-0.7171i 0.8632 0.1062
## 1+0.8617B -1.1605 0.8617 0.5000
## 1-0.0469B+0.7004B^2 0.0335+-1.1944i 0.8369 0.2455
## 1+0.8177B+0.6059B^2 -0.6748+-1.0932i 0.7784 0.3380
##
##
##
##
## Coefficients of MA polynomial:
## 0.7795
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.7795B 1.2828 0.7795 0.0000
##
##
# Back-forecast the last h.short quarters of the ownership rate
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6
# Short-horizon predictions for the Ownership Rate
hor.pred.short = f$f
# Back-forecast the last h.long quarters of the ownership rate
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6
hor.pred.long = f$f
# Replace the final h.short / h.long observed values with the forecasts so the
# downstream MLR/MLP models use realistic (predicted) exogenous inputs
fed_housing_data_short$Ownership_Rate[(l-h.short+1):l] = hor.pred.short
fed_housing_data_long$Ownership_Rate[(l-h.long+1):l] = hor.pred.long
Housing Units Completed
The Housing Units Completed variable looked like either an ARMA or an ARIMA(p,1,q) might be appropriate for forecasting.
# Housing Units Completed: realization, sample autocorrelations, and spectral density
x = fed_housing_data$Housing_Units_Completed
plotts.sample.wge(x)
## $xbar
## [1] 1354.924
##
## $autplt
## [1] 1.000000000 0.959798082 0.927757828 0.887546260 0.839449796
## [6] 0.785021406 0.724421795 0.663945462 0.587584415 0.521735598
## [11] 0.459327920 0.391804222 0.331038625 0.272988776 0.219171898
## [16] 0.167685853 0.124990699 0.088571013 0.054233289 0.031115266
## [21] 0.011815053 -0.006148206 -0.028654674 -0.043678086 -0.059390137
## [26] -0.072592499
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] 11.4278852 11.2069031 10.8369187 10.3155427 9.6397210 8.8060964
## [7] 7.8116051 6.6544671 5.3358410 3.8625822 2.2517055 0.5370433
## [13] -1.2226229 -2.9385521 -4.5011052 -5.8132302 -6.8352084 -7.6004496
## [19] -8.1859722 -8.6701451 -9.1089491 -9.5309632 -9.9399897 -10.3202888
## [25] -10.6455581 -10.8926675 -11.0556563 -11.1512734 -11.2119164 -11.2716270
## [31] -11.3544483 -11.4700563 -11.6162799 -11.7857504 -11.9732807 -12.1806396
## [37] -12.4166557 -12.6930160 -13.0180937 -13.3913894 -13.8002910 -14.2201381
## [43] -14.6182596 -14.9617572 -15.2265628 -15.4031355 -15.4952227 -15.5127543
## [49] -15.4639068 -15.3512372 -15.1736940 -14.9328641 -14.6394550 -14.3159142
## [55] -13.9937507 -13.7075613 -13.4890602 -13.3631708 -13.3463799 -13.4465623
## [61] -13.6634273 -13.9890858 -14.4085938 -14.9005598 -15.4379665 -15.9892628
## [67] -16.5196245 -16.9923207 -17.3706029 -17.6211731 -17.7199788 -17.6589302
## [73] -17.4496569 -17.1210744 -16.7117425 -16.2613673 -15.8051121 -15.3715052
## [79] -14.9827543 -14.6559553 -14.4041964 -14.2371325 -14.1609597 -14.1778746
## [85] -14.2851693 -14.4742024 -14.7296668 -15.0298492 -15.3487446 -15.6604883
## [91] -15.9452324 -16.1938387 -16.4083643 -16.5974483 -16.7688868 -16.9232350
## [97] -17.0515395 -17.1384247 -17.1693705
# First difference, examined before rejecting the ARIMA(p,1,q) option below
d = artrans.wge(x,1)
However, the autocorrelations here appear to dampen more quickly than those of the Home Ownership Rate, meaning that an ARIMA(p,1,q) model would be even less appropriate. Since we chose the ARMA model over the ARIMA model for the Home Ownership Rate, we will focus only on the ARMA model for Housing Units Completed.
# Model identification on the original series; AIC favors AR(10)
aic5.wge(x,p=0:10,q=0:1,type='aic') # 10/0 best
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 10 0 9.024872
## 9 0 9.025447
## 9 1 9.028069
## 8 0 9.028335
## 8 1 9.032021
The highest AIC value was for p = 10 and q = 0.
# Fit the AR(10) model chosen by AIC
est = est.ar.wge(x,p=10)
##
##
## Coefficients of AR polynomial:
## 0.8984 0.1900 0.0176 -0.0870 0.0137 -0.0379 0.1724 -0.3410 0.0211 0.0961
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7434B+0.7704B^2 1.1315+-0.1333i 0.8777 0.0187
## 1+0.4915B+0.7394B^2 -0.3323+-1.1144i 0.8599 0.2961
## 1+1.5034B+0.7169B^2 -1.0486+-0.5435i 0.8467 0.4239
## 1-0.8325B+0.6888B^2 0.6043+-1.0424i 0.8300 0.1664
## 1-0.7644B 1.3083 0.7644 0.0000
## 1+0.4469B -2.2378 0.4469 0.5000
##
##
# Short-term back-forecast from the AR(10) fit (no differencing, no MA term)
f = fore.arima.wge(x,phi=est$phi,n.ahead=h.short,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
ase = mean((x[(l-h.short+1):l]-f$f)^2)/1e3
ase # 12.98288
## [1] 12.98288
f = fore.arima.wge(x,phi=est$phi,n.ahead=h.long,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
ase = mean((x[(l-h.long+1):l]-f$f)^2)/1e3
ase # 8.546039
## [1] 8.546039
This gives a short term ASE of 13.0k and a long term ASE of 8.5k. Playing around with some of the other candidate models from the aic5 output showed that an ARMA(9,1) model does slightly better for long term predictions and slightly worse for short term predictions.
est = est.arma.wge(x,p=9,q=1)
##
##
## Coefficients of AR polynomial:
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7319B+0.7686B^2 1.1267+-0.1778i 0.8767 0.0249
## 1-0.8710B 1.1481 0.8710 0.0000
## 1+1.5502B+0.7380B^2 -1.0502+-0.5020i 0.8591 0.4290
## 1+0.5020B+0.6999B^2 -0.3586+-1.1402i 0.8366 0.2985
## 1-0.8558B+0.6584B^2 0.6499+-1.0471i 0.8114 0.1616
##
##
##
##
## Coefficients of MA polynomial:
## 0.5043
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.5043B 1.9829 0.5043 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
ase = mean((x[(l-h.short+1):l]-f$f)^2)/1e3
ase # 13.44075
## [1] 13.44075
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
ase = mean((x[(l-h.long+1):l]-f$f)^2)/1e3
ase # 8.516217
## [1] 8.516217
We will proceed with the ARMA(9,1) forecast for our future models that use exogenous variables.
x = fed_housing_data$Housing_Units_Completed
est = est.arma.wge(x,p=9,q=1)
##
##
## Coefficients of AR polynomial:
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7319B+0.7686B^2 1.1267+-0.1778i 0.8767 0.0249
## 1-0.8710B 1.1481 0.8710 0.0000
## 1+1.5502B+0.7380B^2 -1.0502+-0.5020i 0.8591 0.4290
## 1+0.5020B+0.6999B^2 -0.3586+-1.1402i 0.8366 0.2985
## 1-0.8558B+0.6584B^2 0.6499+-1.0471i 0.8114 0.1616
##
##
##
##
## Coefficients of MA polynomial:
## 0.5043
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.5043B 1.9829 0.5043 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
huc.pred.short = f$f
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
huc.pred.long = f$f
# Replace the held-out tail of Housing_Units_Completed with the ARMA(9,1)
# forecasts so later models that use it as an exogenous variable see
# predicted (not actual) values over the evaluation window
fed_housing_data_short$Housing_Units_Completed[(l-h.short+1):l] = huc.pred.short
fed_housing_data_long$Housing_Units_Completed[(l-h.long+1):l] = huc.pred.long
Supply of New Houses
Similar to Housing Units Completed, an ARMA model seems like it would be most appropriate for Supply of New Houses.
x = fed_housing_data$Supply_New_Houses
plotts.sample.wge(x)
## $xbar
## [1] 6.167172
##
## $autplt
## [1] 1.000000000 0.840571774 0.745101265 0.663968698 0.563328441
## [6] 0.489741871 0.419594057 0.325865642 0.257517222 0.230333020
## [11] 0.176617503 0.124718033 0.057267731 0.015708696 0.002656971
## [16] -0.031243001 -0.045540921 -0.054571142 -0.086637135 -0.094349258
## [21] -0.090844434 -0.085823390 -0.112309986 -0.115799497 -0.138576373
## [26] -0.143176298
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] 9.5891631 9.4770320 9.2843191 9.0037218 8.6275394 8.1495676
## [7] 7.5669926 6.8822183 6.1045806 5.2517494 4.3502668 3.4342686
## [13] 2.5414258 1.7061818 0.9524166 0.2890741 -0.2890919 -0.7969233
## [19] -1.2536889 -1.6795409 -2.0935880 -2.5125122 -2.9490091 -3.4100709
## [25] -3.8954671 -4.3966342 -4.8959536 -5.3665345 -5.7734357 -6.0784044
## [31] -6.2499149 -6.2763837 -6.1748891 -5.9881487 -5.7713034 -5.5772650
## [37] -5.4474826 -5.4089954 -5.4753662 -5.6491352 -5.9245893 -6.2904934
## [43] -6.7326928 -7.2362671 -7.7864647 -8.3673321 -8.9572359 -9.5218201
## [49] -10.0079681 -10.3467888 -10.4743998 -10.3657394 -10.0546383 -9.6189296
## [55] -9.1454707 -8.7037197 -8.3370423 -8.0646735 -7.8865272 -7.7876591
## [61] -7.7424992 -7.7201909 -7.6918762 -7.6388136 -7.5581028 -7.4629285
## [67] -7.3773173 -7.3285781 -7.3409229 -7.4317827 -7.6103898 -7.8774389
## [73] -8.2247650 -8.6344430 -9.0774007 -9.5127345 -9.8903916 -10.1604198
## [79] -10.2886844 -10.2715343 -10.1383759 -9.9394841 -9.7281415 -9.5473781
## [85] -9.4243977 -9.3704536 -9.3833644 -9.4512929 -9.5574867 -9.6856568
## [91] -9.8248218 -9.9717173 -10.1293502 -10.3020043 -10.4887435 -10.6782054
## [97] -10.8474029 -10.9665648 -11.0097488
# First difference of Supply of New Houses, computed to inspect stationarity
d = artrans.wge(x,1)
# NOTE(review): model order selection below is run on the original series x,
# not the differenced series d -- confirm fitting on the undifferenced data
# is intentional (consistent with the ARMA-over-ARIMA choice above)
aic5.wge(x,p=0:4,q=0:1,type='aic') # 1/0 best
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 1 0 -0.1975496
## 1 1 -0.1970961
## 2 0 -0.1970576
## 3 1 -0.1966637
## 4 0 -0.1912306
The lowest (best) AIC value was for p = 1 and q = 0.
est = est.ar.wge(x,p=1)
##
##
## Coefficients of AR polynomial:
## 0.8594
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.8594B 1.1636 0.8594 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,n.ahead=h.short,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.6951032
## [1] 0.6951032
f = fore.arima.wge(x,phi=est$phi,n.ahead=h.long,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 3.865913
## [1] 3.865913
This gives a short term ASE of 0.70 and a long term ASE of 3.87. Playing around with some of the other candidate models from the aic5 output showed that an ARMA(1,1) model does slightly better for short term predictions and slightly worse for long term predictions.
est = est.arma.wge(x,p=1,q=1)
##
##
## Coefficients of AR polynomial:
## 0.8930
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.8930B 1.1198 0.8930 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.1287
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.1287B 7.7707 0.1287 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 0.4791473
## [1] 0.4791473
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 3.901133
## [1] 3.901133
The short term ASE for the ARMA(1,1) is significantly better, but the long term ASE is a little worse. We will proceed with the ARMA(1,1) model.
x = fed_housing_data$Supply_New_Houses
est = est.arma.wge(x,p=1,q=1)
##
##
## Coefficients of AR polynomial:
## 0.8930
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.8930B 1.1198 0.8930 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.1287
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.1287B 7.7707 0.1287 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
snh.pred.short = f$f
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
snh.pred.long = f$f
# Replace the held-out tail of Supply_New_Houses with the ARMA(1,1)
# forecasts so later models that use it as an exogenous variable see
# predicted (not actual) values over the evaluation window
fed_housing_data_short$Supply_New_Houses[(l-h.short+1):l] = snh.pred.short
fed_housing_data_long$Supply_New_Houses[(l-h.long+1):l] = snh.pred.long
House price index
x = fed_housing_data$Housing_Price_Index
plotts.sample.wge(x)
## $xbar
## [1] 260.0757
##
## $autplt
## [1] 1.0000000 0.9729202 0.9462056 0.9191307 0.8909986 0.8628031 0.8357103
## [8] 0.8082155 0.7793027 0.7505719 0.7251203 0.7011544 0.6785046 0.6582523
## [15] 0.6404179 0.6234247 0.6071393 0.5914399 0.5759915 0.5608527 0.5458586
## [22] 0.5311570 0.5169824 0.5031950 0.4892727 0.4755910
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] 12.2782408 11.9222965 11.3282016 10.4956796 9.4267581 8.1292098
## [7] 6.6225197 4.9470123 3.1742366 1.4094245 -0.2313272 -1.6725572
## [13] -2.9162225 -4.0138553 -5.0001956 -5.8567808 -6.5364365 -7.0250111
## [19] -7.3764536 -7.6840015 -8.0258094 -8.4339996 -8.8942563 -9.3642704
## [25] -9.8011060 -10.1848714 -10.5238940 -10.8395701 -11.1462611 -11.4424491
## [31] -11.7173103 -11.9654010 -12.1970206 -12.4359357 -12.7070093 -13.0231920
## [37] -13.3793254 -13.7548915 -14.1237551 -14.4655588 -14.7716121 -15.0415198
## [43] -15.2744035 -15.4630500 -15.5964283 -15.6686425 -15.6865151 -15.6690104
## [49] -15.6391463 -15.6150166 -15.6056625 -15.6128189 -15.6358467 -15.6760504
## [55] -15.7377682 -15.8261407 -15.9436261 -16.0877799 -16.2516731 -16.4265637
## [61] -16.6050338 -16.7825128 -16.9562814 -17.1229779 -17.2768337 -17.4103793
## [67] -17.5174995 -17.5967749 -17.6525899 -17.6931056 -17.7265509 -17.7583210
## [73] -17.7904616 -17.8233217 -17.8578059 -17.8964986 -17.9429344 -17.9996839
## [79] -18.0666917 -18.1410233 -18.2181651 -18.2939853 -18.3660345 -18.4333635
## [85] -18.4951505 -18.5493235 -18.5923666 -18.6206174 -18.6322323 -18.6284354
## [91] -18.6131454 -18.5912560 -18.5667599 -18.5418862 -18.5175970 -18.4948317
## [97] -18.4754511 -18.4621442 -18.4573749
# Apply two first differences, i.e. the factor (1-B)^2, to the
# Housing Price Index
d = artrans.wge(x,1)
d2 = artrans.wge(d,1)
# Close the graphics device opened by artrans.wge before the next plot
dev.off()
## null device
## 1
# Parzen spectral estimate of the twice-differenced series; the remaining
# peak near frequency 0.25 motivates the additional (1+B^2) factor below
parzen.wge(d2)
## $freq
## [1] 0.005102041 0.010204082 0.015306122 0.020408163 0.025510204 0.030612245
## [7] 0.035714286 0.040816327 0.045918367 0.051020408 0.056122449 0.061224490
## [13] 0.066326531 0.071428571 0.076530612 0.081632653 0.086734694 0.091836735
## [19] 0.096938776 0.102040816 0.107142857 0.112244898 0.117346939 0.122448980
## [25] 0.127551020 0.132653061 0.137755102 0.142857143 0.147959184 0.153061224
## [31] 0.158163265 0.163265306 0.168367347 0.173469388 0.178571429 0.183673469
## [37] 0.188775510 0.193877551 0.198979592 0.204081633 0.209183673 0.214285714
## [43] 0.219387755 0.224489796 0.229591837 0.234693878 0.239795918 0.244897959
## [49] 0.250000000 0.255102041 0.260204082 0.265306122 0.270408163 0.275510204
## [55] 0.280612245 0.285714286 0.290816327 0.295918367 0.301020408 0.306122449
## [61] 0.311224490 0.316326531 0.321428571 0.326530612 0.331632653 0.336734694
## [67] 0.341836735 0.346938776 0.352040816 0.357142857 0.362244898 0.367346939
## [73] 0.372448980 0.377551020 0.382653061 0.387755102 0.392857143 0.397959184
## [79] 0.403061224 0.408163265 0.413265306 0.418367347 0.423469388 0.428571429
## [85] 0.433673469 0.438775510 0.443877551 0.448979592 0.454081633 0.459183673
## [91] 0.464285714 0.469387755 0.474489796 0.479591837 0.484693878 0.489795918
## [97] 0.494897959 0.500000000
##
## $pzgram
## [1] -10.34279517 -9.98707527 -9.44865876 -8.78719239 -8.05897430
## [6] -7.31041968 -6.57762865 -5.88826262 -5.26349808 -4.71940306
## [11] -4.26780579 -3.91686547 -3.67148234 -3.53359444 -3.50235713
## [16] -3.57418301 -3.74261345 -3.99798412 -4.32683216 -4.71099290
## [21] -5.12641206 -5.54197027 -5.91917652 -6.21425948 -6.38412736
## [26] -6.39564482 -6.23410223 -5.90544523 -5.43096931 -4.83900922
## [31] -4.15911471 -3.42028916 -2.65119399 -1.87971474 -1.13094464
## [36] -0.42426542 0.22929907 0.82857798 1.38205740 1.90556547
## [41] 2.41764521 2.93328115 3.45799834 3.98499129 4.49667567
## [46] 4.96958060 5.37998631 5.70818229 5.94065817 6.07059152
## [51] 6.09727492 6.02494653 5.86123982 5.61535535 5.29612909
## [56] 4.91036143 4.46192749 3.95214761 3.38160525 2.75317137
## [61] 2.07556803 1.36636784 0.65279634 -0.03160561 -0.65535551
## [66] -1.19994093 -1.66703125 -2.07665971 -2.45748803 -2.83554252
## [71] -3.22693514 -3.63596757 -4.05714444 -4.47880516 -4.88624518
## [76] -5.26287221 -5.58931279 -5.84223501 -5.99588123 -6.02835672
## [81] -5.93096809 -5.71484147 -5.40960546 -5.05481896 -4.68972712
## [86] -4.34617281 -4.04576320 -3.79996389 -3.61152832 -3.47636384
## [91] -3.38558547 -3.32778303 -3.29144017 -3.26715430 -3.24907710
## [96] -3.23509357 -3.22573312 -3.22238869
# Difference out the 0.25 frequency with the factor (1+B^2), supplied to
# artrans.wge as the coefficient vector c(0,-1)
d3 = artrans.wge(d2,c(0,-1))
dev.off()
## null device
## 1
# Check the sample autocorrelations of the fully differenced series for
# white-noise behavior
acf(d3,lag.max=100)
This plot is more similar to the variable of interest, Median Sale Price. This means that a linear signal plus noise model could be useful.
However, unlike the other variables, a single difference did not look like white noise. Even after differencing out (1-B)^2, the spectrum still showed a peak at frequency 0.25. Differencing out that frequency as well looked more like white noise. So we will look at both a signal plus noise model and a (1-B)^2*(1+B^2) model.
f = fore.sigplusnoise.wge(x,max.p=0,n.ahead=h.short,lastn=TRUE)
aic5.ar.wge(f$resid,p=0:20) # p = 9
## ---------WORKING... PLEASE WAIT...
##
##
## Error in aic calculation at 1
## Error in aic calculation at 2
## Error in aic calculation at 3
## Error in aic calculation at 12
## Error in aic calculation at 14
## Error in aic calculation at 17
## Error in aic calculation at 18
## Error in aic calculation at 19
## Error in aic calculation at 20
## Five Smallest Values of aic
## Method= mle
## p aic
## 9 2.299272
## 6 2.299300
## 7 2.305336
## 10 2.305733
## 13 2.311372
The noise model with the lowest (best) AIC value is p = 9.
f = fore.sigplusnoise.wge(x,max.p=9,n.ahead=h.short,lastn=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.8014 -1.2205 0.8755 -0.2603 -0.5252 0.4232 -0.2115 0.2703 -0.1651
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.9514B+0.9573B^2 1.0192+-0.0764i 0.9784 0.0119
## 1+0.0202B+0.8230B^2 -0.0123+-1.1022i 0.9072 0.2518
## 1+0.7906B -1.2649 0.7906 0.5000
## 1-1.2751B+0.5746B^2 1.1097+-0.7136i 0.7580 0.0910
## 1+0.6143B+0.4614B^2 -0.6656+-1.3130i 0.6793 0.3247
##
##
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 161.2832
## [1] 161.2832
f = fore.sigplusnoise.wge(x,max.p=9,n.ahead=h.long,lastn=TRUE)
##
##
## Coefficients of AR polynomial:
## 1.8014 -1.2205 0.8755 -0.2603 -0.5252 0.4232 -0.2115 0.2703 -0.1651
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.9514B+0.9573B^2 1.0192+-0.0764i 0.9784 0.0119
## 1+0.0202B+0.8230B^2 -0.0123+-1.1022i 0.9072 0.2518
## 1+0.7906B -1.2649 0.7906 0.5000
## 1-1.2751B+0.5746B^2 1.1097+-0.7136i 0.7580 0.0910
## 1+0.6143B+0.4614B^2 -0.6656+-1.3130i 0.6793 0.3247
##
##
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 10014.69
## [1] 10014.69
The short term ASE is 161 and the long term ASE is 10,014.
aic5.wge(d3,p=0:8,q=0:6,type='aic') # 4/5 best
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 4 5 2.318766
## 3 5 2.318801
## 5 5 2.331189
## 6 5 2.335669
## 2 5 2.343238
It looks like the model with p = 4 and q = 5 had the best AIC.
est = est.arma.wge(d3,p=4,q=5)
##
##
## Coefficients of AR polynomial:
## -0.2653 0.1543 0.4275 0.2777
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.8773B 1.1399 0.8773 0.0000
## 1+0.5373B+0.5229B^2 -0.5138+-1.2839i 0.7231 0.3106
## 1+0.6053B -1.6520 0.6053 0.5000
##
##
##
##
## Coefficients of MA polynomial:
## -0.0912 -0.2272 0.4182 0.4609 0.4392
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-1.0000B 1.0000 1.0000 0.0000
## 1-0.0322B+0.8167B^2 0.0197+-1.1064i 0.9037 0.2472
## 1+1.1234B+0.5378B^2 -1.0444+-0.8767i 0.7334 0.3889
##
##
m = mult.wge(fac1=est$phi,fac2=c(0,-1))
factor.wge(m$model.coef) # looks good
##
##
## Coefficients of AR polynomial:
## -0.2653 -0.8457 0.1622 0.4320 0.4275 0.2777
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+0.0000B+1.0000B^2 0.0000+-1.0000i 1.0000 0.2500
## 1-0.8773B 1.1399 0.8773 0.0000
## 1+0.5373B+0.5229B^2 -0.5138+-1.2839i 0.7231 0.3106
## 1+0.6053B -1.6520 0.6053 0.5000
##
##
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 16.01932
## [1] 16.01932
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 2109.469
## [1] 2109.469
This model did significantly better, with a short term ASE of 16 and a long term ASE of 2,109. Testing out some of the other values from the aic5 output, we found that p = 6 and q = 5 did even better.
est = est.arma.wge(d3,p=6,q=5)
##
##
## Coefficients of AR polynomial:
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.7230B+0.8346B^2 -1.0323+-0.3642i 0.9136 0.4460
## 1-0.8558B 1.1685 0.8558 0.0000
## 1+0.4123B+0.6580B^2 -0.3132+-1.1923i 0.8112 0.2909
## 1-0.4679B 2.1373 0.4679 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## -0.6419 -0.0609 0.3220 0.7473 0.6335
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-1.0000B 1.0000 1.0000 0.0000
## 1+0.0687B+0.8449B^2 -0.0407+-1.0872i 0.9192 0.2560
## 1+1.5732B+0.7497B^2 -1.0492+-0.4827i 0.8659 0.4314
##
##
m = mult.wge(fac1=est$phi,fac2=c(0,-1))
factor.wge(m$model.coef) # looks good
##
##
## Coefficients of AR polynomial:
## -0.8116 -0.7769 -0.2285 0.7481 0.7183 0.3051 0.1352 -0.2199
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+0.0000B+1.0000B^2 0.0000+-1.0000i 1.0000 0.2500
## 1+1.7230B+0.8346B^2 -1.0323+-0.3642i 0.9136 0.4460
## 1-0.8558B 1.1685 0.8558 0.0000
## 1+0.4123B+0.6580B^2 -0.3132+-1.1923i 0.8112 0.2909
## 1-0.4679B 2.1373 0.4679 0.0000
##
##
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
ase = mean((x[(l-h.short+1):l]-f$f)^2)
ase # 14.86637
## [1] 14.86637
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
ase = mean((x[(l-h.long+1):l]-f$f)^2)
ase # 558.2356
## [1] 558.2356
This model had a short term ASE of 15 and a long term ASE of 558. This is the model we will use to get the forecasts.
x = fed_housing_data$Housing_Price_Index
d = artrans.wge(x,1)
d2 = artrans.wge(d,1)
d3 = artrans.wge(d2,c(0,-1))
est = est.arma.wge(d3,p=6,q=5)
##
##
## Coefficients of AR polynomial:
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.7230B+0.8346B^2 -1.0323+-0.3642i 0.9136 0.4460
## 1-0.8558B 1.1685 0.8558 0.0000
## 1+0.4123B+0.6580B^2 -0.3132+-1.1923i 0.8112 0.2909
## 1-0.4679B 2.1373 0.4679 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## -0.6419 -0.0609 0.3220 0.7473 0.6335
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-1.0000B 1.0000 1.0000 0.0000
## 1+0.0687B+0.8449B^2 -0.0407+-1.0872i 0.9192 0.2560
## 1+1.5732B+0.7497B^2 -1.0492+-0.4827i 0.8659 0.4314
##
##
m = mult.wge(fac1=est$phi,fac2=c(0,-1))
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.short,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
hpi.pred.short = f$f
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=TRUE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
hpi.pred.long = f$f
fed_housing_data_short$Housing_Price_Index[(l-h.short+1):l] = hpi.pred.short
fed_housing_data_long$Housing_Price_Index[(l-h.long+1):l] = hpi.pred.long
Signal Plus Noise Model
Our domain knowledge tells us that the positive trend of Median House Price is not random and does have an underlying regression line, but we can run a few tests for linear trend to be sure. We will test for linear trend using simple linear regression with a standard t-test, the Cochran Orcutt Test for Linear Trend, and the WBG Test (Woodward, Bottone, and Gray, 1997). All 3 tests have a p-value below 0.05, rejecting the null hypothesis. The residuals are not white noise yet, so we will fit an ARMA model along with the linear signal.
Test for linear Trend
# Testing for Linear Trend
t = 1:168
reg = slr.wge(train$Median_Sales_Price)
summary(reg)
## Length Class Mode
## res 168 -none- numeric
## b0hat 1 -none- numeric
## b1hat 1 -none- numeric
## pvalue 1 -none- numeric
## tstatistic 1 -none- numeric
# Cochran Orcutt Test for Linear Trend
co_test_msp = co.wge(train$Median_Sales_Price)
co_test_msp$pvalue
## [1] 4.411261e-15
# WBG Test
wbg.boot.wge(train$Median_Sales_Price)
## $p
## [1] 5
##
## $phi
## [1] 0.77778625 0.44429145 0.12173280 -0.08786636 -0.25901843
##
## $pv
## [1] 0.04010025
# Linear model according to Simple Linear Regression (ignoring correlated errors)
plotts.wge(train$Median_Sales_Price)
fit = reg$b0hat + t*reg$b1hat
points(fit, type = "l")
# Examine Residuals
resid = train$Median_Sales_Price - fit
plot(resid, type = "p")
abline(h=0)
# Quicker way to plot residuals: plot(reg$res, type = "l")
x = fed_housing_data
reg = slr.wge(x$Median_Sales_Price)
plot(reg$res, type = "l")
abline(h=0)
t=1:198
plotts.wge(x$Median_Sales_Price,xlab="Time",ylab="log Median House Price",main="Linear fit to log Median House Price")
fit = reg$b0hat + t*reg$b1hat
points(fit, type = "l")
aic5.ar.wge(reg$res,p=0:10)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## Method= mle
## p aic
## 6 -7.203842
## 5 -7.198616
## 7 -7.194018
## 8 -7.188880
## 4 -7.180527
Choosing estimates for the ARMA portion of the Signal Plus Noise Model
After comparing the MLE and Burg estimates on AIC, AICC, and BIC, we determined the MLE estimates were better for all three metrics. All estimates chose an AR(6), with the exception of the MLE estimate using the BIC, which chose an AR(2). We will move forward with the AR(6) model, compare the forecasts visually, and test whether the Average Square Error (ASE) is better for the MLE or the Burg estimates. The MLE estimates have better ASEs, as expected. While there isn’t much difference visually between the two, the MLE lower limit (blue line) does contain the actual values, whereas the Burg lower limit (green line) are slightly more narrow and doesn’t capture one of the actual values. Given the other evidence for using the MLE estimates, we will move forward with MLE estimates for an AR(6).
# Fit signal plus noise models
# MLE Estimates with AIC
spn.mle.aic = aic.ar.wge(reg$res, type = 'aic', p = 0:7) # ar(6), aic -7.269686
# MLE Estimates with AICC
spn.mle.aicc = aic.ar.wge(reg$res, type = 'aicc', p = 0:7) # ar(6), aicc -6.25239
# MLE Estimates with BIC
spn.mle.bic = aic.ar.wge(reg$res, type = 'bic', p = 0:7) # ar(2), bic -7.147245
# Burg Estimates with AIC
spn.b.aic = aic.burg.wge(reg$res, p = 1:7, type = 'aic') # ar(6), aic -7.278742
# Burg Estimates with AICC
spn.b.aicc = aic.burg.wge(reg$res, p = 1:7, type = 'aicc') # ar(6), aicc -6.261446
# Burg Estimates with BIC
spn.b.bic = aic.burg.wge(reg$res, p = 1:7, type = 'bic') # ar(6), bic -7.157681
# MLE estimates are better for all types: aic, aicc, bic - see comparison table
# Fit with MLE estimates, train/test set
fit.mle.sig = fore.sigplusnoise.wge(train$Median_Sales_Price, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 30)
##
##
## Coefficients of AR polynomial:
## 0.6593 0.3962 0.1492 0.0073 -0.0863 -0.1550
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.9202B 1.0867 0.9202 0.0000
## 1-0.9024B 1.1082 0.9024 0.0000
## 1+0.0258B+0.4419B^2 -0.0292+-1.5041i 0.6647 0.2531
## 1+1.1375B+0.4224B^2 -1.3465+-0.7446i 0.6499 0.4196
##
##
# Examine residuals
plot(fit.mle.sig$resid)
# ASE with MLE estimates
ASE = mean((test$Median_Sales_Price - fit.mle.sig$f)^2)
ASEexp = mean((exp(test$Median_Sales_Price) - exp(fit.mle.sig$f))^2) # 826.5M
# Fit with burg estimates, train/test set
fit.b.sig = fore.sigplusnoise.wge(train$Median_Sales_Price, linear = TRUE, method = 'burg', freq = 0, max.p = 6, n.ahead = 30)
##
##
## Coefficients of AR polynomial:
## 0.6479 0.3963 0.1454 0.0081 -0.0849 -0.1570
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.8164B+0.8282B^2 1.0967+-0.0695i 0.9100 0.0101
## 1+0.0238B+0.4439B^2 -0.0268+-1.5007i 0.6663 0.2528
## 1+1.1448B+0.4270B^2 -1.3404+-0.7383i 0.6535 0.4199
##
##
# Examine residuals
plot(fit.b.sig$resid)
# ASE with Burg estimates
ASE.b = mean((test$Median_Sales_Price - fit.b.sig$f)^2)
ASE.b.exp = mean((exp(test$Median_Sales_Price) - exp(fit.b.sig$f))^2) # 1.11B
# Create table for comparison
labels = c("aic", "aicc", "bic", "ase for AR(6)")
mle.ests = c(spn.mle.aic$value, spn.mle.aicc$value, spn.mle.bic$value, ASE)
burg.ests = c(spn.b.aic$value, spn.b.aicc$value, spn.b.bic$value, ASE.b)
comparison = data.frame(labels,mle.ests,burg.ests)
comparison
## labels mle.ests burg.ests
## 1 aic -7.2038419 -7.212180512
## 2 aicc -6.1898929 -6.198231498
## 3 bic -7.0989717 -7.108212533
## 4 ase for AR(6) 0.0061569 0.008076531
# Different Plot
log.mhp = fed_housing_data$Median_Sales_Price
plot(log.mhp, type = 'l')
lines(seq(169,198,1), fit.mle.sig$f, col = "red")
lines(seq(169,198,1), fit.mle.sig$ll, col = "blue")
lines(seq(169,198,1), fit.mle.sig$ul, col = "blue")
lines(seq(169,198,1), fit.b.sig$f, col = "orange")
lines(seq(169,198,1), fit.b.sig$ll, col = "green")
lines(seq(169,198,1), fit.b.sig$ul, col = "green")
Signal Plus Noise using 1 Yr Horizon
The 1 Year Horizon backcast (for the past year) is pretty close to the actual values, with an ASE (on the non-lagged values) around 50M. The residuals appear to be white noise when looking at both the plot and the ACF, indicating this model has explained most of the noise of the data.
# Making sure log.mhp is the correct data
log.mhp = fed_housing_data$Median_Sales_Price
# Fit with MLE estimates, using all data
fit.mle.sig_h4 = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 4, lastn = TRUE)
##
##
## Coefficients of AR polynomial:
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.8131B+0.8234B^2 1.1010+-0.0480i 0.9074 0.0069
## 1+1.0575B+0.3859B^2 -1.3702+-0.8449i 0.6212 0.4121
## 1+0.0398B+0.3709B^2 -0.0537+-1.6410i 0.6090 0.2552
##
##
# Different Plot
plot(log.mhp, type = 'l')
lines(seq(195,198,1), fit.mle.sig_h4$f, col = "red")
lines(seq(195,198,1), fit.mle.sig_h4$ll, lty = 3, col = "blue")
lines(seq(195,198,1), fit.mle.sig_h4$ul, lty = 3, col = "blue")
# Zoomed In Plot
plot(seq(150,198,1),log.mhp[150:198], type = 'l', ylim = c(12.3, 13.2), main = "Linear Signal with AR(6) Noise Short Term Forecast",
xlab = "Time", ylab = "log Median Housing Sales Price")
lines(seq(195,198,1), fit.mle.sig_h4$f, col = "red")
lines(seq(195,198,1), fit.mle.sig_h4$ll, lty = 3, col = "blue")
lines(seq(195,198,1), fit.mle.sig_h4$ul, lty = 3, col = "blue")
# ASE with MLE estimates
ASE.h4 = mean((log.mhp[195:198] - fit.mle.sig_h4$f)^2)
ASEexp.h4 = mean((exp(log.mhp[195:198]) - exp(fit.mle.sig_h4$f))^2)
ASEexp.h4 # 50.92M
## [1] 50917914
# Examine residuals
plot(fit.mle.sig_h4$resid)
plotts.sample.wge(fit.mle.sig_h4$resid)
## $xbar
## [1] 0.0005583755
##
## $autplt
## [1] 1.000000000 -0.030928685 -0.019285044 -0.016430741 -0.018749777
## [6] -0.011967976 -0.020841820 -0.072191596 0.032269287 0.021997333
## [11] -0.139312067 0.034046387 0.050705330 -0.130392909 -0.017846377
## [16] 0.013239634 0.049386836 -0.032977920 -0.073065205 -0.066452453
## [21] 0.207698279 -0.082585175 0.031084812 -0.062395719 -0.017646344
## [26] -0.005338563
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] -2.2164539108 -1.9849206395 -1.6524595119 -1.2753123609 -0.9026883503
## [6] -0.5697330186 -0.2973356993 -0.0947071844 0.0378192323 0.1066356847
## [11] 0.1232846159 0.1030012479 0.0628682718 0.0193305301 -0.0146466493
## [16] -0.0318985733 -0.0318047089 -0.0189600869 -0.0003052964 0.0179951969
## [21] 0.0326096059 0.0432898408 0.0513817478 0.0574210279 0.0591293252
## [26] 0.0510549508 0.0263798048 -0.0196075733 -0.0856570020 -0.1619594895
## [31] -0.2305057758 -0.2696686264 -0.2626013516 -0.2060711169 -0.1144768534
## [36] -0.0160724930 0.0565573644 0.0770660068 0.0337627161 -0.0648105292
## [41] -0.1875268450 -0.2840930541 -0.2984854839 -0.1921574926 0.0349986465
## [46] 0.3425768179 0.6678254499 0.9482788033 1.1354639949 1.1985632117
## [51] 1.1230390343 0.9085430614 0.5680860846 0.1287458457 -0.3670491750
## [56] -0.8619714586 -1.2893954797 -1.5863119898 -1.7101006697 -1.6495832635
## [61] -1.4233038891 -1.0686030283 -0.6305291543 -0.1551810623 0.3136053611
## [66] 0.7364264052 1.0801353372 1.3192114335 1.4366475909 1.4246924028
## [71] 1.2859157508 1.0348495407 0.6998155913 0.3233817577 -0.0414560620
## [76] -0.3419837227 -0.5414473463 -0.6307151786 -0.6265856294 -0.5579564343
## [81] -0.4506893185 -0.3202405118 -0.1733955011 -0.0151343742 0.1447352341
## [86] 0.2892056076 0.3969424807 0.4485239121 0.4332923725 0.3548702321
## [91] 0.2340288625 0.1070212601 0.0168377124 -0.0026877158 0.0568993323
## [96] 0.1734383141 0.3043620452 0.4046463365 0.4419284082
acf(fit.mle.sig_h4$resid)
ljung.wge(fit.mle.sig_h4$resid, p = 6, q = 0, K = 48)
## Obs -0.03092868 -0.01928504 -0.01643074 -0.01874978 -0.01196798 -0.02084182 -0.0721916 0.03226929 0.02199733 -0.1393121 0.03404639 0.05070533 -0.1303929 -0.01784638 0.01323963 0.04938684 -0.03297792 -0.07306521 -0.06645245 0.2076983 -0.08258517 0.03108481 -0.06239572 -0.01764634 -0.005338563 0.01017477 -0.03592154 0.01086301 0.07515347 -0.1090446 -0.02897116 0.1183135 -0.06710514 0.1362477 0.02578356 0.04932395 0.02279133 -0.02142985 -0.0590818 0.079141 -0.02309291 2.742465e-05 -0.0007788745 -0.01091636 0.02298131 0.07171739 0.008512692 0.03126207
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 44.69122
##
## $df
## [1] 42
##
## $pval
## [1] 0.3594121
Signal Plus Noise using 5 Yr Horizon
The 5 Year Horizon backcast (for the past 5 years) is pretty close to the actual values, with an ASE (on the non-lagged values) around 1.1B. The residuals appear to be white noise when looking at both the plot and the ACF, indicating this model has explained most of the noise of the data.
# Making sure log.mhp is the correct data
log.mhp = fed_housing_data$Median_Sales_Price
# Fit with MLE estimates, using all data
fit.mle.sig_h20 = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 20, lastn = TRUE)
##
##
## Coefficients of AR polynomial:
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.8131B+0.8234B^2 1.1010+-0.0480i 0.9074 0.0069
## 1+1.0575B+0.3859B^2 -1.3702+-0.8449i 0.6212 0.4121
## 1+0.0398B+0.3709B^2 -0.0537+-1.6410i 0.6090 0.2552
##
##
# Different Plot
plot(log.mhp, type = 'l')
lines(seq(179,198,1), fit.mle.sig_h20$f, col = "red")
lines(seq(179,198,1), fit.mle.sig_h20$ll, lty = 3, col = "blue")
lines(seq(179,198,1), fit.mle.sig_h20$ul, lty = 3, col = "blue")
# Zoomed In
plot(seq(150,198,1),log.mhp[150:198], type = 'l', ylim = c(12.3, 13.2), main = "Linear Signal with AR(6) Noise Long Term Forecast",
xlab = "Time", ylab = "log Median Housing Sales Price")
lines(seq(179,198,1), fit.mle.sig_h20$f, col = "red")
lines(seq(179,198,1), fit.mle.sig_h20$ll, lty = 3, col = "blue")
lines(seq(179,198,1), fit.mle.sig_h20$ul, lty = 3, col = "blue")
# ASE with MLE estimates
ASE.h20 = mean((log.mhp[179:198] - fit.mle.sig_h20$f)^2)
ASEexp.h20 = mean((exp(log.mhp[179:198]) - exp(fit.mle.sig_h20$f))^2)
ASEexp.h20 # 1.1B
## [1] 1104897278
# Examine residuals
plot(fit.mle.sig_h20$resid)
plotts.sample.wge(fit.mle.sig_h20$resid)
## $xbar
## [1] 0.0005583755
##
## $autplt
## [1] 1.000000000 -0.030928685 -0.019285044 -0.016430741 -0.018749777
## [6] -0.011967976 -0.020841820 -0.072191596 0.032269287 0.021997333
## [11] -0.139312067 0.034046387 0.050705330 -0.130392909 -0.017846377
## [16] 0.013239634 0.049386836 -0.032977920 -0.073065205 -0.066452453
## [21] 0.207698279 -0.082585175 0.031084812 -0.062395719 -0.017646344
## [26] -0.005338563
##
## $freq
## [1] 0.005050505 0.010101010 0.015151515 0.020202020 0.025252525 0.030303030
## [7] 0.035353535 0.040404040 0.045454545 0.050505051 0.055555556 0.060606061
## [13] 0.065656566 0.070707071 0.075757576 0.080808081 0.085858586 0.090909091
## [19] 0.095959596 0.101010101 0.106060606 0.111111111 0.116161616 0.121212121
## [25] 0.126262626 0.131313131 0.136363636 0.141414141 0.146464646 0.151515152
## [31] 0.156565657 0.161616162 0.166666667 0.171717172 0.176767677 0.181818182
## [37] 0.186868687 0.191919192 0.196969697 0.202020202 0.207070707 0.212121212
## [43] 0.217171717 0.222222222 0.227272727 0.232323232 0.237373737 0.242424242
## [49] 0.247474747 0.252525253 0.257575758 0.262626263 0.267676768 0.272727273
## [55] 0.277777778 0.282828283 0.287878788 0.292929293 0.297979798 0.303030303
## [61] 0.308080808 0.313131313 0.318181818 0.323232323 0.328282828 0.333333333
## [67] 0.338383838 0.343434343 0.348484848 0.353535354 0.358585859 0.363636364
## [73] 0.368686869 0.373737374 0.378787879 0.383838384 0.388888889 0.393939394
## [79] 0.398989899 0.404040404 0.409090909 0.414141414 0.419191919 0.424242424
## [85] 0.429292929 0.434343434 0.439393939 0.444444444 0.449494949 0.454545455
## [91] 0.459595960 0.464646465 0.469696970 0.474747475 0.479797980 0.484848485
## [97] 0.489898990 0.494949495 0.500000000
##
## $dbz
## [1] -2.2164539108 -1.9849206395 -1.6524595119 -1.2753123609 -0.9026883503
## [6] -0.5697330186 -0.2973356993 -0.0947071844 0.0378192323 0.1066356847
## [11] 0.1232846159 0.1030012479 0.0628682718 0.0193305301 -0.0146466493
## [16] -0.0318985733 -0.0318047089 -0.0189600869 -0.0003052964 0.0179951969
## [21] 0.0326096059 0.0432898408 0.0513817478 0.0574210279 0.0591293252
## [26] 0.0510549508 0.0263798048 -0.0196075733 -0.0856570020 -0.1619594895
## [31] -0.2305057758 -0.2696686264 -0.2626013516 -0.2060711169 -0.1144768534
## [36] -0.0160724930 0.0565573644 0.0770660068 0.0337627161 -0.0648105292
## [41] -0.1875268450 -0.2840930541 -0.2984854839 -0.1921574926 0.0349986465
## [46] 0.3425768179 0.6678254499 0.9482788033 1.1354639949 1.1985632117
## [51] 1.1230390343 0.9085430614 0.5680860846 0.1287458457 -0.3670491750
## [56] -0.8619714586 -1.2893954797 -1.5863119898 -1.7101006697 -1.6495832635
## [61] -1.4233038891 -1.0686030283 -0.6305291543 -0.1551810623 0.3136053611
## [66] 0.7364264052 1.0801353372 1.3192114335 1.4366475909 1.4246924028
## [71] 1.2859157508 1.0348495407 0.6998155913 0.3233817577 -0.0414560620
## [76] -0.3419837227 -0.5414473463 -0.6307151786 -0.6265856294 -0.5579564343
## [81] -0.4506893185 -0.3202405118 -0.1733955011 -0.0151343742 0.1447352341
## [86] 0.2892056076 0.3969424807 0.4485239121 0.4332923725 0.3548702321
## [91] 0.2340288625 0.1070212601 0.0168377124 -0.0026877158 0.0568993323
## [96] 0.1734383141 0.3043620452 0.4046463365 0.4419284082
acf(fit.mle.sig_h20$resid)
ljung.wge(fit.mle.sig_h20$resid, p = 6, q = 0, K = 48)
## Obs -0.03092868 -0.01928504 -0.01643074 -0.01874978 -0.01196798 -0.02084182 -0.0721916 0.03226929 0.02199733 -0.1393121 0.03404639 0.05070533 -0.1303929 -0.01784638 0.01323963 0.04938684 -0.03297792 -0.07306521 -0.06645245 0.2076983 -0.08258517 0.03108481 -0.06239572 -0.01764634 -0.005338563 0.01017477 -0.03592154 0.01086301 0.07515347 -0.1090446 -0.02897116 0.1183135 -0.06710514 0.1362477 0.02578356 0.04932395 0.02279133 -0.02142985 -0.0590818 0.079141 -0.02309291 2.742465e-05 -0.0007788745 -0.01091636 0.02298131 0.07171739 0.008512692 0.03126207
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 44.69122
##
## $df
## [1] 42
##
## $pval
## [1] 0.3594121
Rolling Window RMSE for SPN
To compare this model with other models, we will use code that Aaron has written to create a rolling window that measures RMSE for the model. We will use the MLE estimates for p values of 1-6 to assure ourselves that an AR(6) is the best AR model to accompany the signal. The average RMSE for all 1 year windows (4 quarters) is 0.0326, and the average RMSE for all 5 year windows (20 quarters) is 0.07595.
# Aaron's Rolling Window RMSE Code
# If you wanted to use all the fore.sigplusnoise.wge parameters, it would look something like this:
series = log.mhp
horizon = 12
linear = TRUE
method = "mle"
freq=0
max.p=5
# Rolling Window for 1 Year Horizon with MLE
horizon = 4
method = "mle"
source("functions_Aaron.R")
mle.p1h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=1) # 0.03564353
mle.p2h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=2) # 0.03547613
mle.p3h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=3) # 0.03495764
mle.p4h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=4) # 0.03326752
mle.p5h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=5) # 0.03261551
mle.p6h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=6) # 0.03259507
mle.p7h4 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=7) # 0.03259507
# Rolling Window for 5 Year Horizon with MLE
horizon = 20
source("functions_Aaron.R")
mle.p1h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=1) # 0.08107048
mle.p2h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=2) # 0.08065467
mle.p3h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=3) # 0.07973718
mle.p4h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=4) # 0.07797091
mle.p5h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=5) # 0.07669397
mle.p6h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=6) # 0.0759512
mle.p7h20 = roll.win.rmse.linplusnoise.ada(series, horizon, max.p=7) # 0.0759512
Evaluating Models
We will evaluate the model by comparing the ACFs and the Spectral Densities for the same model on generated signal plus noise data. The loop given in class for Unit 11 does not work with signal plus noise generated realizations because the output includes the AR portion of the model regardless of whether plot = TRUE or FALSE. Thus, we will generate 10 different realizations and then take the ACFs and Spectral Densities of all 10 objects.
# Compare Spectral Densities
# Overlay the Parzen spectral density of each generated realization (red)
# on top of the spectral density of the observed series (black, bold) to
# judge how well the fitted model reproduces the data's frequency behavior.
n_sims <- 10
true_spec <- parzen.wge(log.mhp, plot = FALSE)
plot(true_spec$freq, true_spec$pzgram, type = "l", lwd = 3,
     xlab = "Frequency", ylab = "dB",
     main = "True Spectral Density vs Generated Data")
for (i in seq_len(n_sims)) {
  sim_spec <- parzen.wge(ten.generated[[i]], plot = FALSE)
  lines(sim_spec$freq, sim_spec$pzgram, lwd = 2, col = "red")
}
#Compare ACFs
# Overlay the sample ACF of each generated realization (red) on the sample
# ACF of the observed series (black, bold).
# FIX: the original passed plot = "FALSE" (a character string); acf()'s
# `plot` argument is a logical, and the string only works via implicit
# coercion inside if(). Use the logical FALSE explicitly.
sims <- 10
ACF <- acf(log.mhp, plot = FALSE)
plot(ACF$lag, ACF$acf, type = "l", lwd = 4, xlab = "Lag", ylab = "ACF",
     main = "True ACF vs Generated Data")
for (i in seq_len(sims)) {
  ACF2 <- acf(ten.generated[[i]], plot = FALSE)
  lines(ACF2$lag, ACF2$acf, lwd = 1, col = "red")
}
# Residuals
plot(fit.mle.sig$resid, xlab="Time", ylab="", main="Residuals",type="b",col = "black", pch = 19)
acf(fit.mle.sig$resid,main="ACF of Residuals",lag.max=100)
Model Choice
The MLE estimates for forecasting had better ASE and better rolling window RMSE for both forecast horizons (1 yr and 5 yr)
Our final Signal Plus Noise Model is Xt = 10.871 + .011t + Zt, where (1 - .716B - .367B^2 - .137B^3 + .062B^4 + .076B^5 + .118B^6)Zt = at, with white-noise variance sigma^2 = 0.0006929526
Wording:
In 1 year, we are 95% confident that the median home sale price will be between $387,759 (e^12.86814) and $468,321 (e^13.05691). Our best estimate is $426,142 (e^12.96253).
In 5 years, we are 95% confident that the median home sale price will be between $419,048 (e^12.94574) and $669,482 (e^13.41426). Our best estimate is $529,665 (e^13.18).
log.mhp = fed_housing_data$Median_Sales_Price
x = fed_housing_data$Median_Sales_Price
l = length(log.mhp)
h.short = 4
h.long = 20
# Fit with MLE estimates, using all data, forecasting ahead 1 year
fit.mle.sig_h4_ahead = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 4, lastn = FALSE)
##
##
## Coefficients of AR polynomial:
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.8131B+0.8234B^2 1.1010+-0.0480i 0.9074 0.0069
## 1+1.0575B+0.3859B^2 -1.3702+-0.8449i 0.6212 0.4121
## 1+0.0398B+0.3709B^2 -0.0537+-1.6410i 0.6090 0.2552
##
##
# Fit with MLE estimates, using all data, forecasting ahead 5 years
fit.mle.sig_h20_ahead = fore.sigplusnoise.wge(log.mhp, linear = TRUE, method = 'mle', freq = 0, max.p = 6, n.ahead = 20, lastn = FALSE)
##
##
## Coefficients of AR polynomial:
## 0.7158 0.3673 0.1374 -0.0619 -0.0761 -0.1179
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.8131B+0.8234B^2 1.1010+-0.0480i 0.9074 0.0069
## 1+1.0575B+0.3859B^2 -1.3702+-0.8449i 0.6212 0.4121
## 1+0.0398B+0.3709B^2 -0.0537+-1.6410i 0.6090 0.2552
##
##
# Checking if models all have same intercept, slope, and phis
fit.mle.sig_h4$b0hat == fit.mle.sig_h4_ahead$b0hat
## (Intercept)
## TRUE
fit.mle.sig_h4$b1hat == fit.mle.sig_h4_ahead$b1hat
## time
## TRUE
fit.mle.sig_h4$phi.z == fit.mle.sig_h4_ahead$phi.z
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
# Model Coefficients
fit.mle.sig_h4$phi.z
## [1] 0.71575088 0.36727152 0.13737786 -0.06190173 -0.07612962 -0.11786143
fit.mle.sig_h4$b0hat
## (Intercept)
## 10.87039
fit.mle.sig_h4$b1hat
## time
## 0.01084782
fit.mle.sig_h4$wnv
## [1] 0.0006929526
# Confidence Intervals
# Find forecasted Values
fit.mle.sig_h4_ahead$f[4] # 12.96253 ($426,142.89)
## [1] 12.96253
fit.mle.sig_h4_ahead$ll[4] # 12.86814 ($387,759.27)
## [1] 12.86814
fit.mle.sig_h4_ahead$ul[4] # 13.05691 ($468,321.36)
## [1] 13.05691
fit.mle.sig_h20_ahead$f[20] # 13.18 ($529,665.00)
## [1] 13.18
fit.mle.sig_h20_ahead$ll[20] # 12.94574 ($419,047.69)
## [1] 12.94574
fit.mle.sig_h20_ahead$ul[20] # 13.41426 ($669,482.30)
## [1] 13.41426
# Forecasting next year (Zoomed In)
plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",
xlab="Time", ylab = "log Median Housing Sales Price",
main="Linear Signal with AR(6) Noise Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$f,col="red")
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.short),1),fit.mle.sig_h4_ahead$ul,col="blue",lty=3)
# Forecasting next 5 years (Zoomed In)
plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",
xlab="Time", ylab = "log Median Housing Sales Price",
main="Linear Signal with AR(6) Noise Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$f,col="red")
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$ll,col="blue",lty=3) # lty=3 for dotted line
lines(seq((l+1),(l+h.long),1),fit.mle.sig_h20_ahead$ul,col="blue",lty=3)
Multi-variate Models
We will try out an MLR model with lag and a VAR model that predicts all variables at the same time. The exogenous predictions we made previously will be used for the MLR Model.
MLR Model
We will first start by looking at the lag plots for the four exogenous variables.
x = fed_housing_data
# Cross-correlation between Median Sales Price and Ownership Rate, to pick
# a lag for the MLR model. With lag.max = 80, l$acf has 161 entries and
# index 81 corresponds to lag 0.
l = ccf(x$Median_Sales_Price,x$Ownership_Rate,lag.max=80)
which.max(l$acf)
## [1] 131
l$acf[20] # prints -0.03980345; the old note (0.3090685) is stale — NOTE(review): which.max is 131, confirm index 20 was intended
## [1] -0.03980345
l$lag[20] # prints -61 (the lag at index 20); the old note (0) is stale
## [1] -61
l = ccf(x$Median_Sales_Price,x$Housing_Units_Completed,lag.max=80)
which.max(l$acf)
## [1] 150
l$acf[150] # 0.263895
## [1] 0.263895
l$lag[150] # 69, 21 seemed alright
## [1] 69
ccf(x$Median_Sales_Price,x$Supply_New_Houses,main="Median Sales Price & Supply of New Houses")
l = ccf(x$Median_Sales_Price,x$Supply_New_Houses,lag.max=60)
which.min(l$acf)
## [1] 70
l$acf[70] # -0.2592145
## [1] -0.2592145
l$lag[70] # 9
## [1] 9
l = ccf(x$Median_Sales_Price,x$Housing_Price_Index,lag.max=60)
which.max(l$acf)
## [1] 61
l$acf[61] # 0.933195
## [1] 0.933195
l$lag[61] # 0
## [1] 0
It looks like Ownership Rate has a lag of 0, Housing Units Completed has a lag of 21, Supply of New Houses has a lag of 9, and Housing Price Index has a lag of 0.
What does a model with these 4 lagged variables look like.
# Prepare short- and long-horizon copies of the data for the MLR model.
# x.short / x.long carry the previously forecasted exogenous values in their
# final h.short / h.long rows.
x.short <- fed_housing_data_short
x.long <- fed_housing_data_long
# Drop the Year_Quarter column. FIX: assign NULL explicitly — the original
# `= c()` works only because c() happens to evaluate to NULL.
x$Year_Quarter <- NULL
x.short$Year_Quarter <- NULL
x.long$Year_Quarter <- NULL
# Time index and train/test index vectors for each forecast horizon.
l <- length(x$Median_Sales_Price)
t <- seq_len(l)
t.train.short <- 1:(l - h.short)
t.test.short <- (l - h.short + 1):l
t.train.long <- 1:(l - h.long)
t.test.long <- (l - h.long + 1):l
# Lagged exogenous regressors (lags chosen from the CCF plots):
# Housing_Units_Completed at lag 21, Supply_New_Houses at lag 9.
x$Housing_Units_Completed_l21 <- dplyr::lag(x$Housing_Units_Completed, 21)
x$Supply_New_Houses_l9 <- dplyr::lag(x$Supply_New_Houses, 9)
x.short$Housing_Units_Completed_l21 <- dplyr::lag(x.short$Housing_Units_Completed, 21)
x.short$Supply_New_Houses_l9 <- dplyr::lag(x.short$Supply_New_Houses, 9)
x.long$Housing_Units_Completed_l21 <- dplyr::lag(x.long$Housing_Units_Completed, 21)
x.long$Supply_New_Houses_l9 <- dplyr::lag(x.long$Supply_New_Houses, 9)
ksfit = lm(x$Median_Sales_Price~x$Ownership_Rate+x$Housing_Units_Completed_l21+x$Supply_New_Houses_l9+x$Housing_Price_Index+t)
summary(ksfit)
##
## Call:
## lm(formula = x$Median_Sales_Price ~ x$Ownership_Rate + x$Housing_Units_Completed_l21 +
## x$Supply_New_Houses_l9 + x$Housing_Price_Index + t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.161704 -0.039905 0.000082 0.037073 0.169569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.121e+01 2.627e-01 42.655 < 2e-16 ***
## x$Ownership_Rate -2.029e-03 4.165e-03 -0.487 0.62676
## x$Housing_Units_Completed_l21 5.980e-05 2.174e-05 2.751 0.00658 **
## x$Supply_New_Houses_l9 -2.926e-02 3.955e-03 -7.400 5.88e-12 ***
## x$Housing_Price_Index 2.695e-04 1.058e-04 2.548 0.01172 *
## t 9.371e-03 2.996e-04 31.282 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06558 on 171 degrees of freedom
## (21 observations deleted due to missingness)
## Multiple R-squared: 0.9847, Adjusted R-squared: 0.9842
## F-statistic: 2195 on 5 and 171 DF, p-value: < 2.2e-16
AIC(ksfit) # -454.2937
## [1] -454.2937
The Ownership Rate variable had a high p-value (0.627), so it was not significant in the model. Let's see what happens when it is removed.
ksfit = lm(x$Median_Sales_Price~x$Housing_Units_Completed_l21+x$Supply_New_Houses_l9+x$Housing_Price_Index+t)
summary(ksfit)
##
## Call:
## lm(formula = x$Median_Sales_Price ~ x$Housing_Units_Completed_l21 +
## x$Supply_New_Houses_l9 + x$Housing_Price_Index + t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.162411 -0.041527 -0.001268 0.037562 0.172413
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.108e+01 3.039e-02 364.541 < 2e-16 ***
## x$Housing_Units_Completed_l21 5.310e-05 1.679e-05 3.163 0.00185 **
## x$Supply_New_Houses_l9 -2.821e-02 3.303e-03 -8.541 6.84e-15 ***
## x$Housing_Price_Index 2.695e-04 1.055e-04 2.554 0.01151 *
## t 9.341e-03 2.925e-04 31.935 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06543 on 172 degrees of freedom
## (21 observations deleted due to missingness)
## Multiple R-squared: 0.9846, Adjusted R-squared: 0.9843
## F-statistic: 2756 on 4 and 172 DF, p-value: < 2.2e-16
# AIC improves slightly (-456.05 vs -454.29), so the reduced model is kept.
AIC(ksfit) # -456.0482
## [1] -456.0482
In this model, all the variables are significant. The AIC value is slightly lower as well. We will now determine how to model the residuals using an ARMA model.
# Identify an ARMA(p,q) structure for the MLR residuals; AR(2) has the
# smallest AIC among p = 0..4, q = 0..2.
aic5.wge(ksfit$residuals,p=0:4,q=0:2,type='aic') # best 2/0
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 2 0 -6.610677
## 1 1 -6.608485
## 3 0 -6.599501
## 2 1 -6.599466
## 1 2 -6.595973
The model with the best AIC is an AR(2).
# Short-horizon (h.short) holdout: fit the regression with AR(2) errors on
# the short training window, then forecast over the short test window.
fit=arima(x.short$Median_Sales_Price[t.train.short],order=c(2,0,0),xreg=cbind(t.train.short,x.short$Housing_Units_Completed_l21[t.train.short],x.short$Supply_New_Houses_l9[t.train.short],x.short$Housing_Price_Index[t.train.short]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.short$Housing_Units_Completed_l21[t.test.short],Supply_New_Houses_l9=x.short$Supply_New_Houses_l9[t.test.short],Housing_Price_Index=x.short$Housing_Price_Index[t.test.short]))
# ASE on the original dollar scale (exp undoes the log), reported in millions.
ase = mean((fed_housing_data_NL$Median_Sales_Price[t.test.short]-exp(preds$pred))^2)/1e6
ase # 588.675
## [1] 588.675
# Quick diagnostic plot: full series with forecasts overlaid in blue.
plot(seq(1,l,1),x.short$Median_Sales_Price,type="b")
points(seq((l-h.short+1),l,1),preds$pred,type="b",pch=15,col="blue")
# Long-horizon (h.long) holdout: fit the regression with AR(2) errors on the
# long training window, then forecast over the long test window.
# CONSISTENCY FIX: the xreg columns previously mixed `x$...` with
# `x.long$...`; all regressors now come from x.long like the response does.
fit=arima(x.long$Median_Sales_Price[t.train.long],order=c(2,0,0),xreg=cbind(t.train.long,x.long$Housing_Units_Completed_l21[t.train.long],x.long$Supply_New_Houses_l9[t.train.long],x.long$Housing_Price_Index[t.train.long]))
# BUG FIX: the trend column was t.test.short (length h.short); data.frame()
# silently recycled it across the h.long rows of newxreg, feeding a cycling,
# wrong time index into the forecast. Use t.test.long throughout.
preds = predict(fit,newxreg = data.frame(t=t.test.long,Housing_Units_Completed_l21=x.long$Housing_Units_Completed_l21[t.test.long],Supply_New_Houses_l9=x.long$Supply_New_Houses_l9[t.test.long],Housing_Price_Index=x.long$Housing_Price_Index[t.test.long]))
# ASE on the original dollar scale, in millions.
# NOTE(review): the value shown below (1431.873) was produced by the pre-fix
# code; re-knit to refresh it.
ase = mean((fed_housing_data_NL$Median_Sales_Price[t.test.long]-exp(preds$pred))^2)/1e6
ase # 1431.873
## [1] 1431.873
plot(seq(1,l,1),x.long$Median_Sales_Price,type="b")
points(seq((l-h.long+1),l,1),preds$pred,type="b",pch=15,col="blue")
# Re-run the short-horizon fit/forecast to draw the presentation figure
# (observed log series in black, point forecast in red).
fit=arima(x.short$Median_Sales_Price[t.train.short],order=c(2,0,0),xreg=cbind(t.train.short,x.short$Housing_Units_Completed_l21[t.train.short],x.short$Supply_New_Houses_l9[t.train.short],x.short$Housing_Price_Index[t.train.short]))
preds = predict(fit,newxreg = data.frame(t=t.test.short,Housing_Units_Completed_l21=x.short$Housing_Units_Completed_l21[t.test.short],Supply_New_Houses_l9=x.short$Supply_New_Houses_l9[t.test.short],Housing_Price_Index=x.short$Housing_Price_Index[t.test.short]))
plot(seq(xmin_plot,l,1),x.short$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),preds$pred,col="red")
# Re-run the long-horizon fit/forecast to draw the presentation figure.
# CONSISTENCY FIX: xreg columns now all come from x.long (previously `x$...`).
fit=arima(x.long$Median_Sales_Price[t.train.long],order=c(2,0,0),xreg=cbind(t.train.long,x.long$Housing_Units_Completed_l21[t.train.long],x.long$Supply_New_Houses_l9[t.train.long],x.long$Housing_Price_Index[t.train.long]))
# BUG FIX: t=t.test.short was recycled across the h.long-row newxreg; the
# long test-window time index t.test.long is the correct trend regressor.
preds = predict(fit,newxreg = data.frame(t=t.test.long,Housing_Units_Completed_l21=x.long$Housing_Units_Completed_l21[t.test.long],Supply_New_Houses_l9=x.long$Supply_New_Houses_l9[t.test.long],Housing_Price_Index=x.long$Housing_Price_Index[t.test.long]))
plot(seq(xmin_plot,l,1),x.long$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),preds$pred,col="red")
The short term ASE is 589 M and the long term ASE is 1.4 B.
In order to forecast out past the current date, we had to make forecasts for the exogenous variables as well.
# Build a frame to hold h.long quarters of forecasts for every column, then
# forecast each exogenous series with its own univariate model.
x = fed_housing_data
x$Year_Quarter = c() # assigning c() (NULL) drops the label column
fore_df <- as.data.frame(matrix(0, nrow = h.long, ncol = ncol(x)))
colnames(fore_df) <- colnames(x)
# Ownership Rate
x = fed_housing_data$Ownership_Rate
est = est.arma.wge(x,p=9,q=1) # ARMA(9,1) fit (order selection not shown here)
##
##
## Coefficients of AR polynomial:
## 1.6768 -0.8806 0.4076 -0.1012 -0.3757 0.4520 -0.2987 0.3697 -0.2608
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.9534B+0.9571B^2 1.0204+-0.0589i 0.9783 0.0092
## 1-1.3559B+0.7452B^2 0.9098+-0.7171i 0.8632 0.1062
## 1+0.8617B -1.1605 0.8617 0.5000
## 1-0.0469B+0.7004B^2 0.0335+-1.1944i 0.8369 0.2455
## 1+0.8177B+0.6059B^2 -0.6748+-1.0932i 0.7784 0.3380
##
##
##
##
## Coefficients of MA polynomial:
## 0.7795
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.7795B 1.2828 0.7795 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)
## y.arma 64.4 64.9 64.6 64.5 64.6 64.6 64.9 64.8 64.8 64.5 65 64.9 64.8 64.4 65.2 65.4 64.8 64.9 65.8 65.4 65.5 65.5 65.8 65.5 65.6 65.3 65.6 65.2 64.8 64.9 64.9 64.5 64.7 64.7 64.8 64.4 64.6 64.6 64.6 64.1 64.1 64.1 63.9 63.5 63.6 63.8 63.8 63.9 63.8 63.8 64.2 64.1 63.7 63.7 64 63.8 63.9 63.8 64.1 63.8 64 63.7 64 64.1 63.9 63.9 64.2 64.2 64 63.9 64.3 64.4 64.2 64.4 64.7 64.6 63.8 63.8 64.1 64.2 64.2 64.7 65 65.1 65.1 65.4 65.6 65.4 65.4 65.7 66 65.7 65.9 66 66.8 66.4 66.7 66.6 67 66.9 67.1 67.2 67.7 67.5 67.5 67.7 68.1 68 67.8 67.6 68 68.3 68 68 68.4 68.6 68.6 69.2 69 69.2 69.1 68.6 68.8 69 68.5 68.7 69 68.9 68.4 68.2 68.2 67.8 67.8 68.1 67.9 67.5 67.3 67.4 67.6 67.2 67.1 66.9 66.9 66.5 66.4 65.9 66.3 66 65.4 65.5 65.5 65.4 65 65 65.3 65.2 64.8 64.7 64.4 64 63.7 63.4 63.7 63.8 63.5 62.9 63.5 63.7 63.6 63.7 63.9 64.2 64.2 64.3 64.4 64.8 64.2 64.1 64.8 65.1 65.3 67.9 67.4 65.8 65.6 65.4 65.4 65.5 65.4 65.8 66 65.9 66 65.9 66 65.7 65.6 65.6
fore_df$Ownership_Rate[1:h.long] = f$f
# Housing Units Completed
x = fed_housing_data$Housing_Units_Completed
est = est.arma.wge(x,p=9,q=1) # ARMA(9,1) fit for the completions series
##
##
## Coefficients of AR polynomial:
## 1.4066 -0.2813 -0.0540 -0.1068 0.0604 -0.0496 0.1985 -0.4289 0.2277
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-1.7319B+0.7686B^2 1.1267+-0.1778i 0.8767 0.0249
## 1-0.8710B 1.1481 0.8710 0.0000
## 1+1.5502B+0.7380B^2 -1.0502+-0.5020i 0.8591 0.4290
## 1+0.5020B+0.6999B^2 -0.3586+-1.1402i 0.8366 0.2985
## 1-0.8558B+0.6584B^2 0.6499+-1.0471i 0.8114 0.1616
##
##
##
##
## Coefficients of MA polynomial:
## 0.5043
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.5043B 1.9829 0.5043 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE,plot=FALSE)
## y.arma 1588 1278 1276 1134 1258 1332 1322 1388 1457 1548 1687 1675 1777 1843 1957 1885 1850 1866 1759 1832 1794 1751 1472 1272 1270 1469 1337 1206 1052 979 1006 1059 1187 1197 1410 1586 1595 1683 1692 1633 1646 1630 1693 1565 1723 1685 1722 1782 1862 1735 1685 1577 1554 1615 1527 1529 1561 1542 1386 1333 1508 1332 1295 1282 1149 1093 1076 1076 1061 1083 1251 1139 1135 1216 1090 1246 1228 1374 1281 1378 1423 1342 1352 1335 1402 1327 1453 1392 1377 1438 1313 1396 1316 1498 1552 1452 1599 1580 1597 1593 1574 1610 1495 1513 1456 1574 1582 1599 1632 1625 1615 1601 1654 1662 1680 1728 1709 1938 1881 1839 1892 1927 1886 1962 2036 2071 1934 1918 1822 1539 1534 1405 1331 1022 1087 1055 777 846 797 746 689 737 572 605 520 549 634 566 545 671 676 730 714 712 784 801 838 834 839 920 954 1001 988 985 1056 945 1072 1070 1081 1069 1173 1196 1208 1224 1150 1122 1247 1308 1215 1289 1292 1188 1308 1386 1362 1410 1360 1282 1275 1344 1409 1379 1389 1416 1343 1382 1504 1659
fore_df$Housing_Units_Completed[1:h.long] = f$f
# Housing Price Index
x = fed_housing_data$Housing_Price_Index
# Difference the series twice...
d = artrans.wge(x,1)
d2 = artrans.wge(d,1)
# ...then remove the factor with phi = c(0,-1) (i.e. 1 + B^2).
d3 = artrans.wge(d2,c(0,-1))
dev.off() # close the graphics device opened by the artrans.wge plots
## null device
## 1
est = est.arma.wge(d3,p=6,q=5) # ARMA(6,5) fit to the transformed series
##
##
## Coefficients of AR polynomial:
## -0.8116 0.2231 0.5831 0.5250 0.1352 -0.2199
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1+1.7230B+0.8346B^2 -1.0323+-0.3642i 0.9136 0.4460
## 1-0.8558B 1.1685 0.8558 0.0000
## 1+0.4123B+0.6580B^2 -0.3132+-1.1923i 0.8112 0.2909
## 1-0.4679B 2.1373 0.4679 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## -0.6419 -0.0609 0.3220 0.7473 0.6335
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-1.0000B 1.0000 1.0000 0.0000
## 1+0.0687B+0.8449B^2 -0.0407+-1.0872i 0.9192 0.2560
## 1+1.5732B+0.7497B^2 -1.0492+-0.4827i 0.8659 0.4314
##
##
# Recombine the estimated AR part with the 1 + B^2 factor, then forecast
# with d=2 supplying the two regular differences removed above.
m = mult.wge(fac1=est$phi,fac2=c(0,-1))
f = fore.arima.wge(x,d=2,phi=m$model.coef,theta=est$theta,n.ahead=h.long,lastn=FALSE, plot=FALSE)
## y.arma -0.7 0.82 -0.51 2.1 -1.5 -0.45 1.59 0.92 -1.54 1.16 -0.46 0.83 -0.77 -0.12 1.63 -0.85 -1.09 -0.25 0.04 -0.12 1.14 -2.69 0.51 1.6 -0.78 -2.8 0.01 4.01 -4.44 6.46 -1.95 -1.22 -0.49 -0.32 1.07 -0.16 -0.48 -0.15 0.77 0.12 0.11 -0.55 1.11 0.24 -0.72 0.07 0.64 -0.55 -0.56 -0.7 1.33 0.71 -1.41 -0.18 0.4 0.35 1.24 -2.05 -0.42 -0.24 0.34 -1.68 2.02 -0.25 -0.79 2.03 -1.03 -1.37 2.01 -1.1 -0.79 1.61 -0.26 0.04 -0.34 -0.06 -0.31 -0.92 1.12 2.07 -0.14 -1.44 0.7 -1.83 0.54 0.66 0.11 0 1.08 -0.08 0.44 -1.21 1.08 -0.42 -0.26 0.64 0.23 -0.84 2.03 -0.57 0.52 -0.63 2.35 -1.83 -0.25 -0.75 0.56 0.76 0.82 -1.43 -0.51 0.07 0.94 3.76 -3.48 2.69 4.39 -4.88 0.64 3.21 -0.25 -2.42 -2.27 -1.99 -0.71 0.8 -2.45 -1.65 -4.02 3.04 -1.54 -6.63 -2.06 8.21 5.81 -11.76 0.27 6.39 -1.41 0.98 6.1 -5.46 -6.68 3.46 7.84 -0.87 -4.71 1.86 5.54 -1.67 -0.89 3.27 -0.22 -2.02 0.11 3.73 -1.55 -2.1 1.03 1.86 -0.64 -2.13 0.43 3.53 -0.21 -3.71 -0.19 5.87 -2.55 -2.75 2.35 1.86 -2.42 -3.67 3.29 2.42 -1.6 -1.64 1.25 -0.66 3.48 2.19 1.22 15.51 1.55 -9.92 1.97 17.64 -28.03 -15.21 8.61 15.81 -7.09 -10.76 5.55 10.31
fore_df$Housing_Price_Index[1:h.long] = f$f
# Supply of New Houses
x = fed_housing_data$Supply_New_Houses
est = est.arma.wge(x,p=1,q=1) # ARMA(1,1) fit for the supply series
##
##
## Coefficients of AR polynomial:
## 0.8930
##
## AR Factor Table
## Factor Roots Abs Recip System Freq
## 1-0.8930B 1.1198 0.8930 0.0000
##
##
##
##
## Coefficients of MA polynomial:
## 0.1287
##
## MA FACTOR TABLE
## Factor Roots Abs Recip System Freq
## 1-0.1287B 7.7707 0.1287 0.0000
##
##
f = fore.arima.wge(x,phi=est$phi,theta=est$theta,n.ahead=h.long,lastn=FALSE, plot=FALSE)
## y.arma 9.9 7.2 7 6.6 6.4 6.4 6.4 5.8 5.3 5.5 6.1 6.1 5.9 5.8 6.6 5.7 6.6 6.8 7.1 7.4 7.9 11.6 6.5 7.3 8 8.6 9 10.3 8.9 9.4 8.2 6.2 5.4 5.1 5.8 5.6 5.3 6 6.7 6 6.7 6.9 5.6 6.4 5.7 4.7 6.1 6.5 6 6 6.7 6.8 7.5 6.4 6.6 6 6.2 7.4 6.1 6.9 7 8.3 7.8 8.7 9.4 7.3 7.1 6.7 5.2 6.1 5.3 5.1 5.4 4.7 5.3 5 5.9 5.2 6.3 5.6 6.8 6.7 5.6 6.3 6.4 6 5.7 5.6 4.7 4.7 4.4 4.3 4 4 4 4 3.9 3.9 4 4.2 4.3 4.4 4.1 4 3.8 3.9 4.2 4.3 4.2 4.3 4.2 4 4 4.1 3.6 3.8 3.8 4 4.5 3.9 4.4 4.3 4.2 4.5 5.3 6.3 7.3 7.3 7.2 7.4 8.3 8.5 9.3 10.3 10.5 11.6 12.2 10.7 7.9 7.4 8.1 6.2 8.9 8.2 7.3 6.7 6.7 6 5.3 4.9 4.6 4.9 4 4.4 5.5 4.9 5.1 5.7 6.2 5.3 4.8 4.9 5.2 5.6 5.6 5 4.5 5.2 5.4 5.4 5.8 5.5 6 5.6 6 7.2 7 5.7 6.1 5.3 5.7 6.7 3.5 3.3 4.1 4.6 6 6.7 5.9 8.2 10.6 9.7 8.3 7.5 7.3 7.9 8.3 7.6
fore_df$Supply_New_Houses[1:h.long] = f$f
# Forecast df: append the exogenous-variable forecasts below the observed
# data so the MLR model can forecast past the end of the sample.
x = fed_housing_data
x$Year_Quarter = c() # drop the label column
fed_housing_data_forecast <- rbind(x, fore_df)
x = fed_housing_data_forecast
# Rebuild the lagged regressors over the extended frame.
x$Housing_Units_Completed_l21 = dplyr::lag(x$Housing_Units_Completed,21)
x$Supply_New_Houses_l9 = dplyr::lag(x$Supply_New_Houses,9)
t=1:l
t.fore.short = (l+1):(l+h.short)
t.fore.long = (l+1):(l+h.long)
# Refit the AR(2)-error regression on all observed quarters, then forecast
# into the future using the forecasted exogenous values as newxreg.
fit=arima(x$Median_Sales_Price[t],order=c(2,0,0),xreg=cbind(t,x$Housing_Units_Completed_l21[t],x$Supply_New_Houses_l9[t],x$Housing_Price_Index[t]))
preds = predict(fit,newxreg = data.frame(t=t.fore.short,Housing_Units_Completed_l21=x$Housing_Units_Completed_l21[t.fore.short],Supply_New_Houses_l9=x$Supply_New_Houses_l9[t.fore.short],Housing_Price_Index=x$Housing_Price_Index[t.fore.short]))
# Indexing past row l of fed_housing_data yields NA, so the black observed
# line simply stops where the red forecast begins.
plot(seq(xmin_plot,l+h.short,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),preds$pred,col="red")
preds = predict(fit,newxreg = data.frame(t=t.fore.long,Housing_Units_Completed_l21=x$Housing_Units_Completed_l21[t.fore.long],Supply_New_Houses_l9=x$Supply_New_Houses_l9[t.fore.long],Housing_Price_Index=x$Housing_Price_Index[t.fore.long]))
plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="MLR Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),preds$pred,col="red")
The MLR forecasts continued to increase with time.
VAR Model
# Choose the VAR order over all five series; AIC and FPE both select p = 4.
x = fed_housing_data
x$Year_Quarter = c() # drop the label column
VARselect(x,lag.max=16,type="both",season=NULL,exogen=NULL) # lag = 4
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 4 2 1 4
##
## $criteria
## 1 2 3 4 5 6 7 8
## AIC(n) 2.330754 1.933777 1.831970 1.690855 1.754028 1.829078 1.939386 1.901505
## HQ(n) 2.580534 2.361972 2.438579 2.475879 2.717466 2.970931 3.259654 3.400187
## SC(n) 2.946909 2.990043 3.328346 3.627343 4.130626 4.645787 5.196206 5.598435
## FPE(n) 10.287646 6.922207 6.263258 5.456431 5.841904 6.344193 7.157132 6.985458
## 9 10 11 12 13 14 15 16
## AIC(n) 1.737782 1.757358 1.775183 1.828460 1.877812 1.886325 1.936906 1.981409
## HQ(n) 3.414879 3.612869 3.809109 4.040800 4.268567 4.455494 4.684489 4.907407
## SC(n) 5.874824 6.334511 6.792446 7.285834 7.775297 8.223921 8.714612 9.199226
## FPE(n) 6.035151 6.291181 6.580194 7.172464 7.838893 8.285860 9.211707 10.275979
The model with lag = 4 was chosen.
# Fit a VAR(4) with both constant and linear trend ("both").
fit = VAR(x,p=4,type='both')
summary(fit) # trend and const were significant, but only lag up to 2 for variable of interest, huc not very predictive
##
## VAR Estimation Results:
## =========================
## Endogenous variables: Ownership_Rate, Housing_Units_Completed, Supply_New_Houses, Housing_Price_Index, Median_Sales_Price
## Deterministic variables: both
## Sample size: 194
## Log Likelihood: -1409.069
## Roots of the characteristic polynomial:
## 1.019 0.9661 0.944 0.944 0.8338 0.8338 0.827 0.827 0.6592 0.6592 0.5734 0.5734 0.5261 0.5261 0.4336 0.4336 0.3419 0.3419 0.3146 0.09555
## Call:
## VAR(y = x, p = 4, type = "both")
##
##
## Estimation results for equation Ownership_Rate:
## ===============================================
## Ownership_Rate = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend
##
## Estimate Std. Error t value Pr(>|t|)
## Ownership_Rate.l1 0.8802438 0.0772448 11.396 <2e-16 ***
## Housing_Units_Completed.l1 0.0003522 0.0003466 1.016 0.3110
## Supply_New_Houses.l1 0.0178765 0.0320110 0.558 0.5773
## Housing_Price_Index.l1 0.0006370 0.0080791 0.079 0.9372
## Median_Sales_Price.l1 -1.2365449 1.1566036 -1.069 0.2865
## Ownership_Rate.l2 -0.1675348 0.1006641 -1.664 0.0979 .
## Housing_Units_Completed.l2 0.0001776 0.0004052 0.438 0.6617
## Supply_New_Houses.l2 -0.0620023 0.0390373 -1.588 0.1141
## Housing_Price_Index.l2 0.0013218 0.0153120 0.086 0.9313
## Median_Sales_Price.l2 0.7609832 1.2429600 0.612 0.5412
## Ownership_Rate.l3 0.2373131 0.0992393 2.391 0.0179 *
## Housing_Units_Completed.l3 -0.0002194 0.0003990 -0.550 0.5831
## Supply_New_Houses.l3 0.0633595 0.0399637 1.585 0.1147
## Housing_Price_Index.l3 -0.0003650 0.0158064 -0.023 0.9816
## Median_Sales_Price.l3 0.6246737 1.2399144 0.504 0.6150
## Ownership_Rate.l4 0.0110775 0.0763513 0.145 0.8848
## Housing_Units_Completed.l4 0.0001236 0.0003175 0.389 0.6976
## Supply_New_Houses.l4 -0.0003583 0.0358984 -0.010 0.9920
## Housing_Price_Index.l4 -0.0034601 0.0096862 -0.357 0.7214
## Median_Sales_Price.l4 -0.4423237 1.0989918 -0.402 0.6878
## const 4.9666621 3.7849625 1.312 0.1912
## trend 0.0084872 0.0054763 1.550 0.1230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.3583 on 172 degrees of freedom
## Multiple R-Squared: 0.9552, Adjusted R-squared: 0.9497
## F-statistic: 174.7 on 21 and 172 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Housing_Units_Completed:
## ========================================================
## Housing_Units_Completed = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend
##
## Estimate Std. Error t value Pr(>|t|)
## Ownership_Rate.l1 1.21161 16.47348 0.074 0.94145
## Housing_Units_Completed.l1 0.62130 0.07391 8.406 1.55e-14 ***
## Supply_New_Houses.l1 -12.72643 6.82677 -1.864 0.06400 .
## Housing_Price_Index.l1 -1.27929 1.72297 -0.742 0.45880
## Median_Sales_Price.l1 -3.72119 246.66115 -0.015 0.98798
## Ownership_Rate.l2 16.99057 21.46795 0.791 0.42978
## Housing_Units_Completed.l2 0.19786 0.08642 2.290 0.02326 *
## Supply_New_Houses.l2 -33.79267 8.32521 -4.059 7.47e-05 ***
## Housing_Price_Index.l2 -0.63782 3.26549 -0.195 0.84537
## Median_Sales_Price.l2 -415.41469 265.07780 -1.567 0.11892
## Ownership_Rate.l3 -20.99121 21.16410 -0.992 0.32267
## Housing_Units_Completed.l3 0.11180 0.08509 1.314 0.19063
## Supply_New_Houses.l3 -9.99171 8.52278 -1.172 0.24268
## Housing_Price_Index.l3 8.58651 3.37092 2.547 0.01173 *
## Median_Sales_Price.l3 33.54969 264.42828 0.127 0.89919
## Ownership_Rate.l4 -0.91091 16.28293 -0.056 0.95545
## Housing_Units_Completed.l4 -0.08470 0.06771 -1.251 0.21267
## Supply_New_Houses.l4 14.11716 7.65581 1.844 0.06691 .
## Housing_Price_Index.l4 -5.63593 2.06570 -2.728 0.00703 **
## Median_Sales_Price.l4 309.08091 234.37467 1.319 0.18901
## const 1582.57402 807.19375 1.961 0.05154 .
## trend -2.14430 1.16790 -1.836 0.06808 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 76.4 on 172 degrees of freedom
## Multiple R-Squared: 0.9572, Adjusted R-squared: 0.9519
## F-statistic: 183.1 on 21 and 172 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Supply_New_Houses:
## ==================================================
## Supply_New_Houses = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend
##
## Estimate Std. Error t value Pr(>|t|)
## Ownership_Rate.l1 -0.2024886 0.1845156 -1.097 0.2740
## Housing_Units_Completed.l1 -0.0004697 0.0008279 -0.567 0.5712
## Supply_New_Houses.l1 0.6855507 0.0764650 8.966 5.07e-16 ***
## Housing_Price_Index.l1 0.0214770 0.0192986 1.113 0.2673
## Median_Sales_Price.l1 1.5817461 2.7627931 0.573 0.5677
## Ownership_Rate.l2 -0.1408463 0.2404574 -0.586 0.5588
## Housing_Units_Completed.l2 -0.0009644 0.0009679 -0.996 0.3205
## Supply_New_Houses.l2 0.1186182 0.0932488 1.272 0.2051
## Housing_Price_Index.l2 -0.0333143 0.0365760 -0.911 0.3637
## Median_Sales_Price.l2 0.6123029 2.9690737 0.206 0.8369
## Ownership_Rate.l3 0.3641744 0.2370540 1.536 0.1263
## Housing_Units_Completed.l3 -0.0003113 0.0009531 -0.327 0.7443
## Supply_New_Houses.l3 0.0635468 0.0954617 0.666 0.5065
## Housing_Price_Index.l3 -0.0060500 0.0377569 -0.160 0.8729
## Median_Sales_Price.l3 3.2160054 2.9617986 1.086 0.2791
## Ownership_Rate.l4 -0.0840977 0.1823813 -0.461 0.6453
## Housing_Units_Completed.l4 0.0016403 0.0007584 2.163 0.0319 *
## Supply_New_Houses.l4 -0.1508597 0.0857509 -1.759 0.0803 .
## Housing_Price_Index.l4 0.0254085 0.0231374 1.098 0.2737
## Median_Sales_Price.l4 -4.5626989 2.6251752 -1.738 0.0840 .
## const -3.4228974 9.0411862 -0.379 0.7055
## trend -0.0259743 0.0130814 -1.986 0.0487 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.8558 on 172 degrees of freedom
## Multiple R-Squared: 0.7811, Adjusted R-squared: 0.7543
## F-statistic: 29.22 on 21 and 172 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Housing_Price_Index:
## ====================================================
## Housing_Price_Index = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend
##
## Estimate Std. Error t value Pr(>|t|)
## Ownership_Rate.l1 -0.985346 0.650593 -1.515 0.13173
## Housing_Units_Completed.l1 0.001248 0.002919 0.427 0.66957
## Supply_New_Houses.l1 -0.756803 0.269612 -2.807 0.00558 **
## Housing_Price_Index.l1 1.679492 0.068046 24.682 < 2e-16 ***
## Median_Sales_Price.l1 3.452220 9.741473 0.354 0.72349
## Ownership_Rate.l2 -0.287772 0.847841 -0.339 0.73471
## Housing_Units_Completed.l2 -0.004677 0.003413 -1.370 0.17234
## Supply_New_Houses.l2 0.036430 0.328791 0.111 0.91190
## Housing_Price_Index.l2 -1.206842 0.128965 -9.358 < 2e-16 ***
## Median_Sales_Price.l2 10.364575 10.468808 0.990 0.32354
## Ownership_Rate.l3 -0.036252 0.835841 -0.043 0.96546
## Housing_Units_Completed.l3 0.004608 0.003361 1.371 0.17211
## Supply_New_Houses.l3 -0.082369 0.336593 -0.245 0.80697
## Housing_Price_Index.l3 1.032985 0.133129 7.759 7.24e-13 ***
## Median_Sales_Price.l3 -14.997317 10.443156 -1.436 0.15279
## Ownership_Rate.l4 0.967596 0.643067 1.505 0.13425
## Housing_Units_Completed.l4 0.001041 0.002674 0.389 0.69745
## Supply_New_Houses.l4 0.115456 0.302354 0.382 0.70304
## Housing_Price_Index.l4 -0.495141 0.081581 -6.069 7.97e-09 ***
## Median_Sales_Price.l4 -5.006937 9.256239 -0.541 0.58926
## const 89.325044 31.878778 2.802 0.00566 **
## trend 0.063292 0.046124 1.372 0.17179
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 3.017 on 172 degrees of freedom
## Multiple R-Squared: 0.9996, Adjusted R-squared: 0.9996
## F-statistic: 2.097e+04 on 21 and 172 DF, p-value: < 2.2e-16
##
##
## Estimation results for equation Median_Sales_Price:
## ===================================================
## Median_Sales_Price = Ownership_Rate.l1 + Housing_Units_Completed.l1 + Supply_New_Houses.l1 + Housing_Price_Index.l1 + Median_Sales_Price.l1 + Ownership_Rate.l2 + Housing_Units_Completed.l2 + Supply_New_Houses.l2 + Housing_Price_Index.l2 + Median_Sales_Price.l2 + Ownership_Rate.l3 + Housing_Units_Completed.l3 + Supply_New_Houses.l3 + Housing_Price_Index.l3 + Median_Sales_Price.l3 + Ownership_Rate.l4 + Housing_Units_Completed.l4 + Supply_New_Houses.l4 + Housing_Price_Index.l4 + Median_Sales_Price.l4 + const + trend
##
## Estimate Std. Error t value Pr(>|t|)
## Ownership_Rate.l1 -6.003e-03 5.113e-03 -1.174 0.241960
## Housing_Units_Completed.l1 1.114e-05 2.294e-05 0.486 0.627737
## Supply_New_Houses.l1 -7.269e-03 2.119e-03 -3.431 0.000753 ***
## Housing_Price_Index.l1 1.695e-03 5.347e-04 3.170 0.001806 **
## Median_Sales_Price.l1 4.816e-01 7.655e-02 6.291 2.52e-09 ***
## Ownership_Rate.l2 5.273e-03 6.663e-03 0.791 0.429799
## Housing_Units_Completed.l2 -2.239e-05 2.682e-05 -0.835 0.404910
## Supply_New_Houses.l2 4.012e-03 2.584e-03 1.553 0.122329
## Housing_Price_Index.l2 -3.055e-04 1.013e-03 -0.301 0.763406
## Median_Sales_Price.l2 3.289e-01 8.227e-02 3.998 9.46e-05 ***
## Ownership_Rate.l3 -3.138e-04 6.568e-03 -0.048 0.961956
## Housing_Units_Completed.l3 4.574e-05 2.641e-05 1.732 0.085056 .
## Supply_New_Houses.l3 -1.031e-03 2.645e-03 -0.390 0.697110
## Housing_Price_Index.l3 -1.645e-03 1.046e-03 -1.573 0.117607
## Median_Sales_Price.l3 1.478e-01 8.207e-02 1.801 0.073454 .
## Ownership_Rate.l4 2.436e-03 5.053e-03 0.482 0.630377
## Housing_Units_Completed.l4 -2.301e-05 2.101e-05 -1.095 0.275009
## Supply_New_Houses.l4 3.184e-03 2.376e-03 1.340 0.181943
## Housing_Price_Index.l4 9.110e-05 6.411e-04 0.142 0.887170
## Median_Sales_Price.l4 -3.937e-02 7.274e-02 -0.541 0.589017
## const 8.113e-01 2.505e-01 3.239 0.001441 **
## trend 1.077e-03 3.625e-04 2.972 0.003381 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Residual standard error: 0.02371 on 172 degrees of freedom
## Multiple R-Squared: 0.9986, Adjusted R-squared: 0.9985
## F-statistic: 5986 on 21 and 172 DF, p-value: < 2.2e-16
##
##
##
## Covariance matrix of residuals:
## Ownership_Rate Housing_Units_Completed
## Ownership_Rate 0.128346 -0.32121
## Housing_Units_Completed -0.321209 5837.34662
## Supply_New_Houses 0.006495 -9.27767
## Housing_Price_Index -0.123702 -5.77346
## Median_Sales_Price -0.001310 0.01227
## Supply_New_Houses Housing_Price_Index
## Ownership_Rate 0.006495 -0.123702
## Housing_Units_Completed -9.277673 -5.773455
## Supply_New_Houses 0.732337 -0.448006
## Housing_Price_Index -0.448006 9.104649
## Median_Sales_Price -0.001032 0.003326
## Median_Sales_Price
## Ownership_Rate -0.0013105
## Housing_Units_Completed 0.0122743
## Supply_New_Houses -0.0010317
## Housing_Price_Index 0.0033263
## Median_Sales_Price 0.0005622
##
## Correlation matrix of residuals:
## Ownership_Rate Housing_Units_Completed
## Ownership_Rate 1.00000 -0.011735
## Housing_Units_Completed -0.01174 1.000000
## Supply_New_Houses 0.02118 -0.141898
## Housing_Price_Index -0.11443 -0.025044
## Median_Sales_Price -0.15427 0.006775
## Supply_New_Houses Housing_Price_Index
## Ownership_Rate 0.02118 -0.11443
## Housing_Units_Completed -0.14190 -0.02504
## Supply_New_Houses 1.00000 -0.17350
## Housing_Price_Index -0.17350 1.00000
## Median_Sales_Price -0.05085 0.04649
## Median_Sales_Price
## Ownership_Rate -0.154270
## Housing_Units_Completed 0.006775
## Supply_New_Houses -0.050846
## Housing_Price_Index 0.046491
## Median_Sales_Price 1.000000
# Short-horizon holdout: refit the VAR(4) without the last h.short quarters
# and forecast them; ASE is on the dollar scale, in millions.
fit = VAR(x[1:(l-h.short),],p=4,type='both')
preds=predict(fit,n.ahead=h.short)
ase = mean((fed_housing_data_NL$Median_Sales_Price[(l-h.short+1):l]-exp(preds$fcst$Median_Sales_Price[,1]))^2)/1e6
ase # 43.63274
## [1] 43.63274
plot(seq(1,l,1),x$Median_Sales_Price,type="b")
points(seq(l-h.short+1,l,1),preds$fcst$Median_Sales_Price[1:h.short,1],type="b",pch=15,col="blue")
fanchart(preds)
# Long-horizon holdout: same procedure with the last h.long quarters held out.
fit = VAR(x[1:(l-h.long),],p=4,type='both')
preds=predict(fit,n.ahead=h.long)
ase = mean((fed_housing_data_NL$Median_Sales_Price[(l-h.long+1):l]-exp(preds$fcst$Median_Sales_Price[,1]))^2)/1e6
ase # 3009.65
## [1] 3009.65
plot(seq(1,l,1),x$Median_Sales_Price,type="b")
points(seq(l-h.long+1,l,1),preds$fcst$Median_Sales_Price[1:h.long,1],type="b",pch=15,col="blue")
fanchart(preds)
# Presentation figures for the VAR holdout forecasts: point forecast in red,
# 95% bounds (columns 2 and 3 of the fcst matrix) as dotted blue lines.
x = fed_housing_data
x$Year_Quarter = c() # drop the label column
fit = VAR(x[1:(l-h.short),],p=4,type='both')
preds=predict(fit,n.ahead=h.short)
plot(seq(xmin_plot,l,1),x$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3) # 2nd column is lower bound
lines(seq((l-h.short+1),l,1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3) # 3rd column is upper bound
fit = VAR(x[1:(l-h.long),],p=4,type='both')
preds=predict(fit,n.ahead=h.long)
plot(seq(xmin_plot,l,1),x$Median_Sales_Price[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3)
lines(seq((l-h.long+1),l,1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3)
The VAR model has a very small short term ASE of 43.6 M. This is even smaller than the signal plus noise model. However, its long term ASE is larger than that of the MLR model.
We forecasted the VAR model out for 1 year and 5 years.
# Forecast past the end of the sample: refit the VAR(4) on all observed data
# and project h.short and h.long quarters ahead.
x = fed_housing_data
x$Year_Quarter = c() # drop the label column
fit = VAR(x,p=4,type='both')
preds=predict(fit,n.ahead=h.short)
# Indexing past row l yields NA, so the observed line stops at the forecast.
plot(seq(xmin_plot,l+h.short,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.short)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3) # 2nd column is lower bound
lines(seq((l+1),(l+h.short),1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3) # 3rd column is upper bound
fit = VAR(x,p=4,type='both')
preds=predict(fit,n.ahead=h.long)
plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,1],col="red")
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,2],col="blue",lty=3)
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,3],col="blue",lty=3)
# BUG FIX: `preds` here is a vars::predict() result, which stores forecasts
# in preds$fcst; preds$pred (the stats::predict.Arima slot) is NULL, so this
# leftover MLR plotting code drew nothing meaningful. Plot the VAR long-term
# point forecast and label the figure accordingly.
plot(seq(xmin_plot,l+h.long,1),fed_housing_data$Median_Sales_Price[xmin_plot:(l+h.long)],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="VAR Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),preds$fcst$Median_Sales_Price[,1],col="red")
Neural Networks
We investigated 3 Neural Nets for predictions: Multilayer Perceptron (MLP), Long Short-Term Memory(LSTM), and Temporal Fusion Transformer (TFT).
Multilayer Perceptron
Four different univariate MLP models were fit to the data for both horizons, and the ASE and RMSE were best in both cases using MLP’s default parameters. For the 1 year horizon, the function chose 5 hidden nodes and a Difference of 1. The univariate lags were 1, 2, 3, and 4. For the 5 year horizon, the function chose 5 hidden nodes and a Difference of 1 as well, with lags at 1, 3, and 4. The univariate MLP was used as a baseline for the Multivariate MLP.
# Univariate
# Hold out the last 4 quarters (194 of 198) for the 1-year horizon and the
# last 20 (178 of 198) for the 5-year horizon.
log.mhp = fed_housing_data$Median_Sales_Price
msp.194 = ts(log.mhp[1:194])
msp.178 = ts(log.mhp[1:178])
# 1 Year Horizons
# Four MLP variants: default difforder, and explicit difforder 0, 1, 2.
mspFit.4 = mlp(msp.194, comb = 'median')
mspFit.d0.4 = mlp(msp.194, comb = 'median', difforder = 0)
mspFit.d1.4 = mlp(msp.194, comb = 'median', difforder = 1)
mspFit.d2.4 = mlp(msp.194, comb = 'median', difforder = 2)
plot(mspFit.4)
plot(mspFit.4$fitted)
f.4 = forecast(mspFit.4, h = 4)
f.d0.4 = forecast(mspFit.d0.4, h = 4)
f.d1.4 = forecast(mspFit.d1.4, h = 4)
f.d2.4 = forecast(mspFit.d2.4, h = 4)
# Defaults
# ASE on the dollar scale; rolling-window RMSE on the log scale.
ASE.mlp.exp.h4 = mean((exp(log.mhp[195:198]) - exp(f.4$mean))^2) # 60.95M
rwfit.mlp4 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.4) # 0.029
## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002734 0.015904 0.023128 0.029275 0.039717 0.110918
## [1] "The Rolling Window RMSE is: 0.029"
# Difforder = 0
# Same holdout evaluation for the no-differencing MLP variant.
ASE.mlp.exp.d0.h4 = mean((exp(log.mhp[195:198]) - exp(f.d0.4$mean))^2) # 99.75M
rwfit.mlp4.d0 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d0.4) # 0.033
## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002723 0.016662 0.025692 0.032713 0.042055 0.113279
## [1] "The Rolling Window RMSE is: 0.033"
# Difforder = 1
# Holdout ASE (dollar scale) and rolling-window RMSE (log scale) for difforder = 1
ASE.mlp.exp.d1.h4 = mean((exp(log.mhp[195:198]) - exp(f.d1.4$mean))^2) # 178.5M
rwfit.mlp4.d1 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d1.4) # 0.029
## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005039 0.015694 0.023916 0.028928 0.037541 0.098347
## [1] "The Rolling Window RMSE is: 0.029"
# Difforder = 2
# Holdout ASE (dollar scale) and rolling-window RMSE (log scale) for difforder = 2
ASE.mlp.exp.d2.h4 = mean((exp(log.mhp[195:198]) - exp(f.d2.4$mean))^2) # 205.4M
rwfit.mlp4.d2 = roll.win.rmse.nn.wge(log.mhp, horizon = 4, fit_model = mspFit.d2.4) # 0.031
## [1] 9
## [1] 186
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 186 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.005211 0.018303 0.024713 0.030557 0.039818 0.098140
## [1] "The Rolling Window RMSE is: 0.031"
# 1 Year Horizon
# Overlay all four 1-year forecasts on the 4-quarter holdout
plot(log.mhp[195:198], type = 'l', ylim = c(12.9, 13))
lines(seq(1,4), f.4$mean, col = "blue")
lines(seq(1,4), f.d0.4$mean, col = "red")
lines(seq(1,4), f.d1.4$mean, col = "orange")
lines(seq(1,4), f.d2.4$mean, col = "green")
# 5 Year Horizon
# Same four model variants, trained with the 20-quarter holdout removed
mspFit.20 = mlp(msp.178, comb = 'median')
plot(mspFit.20)
mspFit.d0.20 = mlp(msp.178, comb = 'median', difforder = 0)
mspFit.d1.20 = mlp(msp.178, comb = 'median', difforder = 1)
mspFit.d2.20 = mlp(msp.178, comb = 'median', difforder = 2)
f.20 = forecast(mspFit.20, h = 20)
plot(f.20)
f.d0.20 = forecast(mspFit.d0.20, h = 20)
f.d1.20 = forecast(mspFit.d1.20, h = 20)
f.d2.20 = forecast(mspFit.d2.20, h = 20)
# Defaults
# 5-year holdout ASE (dollar scale); inline values are from an earlier run
ASE.mlp.exp.h20 = mean((exp(log.mhp[179:198]) - exp(f.20$mean))^2) # 1.72B
rwfit.mlp20 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.20) # 0.072
## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01490 0.03531 0.06311 0.07289 0.09689 0.23675
## [1] "The Rolling Window RMSE is: 0.073"
# Difforder = 0
# 5-year holdout ASE and rolling-window RMSE for difforder = 0
ASE.mlp.exp.d0.h20 = mean((exp(log.mhp[179:198]) - exp(f.d0.20$mean))^2) # 7.24B
rwfit.mlp20.d0 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d0.20) # 0.076
## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01315 0.03614 0.06175 0.07512 0.09523 0.23384
## [1] "The Rolling Window RMSE is: 0.075"
# Difforder = 1
# 5-year holdout ASE and rolling-window RMSE for difforder = 1
ASE.mlp.exp.d1.h20 = mean((exp(log.mhp[179:198]) - exp(f.d1.20$mean))^2) # 2.02B
rwfit.mlp20.d1 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d1.20) # 0.073
## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01498 0.03505 0.06517 0.07272 0.09145 0.22887
## [1] "The Rolling Window RMSE is: 0.073"
# Difforder = 2
# 5-year holdout ASE and rolling-window RMSE for difforder = 2
ASE.mlp.exp.d2.h20 = mean((exp(log.mhp[179:198]) - exp(f.d2.20$mean))^2) # 2.61B
rwfit.mlp20.d2 = roll.win.rmse.nn.wge(log.mhp, horizon = 20, fit_model = mspFit.d2.20) # 0.077
## [1] 9
## [1] 170
## [1] "Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with 170 windows."
## [1] "The Summary Statistics for the Rolling Window RMSE Are:"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01288 0.03846 0.06654 0.07606 0.10646 0.22748
## [1] "The Rolling Window RMSE is: 0.076"
# Create table for comparison
# One row per difforder variant: ASE (dollar scale) and rolling-window RMSE
# (log scale) at both horizons
labels = c("Defaults", "DiffOrder = 0", "DiffOrder = 1", "DiffOrder = 2")
ASE_4 = c(ASE.mlp.exp.h4, ASE.mlp.exp.d0.h4, ASE.mlp.exp.d1.h4, ASE.mlp.exp.d2.h4)
ASE_20 = c(ASE.mlp.exp.h20, ASE.mlp.exp.d0.h20, ASE.mlp.exp.d1.h20, ASE.mlp.exp.d2.h20)
RMSE_4 = c(rwfit.mlp4$rwRMSE,rwfit.mlp4.d0$rwRMSE,rwfit.mlp4.d1$rwRMSE,rwfit.mlp4.d2$rwRMSE)
RMSE_20 = c(rwfit.mlp20$rwRMSE,rwfit.mlp20.d0$rwRMSE,rwfit.mlp20.d1$rwRMSE,rwfit.mlp20.d2$rwRMSE)
comparison = data.frame(Metrics = labels, ASE_1_yr = ASE_4, ASE_5_Yr = ASE_20, RMSE_1_Yr = RMSE_4, RMSE_5_Yr = RMSE_20)
comparison
## Metrics ASE_1_yr ASE_5_Yr RMSE_1_Yr RMSE_5_Yr
## 1 Defaults 56113947 2360549683 0.02927483 0.07288691
## 2 DiffOrder = 0 71331704 7377710156 0.03271345 0.07511880
## 3 DiffOrder = 1 91170472 2050048407 0.02892788 0.07271617
## 4 DiffOrder = 2 225962073 2323174956 0.03055707 0.07605920
# Comparing 5 Year Forecasts
# Overlay all four 5-year forecasts on the 20-quarter holdout
plot(log.mhp[179:198], type = 'l', ylim = c(12.5, 13))
lines(seq(1,20), f.20$mean, col = "blue")
lines(seq(1,20), f.d0.20$mean, col = "red")
lines(seq(1,20), f.d1.20$mean, col = "orange")
lines(seq(1,20), f.d2.20$mean, col = "green")
# Forecasts
# Default-parameter forecasts (red) against the full log series
plot(log.mhp, type = 'l', lwd = 1, main = 'Univariate Forecasts, 1 and 5 Years')
points(seq(195,198), f.4$mean, type = 'l', col = 'red')
points(seq(179,198), f.20$mean, type = 'l', col = 'red')
# Zoomed In, 1 yr
plot(seq(150,198,1),log.mhp[150:198], type = 'l', lwd = 1, main = 'Univariate MLP Forecast, 1 Year')
points(seq(195,198,1), f.4$mean, type = 'l', col = 'red')
# Zoomed In, 5 yr
plot(seq(150,198,1), log.mhp[150:198], type = 'l', lwd = 1, main = 'Univariate MLP Forecast, 5 Years')
points(seq(179,198,1), f.20$mean, type = 'l', col = 'red')
Confidence Intervals for Univariate Model
We were able to create confidence intervals for the univariate model by taking the 5th and 95th percentiles of bootstrapped residuals. These confidence intervals did contain the actual values for both backcasts.
################################################################################
# Trying what Dr Sadler did, but for an mlp model
# xt = gen.arma.wge(100,phi = .9)
# Reload the raw series and restrict to 1975 onward (rows 49:246 = 198 quarters)
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)
mhp_1975 = mhp[49:246,]
xt = log(mhp_1975$MSPUS)
# Horizon = 4
h.short = 4
l = length(xt)
xtTrain = xt[1:(l-h.short)]
# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
# NOTE(review): the model is fit on the FULL series xt, while the bootstrap
# below simulates forward from xtTrain (which excludes the 4-quarter
# holdout), so the holdout influences the fitted model -- confirm intended.
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
# mlp drops the first (l - lf) points as lagged inputs, so residuals are
# aligned to the tail of xt
res = xt[(l-lf+1):l]-fit.mlp$fitted
# Are residuals white noise?
plotts.sample.wge(res)
## $xbar
## [1] 1.744812e-05
##
## $autplt
## [1] 1.000000000 -0.036871716 -0.030558554 0.001573962 0.048845593
## [6] 0.012174237 -0.075906809 -0.011477166 0.043462683 -0.014153964
## [11] -0.149881096 0.013599918 0.053422387 -0.142806880 -0.086030066
## [16] 0.047973796 0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21] 0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
##
## $freq
## [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
## [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
##
## $dbz
## [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
## [6] 0.085349888 0.272652115 0.361801013 0.360223739 0.282220036
## [11] 0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484 0.012476221 0.419856106 0.884930351
## [46] 1.333003310 1.701732802 1.948434446 2.048685019 1.992600708
## [51] 1.782086937 1.430042768 0.961445003 0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226 0.008382131 0.263182077
## [66] 0.435070212 0.512334692 0.494937033 0.395382225 0.238913052
## [71] 0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783 0.048036443 0.136964928 0.223984419
## [81] 0.315192773 0.412483461 0.508696665 0.587734345 0.629338738
## [86] 0.616060741 0.539807287 0.406419980 0.237274360 0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96] 0.000482310
acf(res,lag.max=100)
ljung.wge(res,K=24) # pval = 0.1972 in the output below (> .05), consistent with white noise; an earlier run gave 0.3804
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 29.63275
##
## $df
## [1] 24
##
## $pval
## [1] 0.1972232
ljung.wge(res,K=48) # p = 0.6786805, white
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 47.31109
##
## $df
## [1] 48
##
## $pval
## [1] 0.5009818
hist(res) # look normally distributed
# BSSim = function(ts,res,phi,horizon) {
# Number of leading points mlp consumed as lags; the simulator needs this
# many trailing values to build each one-step input window
num_back = l - lf
# Simulate one bootstrap forecast path from a fitted nnfor model.
# ts: training series; res: residuals to resample with replacement;
# fit: fitted mlp model; num_back: trailing values the model needs as
# input; horizon: steps ahead to simulate.
# Returns a numeric vector of length `horizon`.
BSSim = function(ts,res,fit,num_back,horizon) {
  n_obs = length(ts)
  sim_path = numeric(horizon)
  for (step in seq_len(horizon)) {
    idx = n_obs + step
    # Feed the model the last num_back values (observed plus simulated so far)
    recent = ts[(idx - num_back):(idx - 1)]
    one_ahead = forecast(fit, y = recent, h = 1)
    # One-step forecast plus a bootstrapped residual draw
    ts[idx] = one_ahead$mean + sample(res, 1)
    sim_path[step] = ts[idx]
  }
  sim_path
}
# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less
# holder = matrix(nrow = BSSamples,ncol = 50)
# One simulated 4-step forecast path per row
holder = matrix(nrow = BSSamples,ncol = h.short)
for(i in 1:BSSamples)
{
# xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
if ((i %% 100) == 0){
print(i)
}
xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.short)
holder[i,] = xtNewFor
}
## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000
# Calculate percentiles for each column
# 2.5% / 97.5% quantiles of the bootstrap paths give a pointwise 95% interval
percentiles_per_column <- apply(holder, 2, function(column) {
quantile(column, probs = c(0.025, 0.975))
})
# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)
# Print the result
print(percentiles_per_column)
## 2.5% 97.5%
## [1,] 12.91162 13.01384
## [2,] 12.89698 13.01408
## [3,] 12.87999 13.03489
## [4,] 12.86808 13.06548
# Bootstrap mean forecast (red) with pointwise 95% CI (dotted blue) over the
# short-horizon backcast window.
xtNewForBS = colMeans(holder)
result = fit.mlp$fitted
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), )
#lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.short,1),xt[(l-h.short+1):l],col = "red")
# Alex edited to plot black line as the actual values and predicted as red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
# FIX: ylim previously used min(x), but `x` is not defined until a later
# chunk; the series here is `xt` (matching the long-horizon plot below).
# ymax_future / ymin_plot are defined in an earlier chunk.
plot(xt[6:198], type = 'l',
main = 'Univariate MLP Short Term Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.short,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.short,1),xtNewForBS, col = "red")
# Zoomed In
plot(seq(150,198,1), xt[150:198], type = 'l',
main = 'Univariate MLP Short Term Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(195,198,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(195,198,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(195,198,1),xtNewForBS,col = "red")
# Horizon = 20
# Repeat the bootstrap CI procedure with a 20-quarter (5 yr) holdout
h.long = 20
l = length(xt)
xtTrain = xt[1:(l-h.long)]
# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
# Same seed and full-series fit as the short-horizon case above
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted
# Are residuals white noise?
plotts.sample.wge(res)
## $xbar
## [1] 1.744812e-05
##
## $autplt
## [1] 1.000000000 -0.036871716 -0.030558554 0.001573962 0.048845593
## [6] 0.012174237 -0.075906809 -0.011477166 0.043462683 -0.014153964
## [11] -0.149881096 0.013599918 0.053422387 -0.142806880 -0.086030066
## [16] 0.047973796 0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21] 0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
##
## $freq
## [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
## [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
##
## $dbz
## [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
## [6] 0.085349888 0.272652115 0.361801013 0.360223739 0.282220036
## [11] 0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484 0.012476221 0.419856106 0.884930351
## [46] 1.333003310 1.701732802 1.948434446 2.048685019 1.992600708
## [51] 1.782086937 1.430042768 0.961445003 0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226 0.008382131 0.263182077
## [66] 0.435070212 0.512334692 0.494937033 0.395382225 0.238913052
## [71] 0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783 0.048036443 0.136964928 0.223984419
## [81] 0.315192773 0.412483461 0.508696665 0.587734345 0.629338738
## [86] 0.616060741 0.539807287 0.406419980 0.237274360 0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96] 0.000482310
acf(res,lag.max=100)
ljung.wge(res,K=24) # pval = 0.1972 in the output below (> .05), consistent with white noise; an earlier run gave 0.3804
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 29.63275
##
## $df
## [1] 24
##
## $pval
## [1] 0.1972232
ljung.wge(res,K=48) # p = 0.6786805, white
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 47.31109
##
## $df
## [1] 48
##
## $pval
## [1] 0.5009818
hist(res) # look normally distributed
# BSSim = function(ts,res,phi,horizon) {
# Lag window size consumed by mlp (same derivation as the short-horizon case)
num_back = l - lf
# Bootstrap one simulated forecast path: repeatedly take a one-step forecast
# from `fit` and add a residual drawn with replacement from `res`.
# `num_back` is how many trailing values the model needs as input.
# (Redefinition of the same function from the short-horizon section.)
BSSim = function(ts,res,fit,num_back,horizon) {
origin = length(ts)
ForHolder = numeric(horizon)
for(i in (origin+1):(origin+horizon)){
# ts[i] = ts[i-1]*phi + sample(res,1)
# print(i)
needed_to_predict = ts[(i-num_back):(i-1)]
fore=forecast(fit,y=needed_to_predict,h=1)
ts[i]=fore$mean+sample(res,1)
ForHolder[(i-origin)]=ts[i]
}
return(ForHolder)
}
# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less
# holder = matrix(nrow = BSSamples,ncol = 50)
# One simulated 20-step forecast path per row
holder = matrix(nrow = BSSamples,ncol = h.long)
for(i in 1:BSSamples)
{
# xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
if ((i %% 100) == 0){
print(i)
}
xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.long)
holder[i,] = xtNewFor
}
## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000
# Calculate percentiles for each column
# 2.5% / 97.5% quantiles of the bootstrap paths give a pointwise 95% interval
percentiles_per_column <- apply(holder, 2, function(column) {
quantile(column, probs = c(0.025, 0.975))
})
# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)
# Print the result
print(percentiles_per_column)
## 2.5% 97.5%
## [1,] 12.61870 12.72615
## [2,] 12.61532 12.73625
## [3,] 12.60530 12.75608
## [4,] 12.59535 12.78286
## [5,] 12.59227 12.81404
## [6,] 12.58460 12.83386
## [7,] 12.58620 12.86215
## [8,] 12.58660 12.88227
## [9,] 12.58345 12.90117
## [10,] 12.58453 12.92735
## [11,] 12.58455 12.94401
## [12,] 12.56501 12.97066
## [13,] 12.57141 12.98767
## [14,] 12.57361 13.00649
## [15,] 12.56880 13.02081
## [16,] 12.56941 13.04938
## [17,] 12.57778 13.06957
## [18,] 12.57743 13.08411
## [19,] 12.57157 13.10765
## [20,] 12.58245 13.14049
xtNewForBS = colMeans(holder)
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), col = 'red')
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.long,1),xt[(l-h.long+1):l],col = "black")
# Alex edited to plot black line as the actual values and predicted as red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
# Full series with bootstrap mean forecast (red) and 95% CI (dotted blue);
# ymax_future / ymin_plot are defined in an earlier chunk
plot(xt[6:198], type = 'l',
main = 'Univariate MLP Long Term Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(lf+1,lf+h.long,1),xtNewForBS,col = "red")
# Zoomed In
plot(seq(150,198,1),xt[150:198], type = 'l',
main = 'Univariate MLP Long Term Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(179,198,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(179,198,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(179,198,1),xtNewForBS, col = "red")
Future Forecast
# Forecast beyond the observed data using the full series as training data.
# NOTE(review): the two mlp() calls below use identical specifications; two
# separate fits are trained (only the forecast horizon differs) -- confirm
# this is intentional rather than reusing one fit for both horizons.
# Inline comments record an earlier run; values vary between knits.
log.mhp = ts(fed_housing_data$Median_Sales_Price)
# Univariate MLP Forecasting into the Future
future.mspFit.4 = mlp(log.mhp, comb = 'median')
future.f.4 = forecast(future.mspFit.4, h = 4)
future.mspFit.20 = mlp(log.mhp, comb = 'median')
future.f.20 = forecast(future.mspFit.20, h = 20)
future.f.4$mean[4] # 12.96544
## t+4
## 12.94831
future.f.20$mean[20] # 13.10206
## t+20
## 13.16013
exp(future.f.4$mean[4]) # $427,384.77
## t+4
## 420125.9
exp(future.f.20$mean[20]) # $489,950.67
## t+20
## 519243.2
h.short = 4
h.long = 20
x = fed_housing_data$Median_Sales_Price
l = length(x)
# Forecasting next year (Zoomed In)
# Indexing x past its length pads with NA, extending the x-axis so the
# forecast lines (red) have room to the right of the observed data
plot(seq(xmin_plot,l+h.short,1),x[xmin_plot:(l+h.short)],type="l",col="black",
xlab="Time",ylab="log Median Housing Sales Price",
main="Univariate MLP Short Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.short),1),future.f.4$mean,col="red")
# Forecasting next 5 years (Zoomed In)
plot(seq(xmin_plot,l+h.long,1),x[xmin_plot:(l+h.long)],type="l",col="black",
xlab="Time",ylab="log Median Housing Sales Price",
main="Univariate MLP Long Term Forecast",ylim=c(ymin_plot,ymax_future))
lines(seq((l+1),(l+h.long),1),future.f.20$mean,col="red")
CI for Future Forecast
Wording:
In 1 year, we are 95% confident that the median home sale price will be between $386,583 (e^12.8651) and $463,006 (e^13.04549). Our best estimate is $422,090 (e^12.95297).
In 5 years, we are 95% confident that the median home sale price will be between $375,203 (e^12.83522) and $654,866 (e^13.39219). Our best estimate is $496,983 (e^13.11631).
################################################################################
# Trying what Dr Sadler did, but for an mlp model
# xt = gen.arma.wge(100,phi = .9)
# Bootstrap CIs for the FUTURE forecast: same procedure as the backcast CIs,
# but now training on the entire observed series (no holdout)
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv"
mhp <- read.csv(file_path, header = TRUE)
mhp_1975 = mhp[49:246,]
xt = log(mhp_1975$MSPUS)
# Horizon = 4
h.short = 4
l = length(xt)
# Entire series is the training set; simulation starts at the last observation
xtTrain = xt
# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
res = xt[(l-lf+1):l]-fit.mlp$fitted
# Are residuals white noise?
plotts.sample.wge(res)
## $xbar
## [1] 1.744812e-05
##
## $autplt
## [1] 1.000000000 -0.036871716 -0.030558554 0.001573962 0.048845593
## [6] 0.012174237 -0.075906809 -0.011477166 0.043462683 -0.014153964
## [11] -0.149881096 0.013599918 0.053422387 -0.142806880 -0.086030066
## [16] 0.047973796 0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21] 0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
##
## $freq
## [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
## [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
##
## $dbz
## [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
## [6] 0.085349888 0.272652115 0.361801013 0.360223739 0.282220036
## [11] 0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484 0.012476221 0.419856106 0.884930351
## [46] 1.333003310 1.701732802 1.948434446 2.048685019 1.992600708
## [51] 1.782086937 1.430042768 0.961445003 0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226 0.008382131 0.263182077
## [66] 0.435070212 0.512334692 0.494937033 0.395382225 0.238913052
## [71] 0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783 0.048036443 0.136964928 0.223984419
## [81] 0.315192773 0.412483461 0.508696665 0.587734345 0.629338738
## [86] 0.616060741 0.539807287 0.406419980 0.237274360 0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96] 0.000482310
acf(res,lag.max=100)
ljung.wge(res,K=24) # pval = 0.1972 in the output below (> .05), consistent with white noise; an earlier run gave 0.3804
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 29.63275
##
## $df
## [1] 24
##
## $pval
## [1] 0.1972232
ljung.wge(res,K=48) # p = 0.6786805, white
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 47.31109
##
## $df
## [1] 48
##
## $pval
## [1] 0.5009818
hist(res) # look normally distributed
# BSSim = function(ts,res,phi,horizon) {
# Lag window size consumed by mlp (same derivation as the backcast sections)
num_back = l - lf
# Bootstrap one simulated forecast path: repeatedly take a one-step forecast
# from `fit` and add a residual drawn with replacement from `res`.
# `num_back` is how many trailing values the model needs as input.
# (Redefinition of the same function from the backcast sections.)
BSSim = function(ts,res,fit,num_back,horizon) {
origin = length(ts)
ForHolder = numeric(horizon)
for(i in (origin+1):(origin+horizon)){
# ts[i] = ts[i-1]*phi + sample(res,1)
# print(i)
needed_to_predict = ts[(i-num_back):(i-1)]
fore=forecast(fit,y=needed_to_predict,h=1)
ts[i]=fore$mean+sample(res,1)
ForHolder[(i-origin)]=ts[i]
}
return(ForHolder)
}
# BSSamples = 100000
BSSamples = 1000 # I'm worried 100k will take a really long time, so make it less
# holder = matrix(nrow = BSSamples,ncol = 50)
# One simulated 4-step future path per row, starting from the full series
holder = matrix(nrow = BSSamples,ncol = h.short)
for(i in 1:BSSamples)
{
# xtNewFor = BSSim(xtTrain,est$res,est$phi,50)
if ((i %% 100) == 0){
print(i)
}
xtNewFor = BSSim(xtTrain,res,fit.mlp,num_back,h.short)
holder[i,] = xtNewFor
}
## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000
# Calculate percentiles for each column
# 2.5% / 97.5% quantiles of the bootstrap paths give a pointwise 95% interval
percentiles_per_column <- apply(holder, 2, function(column) {
quantile(column, probs = c(0.025, 0.975))
})
# Transpose the result for better readability
percentiles_per_column <- t(percentiles_per_column)
# Print the result
print(percentiles_per_column)
## 2.5% 97.5%
## [1,] 12.89311 12.99534
## [2,] 12.88840 12.99354
## [3,] 12.88019 13.01974
## [4,] 12.86510 13.04549
# Point estimate (bootstrap mean) and 95% bounds at the 1-year mark, on the
# log scale and back-transformed to dollars; these are the values quoted in
# the "CI for Future Forecast" wording above
xtNewForBS = colMeans(holder)
# Forecast 1 year, log scale
xtNewForBS[4] # 12.95297
## [1] 12.95297
percentiles_per_column[4,1] # 12.8651
## 2.5%
## 12.8651
percentiles_per_column[4,2] # 13.04549
## 97.5%
## 13.04549
# Forecast 1 year, exp
exp(xtNewForBS[4]) # $422,089.9
## [1] 422089.9
exp(percentiles_per_column[4,1]) # $386,583.3
## 2.5%
## 386583.3
exp(percentiles_per_column[4,2]) # $463,005.6
## 97.5%
## 463005.6
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
#plotts.wge(c(fit.mlp$fitted,xtNewForBS),ylim=c(min(fit.mlp$fitted)-.1,max(fit.mlp$fitted)+.1), )
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,1],col = "blue")
#lines(seq(lf+1,lf+h.long,1),percentiles_per_column[,2],col = "blue")
#lines(seq(lf+1,lf+h.long,1),xt[(l-h.long+1):l],col = "red")
# Alex hard coded seq and changed prediction line to red
# plotts.wge(c(xtTrain-est$res,xtNewForBS))
# Indexing xt past its length (l + h.short) pads with NA, which extends the
# x-axis so the future forecast lines have room
plot(seq(1,l+h.short,1),xt[1:(l+h.short)], type = 'l',
main = 'Univariate MLP Short Term Future Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(199,202,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,202,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,202,1),xtNewForBS, col = "red")
# Zoomed In
# NOTE(review): x-positions are hard-coded to start at 150 while the data is
# sliced with xmin_plot; these only line up when xmin_plot == 150 -- confirm.
plot(seq(150,202,1), xt[xmin_plot:(l+h.short)], type = 'l',
main = 'Univariate MLP Short Term Future Forecast with Confidence Intervals',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(199,202,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,202,1),percentiles_per_column[,2], lty = 3, col = "blue")
# FIX: dropped the stray ylim= argument -- ylim is not a parameter of
# lines() and only triggered a "not a graphical parameter" warning
lines(seq(199,202,1),xtNewForBS,col = "red")
# Horizon = 20
h.long = 20
l = length(xt)
# Long-horizon run trains on the full (logged) series.
xtTrain = xt
# est = est.arma.wge(xtTrain,p = 1)
library(nnfor)
set.seed(30)
# Univariate MLP on the logged series; ensemble members combined by median.
fit.mlp = mlp(ts(xt), comb = 'median')
lf = length(fit.mlp$fitted)
# Residuals over the fitted span (the first l - lf observations are consumed as lags).
res = xt[(l-lf+1):l]-fit.mlp$fitted
# Are residuals white noise?
plotts.sample.wge(res)
## $xbar
## [1] 1.744812e-05
##
## $autplt
## [1] 1.000000000 -0.036871716 -0.030558554 0.001573962 0.048845593
## [6] 0.012174237 -0.075906809 -0.011477166 0.043462683 -0.014153964
## [11] -0.149881096 0.013599918 0.053422387 -0.142806880 -0.086030066
## [16] 0.047973796 0.068682257 -0.072539163 -0.081279932 -0.103337635
## [21] 0.184191684 -0.097595647 -0.010598749 -0.014667230 -0.022377763
## [26] -0.002837652
##
## $freq
## [1] 0.005181347 0.010362694 0.015544041 0.020725389 0.025906736 0.031088083
## [7] 0.036269430 0.041450777 0.046632124 0.051813472 0.056994819 0.062176166
## [13] 0.067357513 0.072538860 0.077720207 0.082901554 0.088082902 0.093264249
## [19] 0.098445596 0.103626943 0.108808290 0.113989637 0.119170984 0.124352332
## [25] 0.129533679 0.134715026 0.139896373 0.145077720 0.150259067 0.155440415
## [31] 0.160621762 0.165803109 0.170984456 0.176165803 0.181347150 0.186528497
## [37] 0.191709845 0.196891192 0.202072539 0.207253886 0.212435233 0.217616580
## [43] 0.222797927 0.227979275 0.233160622 0.238341969 0.243523316 0.248704663
## [49] 0.253886010 0.259067358 0.264248705 0.269430052 0.274611399 0.279792746
## [55] 0.284974093 0.290155440 0.295336788 0.300518135 0.305699482 0.310880829
## [61] 0.316062176 0.321243523 0.326424870 0.331606218 0.336787565 0.341968912
## [67] 0.347150259 0.352331606 0.357512953 0.362694301 0.367875648 0.373056995
## [73] 0.378238342 0.383419689 0.388601036 0.393782383 0.398963731 0.404145078
## [79] 0.409326425 0.414507772 0.419689119 0.424870466 0.430051813 0.435233161
## [85] 0.440414508 0.445595855 0.450777202 0.455958549 0.461139896 0.466321244
## [91] 0.471502591 0.476683938 0.481865285 0.487046632 0.492227979 0.497409326
##
## $dbz
## [1] -1.607957116 -1.333199475 -0.956723495 -0.557414826 -0.197276541
## [6] 0.085349888 0.272652115 0.361801013 0.360223739 0.282220036
## [11] 0.146507367 -0.025815122 -0.213195430 -0.395491732 -0.555594521
## [16] -0.680578970 -0.762299845 -0.797561975 -0.788096904 -0.740445041
## [21] -0.665592502 -0.578073322 -0.494348614 -0.430523065 -0.399659407
## [26] -0.409013366 -0.457541674 -0.534241914 -0.618359356 -0.682912713
## [31] -0.702371497 -0.662742157 -0.569047029 -0.445123891 -0.325406257
## [36] -0.243202185 -0.220148551 -0.258493774 -0.336107381 -0.405719151
## [41] -0.403461300 -0.271459484 0.012476221 0.419856106 0.884930351
## [46] 1.333003310 1.701732802 1.948434446 2.048685019 1.992600708
## [51] 1.782086937 1.430042768 0.961445003 0.415563152 -0.152520890
## [56] -0.674526939 -1.079724859 -1.313789861 -1.356838696 -1.227881250
## [61] -0.973113995 -0.648429223 -0.306628226 0.008382131 0.263182077
## [66] 0.435070212 0.512334692 0.494937033 0.395382225 0.238913052
## [71] 0.061382161 -0.097075599 -0.202658934 -0.238764730 -0.210263380
## [76] -0.137750779 -0.045712783 0.048036443 0.136964928 0.223984419
## [81] 0.315192773 0.412483461 0.508696665 0.587734345 0.629338738
## [86] 0.616060741 0.539807287 0.406419980 0.237274360 0.066394493
## [91] -0.068523599 -0.140043634 -0.143310298 -0.098167394 -0.039055134
## [96] 0.000482310
# Sample ACF of the residuals out to lag 100 (visual white-noise check).
acf(res,lag.max=100)
ljung.wge(res,K=24) # p = 0.1972232 (see output below), fail to reject white noise
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 24
##
## $chi.square
## [1] 29.63275
##
## $df
## [1] 24
##
## $pval
## [1] 0.1972232
ljung.wge(res,K=48) # p = 0.5009818 (see output below), fail to reject white noise
## Obs -0.03687172 -0.03055855 0.001573962 0.04884559 0.01217424 -0.07590681 -0.01147717 0.04346268 -0.01415396 -0.1498811 0.01359992 0.05342239 -0.1428069 -0.08603007 0.0479738 0.06868226 -0.07253916 -0.08127993 -0.1033376 0.1841917 -0.09759565 -0.01059875 -0.01466723 -0.02237776 -0.002837652 -0.05137369 -0.01063967 0.0134555 0.05280341 -0.137294 -0.04656288 0.1030121 -0.05345923 0.1223522 0.06316164 0.07034745 0.002489447 -0.02428342 0.007679022 0.06837862 -0.02410031 -0.02900041 0.03084667 0.0004189358 0.01420003 0.0386115 0.03606818 -0.008020311
## $test
## [1] "Ljung-Box test"
##
## $K
## [1] 48
##
## $chi.square
## [1] 47.31109
##
## $df
## [1] 48
##
## $pval
## [1] 0.5009818
hist(res) # residuals look approximately normally distributed
# One bootstrap sample path: extend `ts` by `horizon` steps, each step adding a
# randomly resampled residual to the fitted network's one-step-ahead forecast.
num_back <- l - lf
BSSim <- function(ts, res, fit, num_back, horizon) {
  origin <- length(ts)
  ForHolder <- numeric(horizon)
  for (step in seq_len(horizon)) {
    pos <- origin + step
    # The network forecasts one step ahead from the last `num_back` values.
    lag_window <- ts[(pos - num_back):(pos - 1)]
    one_step <- forecast(fit, y = lag_window, h = 1)
    ts[pos] <- one_step$mean + sample(res, 1)
    ForHolder[step] <- ts[pos]
  }
  ForHolder
}
# BSSamples = 100000
BSSamples <- 1000 # 100k replicates would be very slow with an MLP, so use 1000
# One row per bootstrap replicate, one column per forecast step.
holder <- matrix(nrow = BSSamples, ncol = h.long)
for (b in seq_len(BSSamples)) {
  # Progress marker every 100 replicates.
  if (b %% 100 == 0) {
    print(b)
  }
  xtNewFor <- BSSim(xtTrain, res, fit.mlp, num_back, h.long)
  holder[b, ] <- xtNewFor
}
## [1] 100
## [1] 200
## [1] 300
## [1] 400
## [1] 500
## [1] 600
## [1] 700
## [1] 800
## [1] 900
## [1] 1000
# 95% bootstrap interval: 2.5th / 97.5th percentile of each forecast step.
# Transposed so rows are horizon steps and columns are the two quantiles.
percentiles_per_column <- t(apply(holder, 2, quantile, probs = c(0.025, 0.975)))
print(percentiles_per_column)
## 2.5% 97.5%
## [1,] 12.89311 13.00056
## [2,] 12.88194 13.00451
## [3,] 12.87148 13.02123
## [4,] 12.85401 13.04448
## [5,] 12.85060 13.06950
## [6,] 12.84008 13.08704
## [7,] 12.83404 13.11585
## [8,] 12.83586 13.13375
## [9,] 12.83169 13.15603
## [10,] 12.83207 13.17937
## [11,] 12.83095 13.20530
## [12,] 12.81948 13.21947
## [13,] 12.82531 13.24356
## [14,] 12.82483 13.26146
## [15,] 12.82571 13.27875
## [16,] 12.82621 13.30590
## [17,] 12.83141 13.33041
## [18,] 12.82385 13.34815
## [19,] 12.82350 13.36700
## [20,] 12.83522 13.39219
# Point forecast: mean of the bootstrap sample paths at each horizon step.
xtNewForBS = colMeans(holder)
# Forecast 5 years, log scale
xtNewForBS[20] # 13.11631
## [1] 13.11631
percentiles_per_column[20,1] # 12.83522
## 2.5%
## 12.83522
percentiles_per_column[20,2] # 13.39219
## 97.5%
## 13.39219
# Forecast 5 years, exp
# Back-transform from the log scale to dollars.
exp(xtNewForBS[20]) # 496982.7
## [1] 496982.7
exp(percentiles_per_column[20,1]) # 375202.5
## 2.5%
## 375202.5
exp(percentiles_per_column[20,2]) # 654865.7
## 97.5%
## 654865.7
# Alex hard coded seq and changed prediction line to red
# Full series plus the long-horizon bootstrap forecast (red) and 95% limits (blue).
# Titles fixed: this section uses the *univariate* MLP (fit on xt alone at the
# top of this section), not the multivariate model; the old titles were a
# copy-paste error.
plot(seq(1,l+h.long,1),xt[1:(l+h.long)], type = 'l',
main = 'Univariate MLP Long Term Future Forecast',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(min(xt),ymax_future))
lines(seq(199,218,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,218,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,218,1),xtNewForBS,col = "red")
# Zoomed In
plot(seq(150,218,1),xt[xmin_plot:(l+h.long)], type = 'l',
main = 'Univariate MLP Long Term Future Forecast',
xlab = 'Time', ylab = 'log Median Housing Sales Price', ylim=c(ymin_plot,ymax_future))
lines(seq(199,218,1),percentiles_per_column[,1], lty = 3, col = "blue")
lines(seq(199,218,1),percentiles_per_column[,2], lty = 3, col = "blue")
lines(seq(199,218,1),xtNewForBS, col = "red")
LSTM
LSTMs are a type of Recurrent Neural Network (RNN) that have shown success with Time Series data.
The LSTM was created in Python. The code is below.
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import random
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset: item i is (a window of `sequence_length` values,
    the value immediately after the window)."""

    def __init__(self, data, sequence_length):
        self.data = torch.FloatTensor(data)
        self.sequence_length = sequence_length

    def __len__(self):
        # One sample per window whose target still lies inside the data.
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        window = self.data[idx:idx + self.sequence_length]
        target = self.data[idx + self.sequence_length]
        return window, target
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head that maps the final time step's
    hidden state to a single scalar prediction."""

    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        # Zero-initialised hidden and cell states, one per layer, on x's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(*state_shape).to(x.device)
        c0 = torch.zeros(*state_shape).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Decode only the last time step of the sequence.
        return self.fc(out[:, -1, :])
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Standard training loop; prints the mean batch loss after each epoch."""
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_x, batch_y in train_loader:
            # The model expects 3-D input: (batch_size, sequence_length, input_size).
            batch_x = batch_x.unsqueeze(-1)
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')
# After evaluation
def save_results_to_csv(predictions, actuals, ase, horizon_type="short"):
    """Write per-step predictions and a one-row ASE summary to two CSV files."""
    errors = [pred - act for pred, act in zip(predictions, actuals)]
    results_df = pd.DataFrame({
        'Prediction': predictions,
        'Actual': actuals,
        'Error': errors,
    })
    summary_df = pd.DataFrame({
        'Metric': ['ASE'],
        'Value': [ase],
    })
    # Keep the row index on the predictions file (it is the step number);
    # drop it on the summary file.
    results_df.to_csv(f'predictions_{horizon_type}_term.csv', index=True)
    summary_df.to_csv(f'metrics_{horizon_type}_term.csv', index=False)
# Set seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)
# NOTE(review): the rendered document lost the indentation of this `if` body;
# in the original script the next three lines are indented under the guard.
if torch.cuda.is_available():
torch.cuda.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Load and preprocess your data
df = pd.read_csv('https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/MSPUS.csv')
# import pdb
#pdb.set_trace()
data = df['MSPUS'].values[49:246] # Select rows 49 through 245 inclusive
# After loading data but before normalization
data = np.log(data) # Apply log transformation
# After loading data but before creating datasets
# Initialize the scaler
# Min-max scale the logged series to [0, 1] before feeding the LSTM.
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data.reshape(-1, 1)).flatten()
# Define parameters
# sequence_length = 10
sequence_length = 8
# sequence_length = 5
hidden_size = 64
# hidden_size = 128
num_layers = 2
# num_layers = 3
batch_size = 32
# batch_size = 16
num_epochs = 100
learning_rate = 0.01
# learning_rate = 0.005
h_short = 4
h_long = 20
# Create dataset and dataloader
# dataset = TimeSeriesDataset(data, sequence_length)
# train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Split data into train and test
train_data = data_normalized[:-h_short] # All data except last h_short entries
# The test slice keeps sequence_length extra points as the warm-up window.
test_data = data_normalized[-sequence_length-h_short:] # Last sequence_length + h_short entries
# Create datasets and dataloaders
train_dataset = TimeSeriesDataset(train_data, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TimeSeriesDataset(test_data, sequence_length)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
# Initialize model, criterion, and optimizer
model = LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers)
# model = LSTM(input_size=1, hidden_size=hidden_size, num_layers=num_layers, dropout=0.2)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs)
# Add evaluation mode
def evaluate_model(model, test_loader):
    """Walk the test loader, collecting denormalised (still log-scale)
    predictions and actuals, and compute the ASE on the un-logged (dollar)
    scale, reported in millions (divided by 1e6)."""
    model.eval()
    predictions = []
    actuals = []
    total_se = 0
    n = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            # Model wants 3-D input: (batch, sequence, 1).
            batch_x = batch_x.unsqueeze(-1)
            output = model(batch_x)
            # Undo the min-max scaling; values remain on the log scale here.
            pred = scaler.inverse_transform(output.numpy())[0][0]
            actual = scaler.inverse_transform(batch_y.numpy().reshape(-1, 1))[0][0]
            predictions.append(pred)
            actuals.append(actual)
            # Un-log so the squared error is in dollars.
            pred = np.exp(pred)
            actual = np.exp(actual)
            total_se += (pred - actual) ** 2
            n += 1
    ase = total_se / 1e6 / n if n > 0 else 0
    return predictions, actuals, ase
# Evaluate on test data
# predictions, actuals = evaluate_model(model, test_loader)
predictions, actuals, ase = evaluate_model(model, test_loader)
print("\nTest Results for short term prediction:")
for i, (pred, actual) in enumerate(zip(predictions, actuals)):
print(f"Prediction {i+1}: {pred:.2f}, Actual: {actual:.2f}")
print(f"\nAverage Squared Error: {ase:,.2f}")
# Save results
save_results_to_csv(predictions, actuals, ase, "short")
# Also do it with the long term evaluation
train_data = data_normalized[:-h_long] # All data except last h_long entries
test_data = data_normalized[-sequence_length-h_long:] # Last sequence_length + h_long entries
train_dataset = TimeSeriesDataset(train_data, sequence_length)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TimeSeriesDataset(test_data, sequence_length)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
# NOTE(review): this continues training the already-trained model rather than
# re-initialising it, so the long-term run effectively gets 2x epochs — confirm
# this was intentional.
train_model(model, train_loader, criterion, optimizer, num_epochs)
predictions, actuals, ase = evaluate_model(model, test_loader)
print("\nTest Results for long term prediction:")
for i, (pred, actual) in enumerate(zip(predictions, actuals)):
print(f"Prediction {i+1}: {pred:.2f}, Actual: {actual:.2f}")
print(f"\nAverage Squared Error: {ase:,.2f}")
save_results_to_csv(predictions, actuals, ase, "long")
The short term prediction was the following:
# Pull the LSTM's short-term predictions (log scale) back into R from the repo.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred
## X Prediction Actual Error
## 1 0 12.97172 12.98402 -0.012302399
## 2 1 12.97577 12.95560 0.020169258
## 3 2 12.97100 12.96407 0.006934166
## 4 3 12.96965 12.93483 0.034823418
# Overlay the LSTM predictions (blue squares) on the last h.short observations.
x = fed_housing_data$Median_Sales_Price
l = length(x)
plot(seq(1,l,1),x,type="b")
points(seq(l-h.short+1,l,1),pred$Prediction,type="b",pch=15,col="blue")
The short term ASE was 81.8 M. This was slightly better than the ARIMA model. Note that, like our other ASEs, it is computed on the unlogged, original Median Sales Prices.
# ASE is stored in millions of dollars (total squared error / 1e6 / n).
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_metrics_short_term.csv"
ase <- read.csv(file_path, header = TRUE)
ase
## Metric Value
## 1 ASE 81.81499
The long term prediction was the following:
# Pull the LSTM's long-term (5 year) predictions back into R from the repo.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred
## X Prediction Actual Error
## 1 0 12.70576 12.67106 0.034692764
## 2 1 12.70608 12.69802 0.008055687
## 3 2 12.72127 12.70381 0.017460823
## 4 3 12.73490 12.66697 0.067928314
## 5 4 12.72078 12.70046 0.020320892
## 6 5 12.72533 12.73257 -0.007248879
## 7 6 12.74889 12.77987 -0.030979156
## 8 7 12.79208 12.81530 -0.023219109
## 9 8 12.83588 12.88715 -0.051267624
## 10 9 12.89563 12.93362 -0.037988663
## 11 10 12.94953 12.93241 0.017118454
## 12 11 12.96874 12.98929 -0.020545006
## 13 12 12.99935 12.98997 0.009381294
## 14 13 13.01107 13.00042 0.010648727
## 15 14 13.01676 12.96921 0.047543526
## 16 15 12.99711 12.94443 0.052679062
## 17 16 12.96769 12.98402 -0.016334534
## 18 17 12.97404 12.95560 0.018435478
## 19 18 12.96841 12.96407 0.004336357
## 20 19 12.97093 12.93483 0.036101340
# Quick check: observed series with the long-term predictions (blue squares).
plot(seq(1,l,1),x,type="b")
points(seq(l-h.long+1,l,1),pred$Prediction,type="b",pch=15,col="blue")
The long term ASE was 149 M. This was an order of magnitude better than the Signal Plus Noise model, which had the best long term ASE so far. And the plot of the predictions (of the logged data) vs the actual values seems to line up almost exactly.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_metrics_long_term.csv"
ase <- read.csv(file_path, header = TRUE)
ase
## Metric Value
## 1 ASE 149.8804
# Presentation plots: short- and long-term LSTM forecasts (red) over the series.
x = fed_housing_data$Median_Sales_Price
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="LSTM Short Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.short+1),l,1),pred$Prediction,col="red")
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/LSTM_predictions_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
plot(seq(xmin_plot,l,1),x[xmin_plot:l],type="l",col="black",xlab="Time",ylab="log Median Housing Sales Price",main="LSTM Long Term Forecast",ylim=c(ymin_plot,ymax_plot))
lines(seq((l-h.long+1),l,1),pred$Prediction,col="red")
TFT
TFT (Temporal Fusion Transformer) is an algorithm that uses Transformers and Attention to forecast Time Series data. The model is known for being able to do very well with non-linear data, when it has tens of thousands or even hundreds of thousands of training examples. We have less than 1000, so it is unlikely to do well on this dataset.
The TFT was created in Python. The code is below.
# https://unit8co.github.io/darts/generated_api/darts.models.forecasting.tft_model.html#darts.models.forecasting.tft_model.TFTModel
# from darts.datasets import WeatherDataset
from darts.models import TFTModel
from darts import TimeSeries
from darts.explainability.tft_explainer import TFTExplainer
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# series = WeatherDataset().load()
series = pd.read_csv('https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/fed_housing_data.csv')
# Convert 'Year_Quarter' to datetime
# import pdb
# pdb.set_trace()
# series['Year_Quarter'] = pd.to_datetime(series['Year_Quarter'], format='%Y-Q%q') # Adjust format as needed
series['Year_Quarter'] = pd.to_datetime(series['Year_Quarter'])
# Convert pandas Series/DataFrames to Darts TimeSeries
# Assuming 'Year_Quarter' is your time index
# First set the index to Year_Quarter
series = series.set_index('Year_Quarter')
# predicting atmospheric pressure
# target = series['p (mbar)'][:100]
h_short = 4
h_long = 20
# import pdb
# pdb.set_trace()
# Target: median sales price with the last h_short quarters held out.
target = TimeSeries.from_dataframe(series[['Median_Sales_Price']][:-h_short]) # All data except last h_short entries
# optionally, past observed rainfall (pretending to be unknown beyond index 100)
# import pdb
# pdb.set_trace()
# Past covariates: the four supporting housing series, same holdout.
past_cov = TimeSeries.from_dataframe(series[['Ownership_Rate','Housing_Units_Completed','Supply_New_Houses','Housing_Price_Index']][:-h_short])
# future temperatures (pretending this component is a forecast)
# future_cov = series['T (degC)'][:106]
# by default, TFTModel is trained using a `QuantileRegression` making it a probabilistic forecasting model
# input_chunk_length = 12
input_chunk_length = 8
# n_epochs = 5
# n_epochs = 200
n_epochs = 20
random_state = 42
model = TFTModel(
# input_chunk_length=8,
input_chunk_length=input_chunk_length,
output_chunk_length=h_short,
hidden_size=128, # Larger hidden size for more capacity
lstm_layers=2, # More LSTM layers
num_attention_heads=4, # More attention heads
dropout=0.1, # Some dropout for regularization
batch_size=16, # Smaller batch size since we have limited data
n_epochs=n_epochs,
# learning_rate=0.001, # Standard learning rate, ValueError: Invalid model creation parameters. Model `TFTModel` has no args/kwargs `['learning_rate']`
random_state=random_state
)
# future_covariates are mandatory for `TFTModel`
# import pdb
# pdb.set_trace()
# future_cov = TimeSeries.from_dataframe(series[['Year_Quarter']])
# future_cov = TimeSeries.from_dataframe(series.iloc[:, 0])
# Uses the first column as a (trivial) future covariate to satisfy the API.
future_cov = TimeSeries.from_dataframe(series.iloc[:, [0]])
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
# TFTModel is probabilistic by definition; using `num_samples >> 1` to generate probabilistic forecasts
# pred = model.predict(h_short, num_samples=1000)
# Get mean prediction
# mean_pred = pred.mean()
# Get quantiles if you want confidence intervals
# quantiles_pred = pred.quantile([0.025, 0.975]) # 95% confidence interval
# import pdb
# pdb.set_trace()
# quantiles_pred = pred.quantile([0.025, 0.975]) # 95% confidence interval
# quantiles_pred_1 = pred.quantile(0.025)
# quantiles_pred_2 = pred.quantile(0.975)
def evaluate_model(model, h):
    """Forecast h steps ahead with the fitted TFT and compare against the last
    h actual values; returns (predictions, actuals, lower 95% bounds,
    upper 95% bounds, ASE in millions of dollars on the un-logged scale)."""
    # Probabilistic forecast: 1000 sample paths, summarised by mean and 95% band.
    predictions = model.predict(n=h, num_samples=1000)
    mean_predictions = predictions.mean()
    lower_bound = predictions.quantile(0.025)  # 2.5th percentile
    upper_bound = predictions.quantile(0.975)  # 97.5th percentile
    # Held-out actuals are the last h rows of the full series (module global).
    actuals = TimeSeries.from_dataframe(series[['Median_Sales_Price']][-h:])
    pred_values = mean_predictions.values()
    actual_values = actuals.values()
    lower_values = lower_bound.values()
    upper_values = upper_bound.values()
    total_se = 0
    n = len(pred_values)
    predictions_list = []
    actuals_list = []
    lower_bounds_list = []
    upper_bounds_list = []
    for i in range(n):
        # Single-variable series: take the first (only) component.
        pred = pred_values[i][0]
        actual = actual_values[i][0]
        predictions_list.append(pred)
        actuals_list.append(actual)
        lower_bounds_list.append(lower_values[i][0])
        upper_bounds_list.append(upper_values[i][0])
        # Squared error on the un-logged (dollar) scale.
        total_se += (np.exp(pred) - np.exp(actual)) ** 2
    # ASE reported in millions (divide by 1e6).
    ase = total_se / 1e6 / n if n > 0 else 0
    return predictions_list, actuals_list, lower_bounds_list, upper_bounds_list, ase
# Use the evaluation function
preds, acts, lower_bounds, upper_bounds, ase = evaluate_model(model, h_short)
# Print results
print(f"Average Squared Error for short term prediction: {ase:,.2f}")
print("\nPredictions vs Actuals with 95% Confidence Intervals:")
# NOTE(review): the loop name `l` is easy to misread next to `1` and `i`;
# consider renaming to `lo` on the next edit.
for i, (p, a, l, u) in enumerate(zip(preds, acts, lower_bounds, upper_bounds)):
print(f"Period {i+1}:")
print(f" Predicted = {p:,.2f} [{l:,.2f}, {u:,.2f}]")
print(f" Actual = {a:,.2f}")
print(f" Diff = {abs(p-a):,.2f}")
print(f" Width CI = {u-l:,.2f}")
# Optional: Calculate additional metrics
# mape_score = mape(target[-h_short:], model.predict(n=h_short).mean())
# print(f"\nMAPE: {mape_score:.2%}")
# After training the model
# static_covariates = None # If you have any static covariates, include them here
def save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="short_term"):
    """Persist per-step forecast detail and summary metrics to two CSV files."""
    pred_arr = np.array(preds)
    act_arr = np.array(acts)
    lo_arr = np.array(lower_bounds)
    hi_arr = np.array(upper_bounds)
    # Whether each actual value fell inside its 95% interval.
    covered = (act_arr >= lo_arr) & (act_arr <= hi_arr)
    results_df = pd.DataFrame({
        'Predicted': preds,
        'Actual': acts,
        'Lower_Bound': lower_bounds,
        'Upper_Bound': upper_bounds,
        'Absolute_Error': np.abs(pred_arr - act_arr),
        'Squared_Error': (pred_arr - act_arr)**2,
        'CI_Width': hi_arr - lo_arr,
        'Within_CI': covered
    })
    # Summary metrics across the whole horizon.
    metrics_df = pd.DataFrame({
        'Metric': ['ASE', 'MAE', 'RMSE', 'Mean_CI_Width', 'Coverage_Rate'],
        'Value': [
            ase,
            np.mean(np.abs(pred_arr - act_arr)),
            np.sqrt(np.mean((pred_arr - act_arr)**2)),
            np.mean(hi_arr - lo_arr),
            np.mean(covered)
        ]
    })
    results_df.to_csv(f'predictions_detailed_{model_name}.csv', index=True)
    metrics_df.to_csv(f'metrics_detailed_{model_name}.csv', index=False)
    print(f"Detailed results saved to predictions_detailed_{model_name}.csv and metrics_detailed_{model_name}.csv")
save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="TFT_short_term")
# Get interpretability components
# interpretability = model.interpret_output(
# import pdb
# pdb.set_trace()
# interpretability = model.interpret(target,past_covariates=past_cov,future_covariates=future_cov,static_covariates=static_covariates)
# Variable-selection weights show which inputs the TFT attended to.
explainer = TFTExplainer(model)
results = explainer.explain()
# Save the variable selection plot
# plt.figure(figsize=(12, 8))
explainer.plot_variable_selection(results)
# plt.savefig('variable_importance_short.png') # Save to file
# plt.close() # Close the plot to free memory
# Try with long term predictions as well
# import pdb
# pdb.set_trace()
# Rebuild target/covariates with the last h_long quarters held out, and refit
# a fresh TFTModel (unlike the LSTM section, this one is re-initialised).
target = TimeSeries.from_dataframe(series[['Median_Sales_Price']][:-h_long])
past_cov = TimeSeries.from_dataframe(series[['Ownership_Rate','Housing_Units_Completed','Supply_New_Houses','Housing_Price_Index']][:-h_long])
model = TFTModel(
# input_chunk_length=8,
input_chunk_length=input_chunk_length,
output_chunk_length=h_long,
hidden_size=128, # Larger hidden size for more capacity
lstm_layers=2, # More LSTM layers
num_attention_heads=4, # More attention heads
dropout=0.1, # Some dropout for regularization
batch_size=16, # Smaller batch size since we have limited data
n_epochs=n_epochs,
# learning_rate=0.001, # Standard learning rate, ValueError: Invalid model creation parameters. Model `TFTModel` has no args/kwargs `['learning_rate']`
random_state=random_state
)
model.fit(target, past_covariates=past_cov, future_covariates=future_cov)
preds, acts, lower_bounds, upper_bounds, ase = evaluate_model(model, h_long)
print(f"Average Squared Error for long term prediction: {ase:,.2f}")
print("\nPredictions vs Actuals with 95% Confidence Intervals:")
for i, (p, a, l, u) in enumerate(zip(preds, acts, lower_bounds, upper_bounds)):
print(f"Period {i+1}:")
print(f" Predicted = {p:,.2f} [{l:,.2f}, {u:,.2f}]")
print(f" Actual = {a:,.2f}")
print(f" Diff = {abs(p-a):,.2f}")
print(f" Width CI = {u-l:,.2f}")
save_detailed_results(preds, acts, lower_bounds, upper_bounds, ase, model_name="TFT_long_term")
explainer = TFTExplainer(model)
results = explainer.explain()
explainer.plot_variable_selection(results)
The short term prediction was the following:
# Pull the TFT's detailed short-term predictions back into R from the repo.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/predictions_detailed_TFT_short_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred
## X Predicted Actual Lower_Bound Upper_Bound Absolute_Error Squared_Error
## 1 0 12.66963 12.98402 11.89675 13.02597 0.3143873 0.09883936
## 2 1 12.66474 12.95560 11.81557 13.04598 0.2908556 0.08459696
## 3 2 12.66228 12.96407 11.70568 13.03513 0.3017937 0.09107942
## 4 3 12.68728 12.93483 11.96067 13.03658 0.2475483 0.06128015
## CI_Width Within_CI
## 1 1.129227 True
## 2 1.230405 True
## 3 1.329448 True
## 4 1.075911 True
h.short = 4
x = fed_housing_data$Median_Sales_Price
l = length(x)
# Observed series with TFT predictions (blue squares) and 95% bounds (red dashes).
plot(seq(1,l,1),x,type="b")
points(seq(l-h.short+1,l,1),pred$Predicted,type="b",pch=15,col="blue")
lines(seq(l-h.short+1,l,1),pred$Lower_Bound,lty=2, col="red", lwd=2)
lines(seq(l-h.short+1,l,1),pred$Upper_Bound,lty=2, col="red", lwd=2)
The short term ASE was 11.5 B. This was significantly worse than other models. The poor results aren’t unexpected though, as there were so few data points to train on.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/metrics_detailed_TFT_short_term.csv"
metrics <- read.csv(file_path, header = TRUE)
metrics # ASE = 11456.80 (in millions, i.e. ~11.5 B); the old "# 1146" note was stale
## Metric Value
## 1 ASE 1.145680e+04
## 2 MAE 2.886462e-01
## 3 RMSE 2.897395e-01
## 4 Mean_CI_Width 1.191248e+00
## 5 Coverage_Rate 1.000000e+00
The long term prediction was the following:
# Pull the TFT's detailed long-term predictions back into R from the repo.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/predictions_detailed_TFT_long_term.csv"
pred <- read.csv(file_path, header = TRUE)
pred
## X Predicted Actual Lower_Bound Upper_Bound Absolute_Error Squared_Error
## 1 0 12.43832 12.67106 12.21089 12.75497 0.2327429 0.05416924
## 2 1 12.47469 12.69802 12.23497 12.76391 0.2233344 0.04987824
## 3 2 12.49115 12.70381 12.24657 12.83910 0.2126641 0.04522604
## 4 3 12.48697 12.66697 12.24163 12.80761 0.1800009 0.03240032
## 5 4 12.49683 12.70046 12.24852 12.83739 0.2036384 0.04146860
## 6 5 12.49251 12.73257 12.24345 12.83757 0.2400679 0.05763260
## 7 6 12.49833 12.77987 12.25592 12.81032 0.2815470 0.07926873
## 8 7 12.50507 12.81529 12.25759 12.82373 0.3102269 0.09624075
## 9 8 12.49876 12.88715 12.25309 12.81391 0.3883918 0.15084819
## 10 9 12.50641 12.93362 12.27013 12.77429 0.4272105 0.18250879
## 11 10 12.51155 12.93241 12.26884 12.82441 0.4208647 0.17712707
## 12 11 12.51987 12.98929 12.26915 12.78959 0.4694197 0.22035481
## 13 12 12.53635 12.98997 12.27686 12.85769 0.4536235 0.20577428
## 14 13 12.53622 13.00042 12.27722 12.84829 0.4642035 0.21548488
## 15 14 12.54577 12.96921 12.28666 12.87724 0.4234453 0.17930589
## 16 15 12.54831 12.94443 12.28880 12.87036 0.3961252 0.15691519
## 17 16 12.54871 12.98402 12.28873 12.83241 0.4353107 0.18949542
## 18 17 12.55493 12.95560 12.29005 12.86720 0.4006676 0.16053455
## 19 18 12.55411 12.96407 12.29579 12.89488 0.4099561 0.16806402
## 20 19 12.55270 12.93483 12.29220 12.84341 0.3821264 0.14602056
## CI_Width Within_CI
## 1 0.5440809 True
## 2 0.5289398 True
## 3 0.5925264 True
## 4 0.5659723 True
## 5 0.5888688 True
## 6 0.5941170 True
## 7 0.5544005 True
## 8 0.5661477 True
## 9 0.5608154 False
## 10 0.5041576 False
## 11 0.5555715 False
## 12 0.5204439 False
## 13 0.5808367 False
## 14 0.5710670 False
## 15 0.5905744 False
## 16 0.5815662 False
## 17 0.5436749 False
## 18 0.5771557 False
## 19 0.5990881 False
## 20 0.5512100 False
h.long=20
x = fed_housing_data$Median_Sales_Price
l = length(x)
# Observed series with long-term TFT predictions (blue squares) and 95% bounds (red dashes).
plot(seq(1,l,1),x,type="b")
points(seq(l-h.long+1,l,1),pred$Predicted,type="b",pch=15,col="blue")
lines(seq(l-h.long+1,l,1),pred$Lower_Bound,lty=2, col="red", lwd=2)
lines(seq(l-h.long+1,l,1),pred$Upper_Bound,lty=2, col="red", lwd=2)
The long term ASE was 15.0 B. Similar to the short term predictions, this model did very poorly compared to the other models.
file_path = "https://raw.githubusercontent.com/aabromowitz/TimeSeriersProject/refs/heads/main/metrics_detailed_TFT_long_term.csv"
metrics <- read.csv(file_path, header = TRUE)
metrics # ASE = 14983.51 (in millions, i.e. ~15.0 B); the old "# 1146" note was stale
## Metric Value
## 1 ASE 1.498351e+04
## 2 MAE 3.477784e-01
## 3 RMSE 3.611591e-01
## 4 Mean_CI_Width 5.635607e-01
## 5 Coverage_Rate 4.000000e-01