# CausalImpact (integration with Vertica)
## Vertica connection TEST
# JAVA_HOME is set before rJava is attached.
Sys.setenv(JAVA_HOME='C:\\Program Files\\Java\\jre7')
library(rJava)
library(RJDBC)
# zoo() and CausalImpact() are called further down in this script.
library(zoo)
library(CausalImpact)
# Open a JDBC connection to Vertica. The driver jar path, host/database in the
# URL, account and password are placeholders to be replaced for the local
# environment.
vDriver <- JDBC(
  driverClass = "com.vertica.jdbc.Driver",
  classPath = "C:\\ytkim\\util\\vertica-jdbc-7.2.3-0.jar"
)
vertica <- dbConnect(vDriver, "jdbc:vertica://아이피/디비명", "계정", "")

# Smoke test: fetch a table and inspect its dimensions and first rows.
myframe <- dbGetQuery(vertica, "select * from public.iris")
dim(myframe)
head(myframe)
# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(myframe)
}
## Extract the model input. The first column is the response y (the outcome);
## the columns from the second one onward are the covariates x (variables that
## influence y).
# Both query strings below are templates to be filled in with the real SQL.
myframe <- dbGetQuery(vertica, "쿼리기술")
vertica_xxx <- dbGetQuery(vertica,
"select ...
from ...
where 일자 >= '2014-01-01'
group by 1
order by 1
) a
order by 일자
")
# No further queries are issued after this point, so release the connection.
dbDisconnect(vertica)

dim(vertica_xxx)
head(vertica_xxx)
# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(vertica_xxx)
}
# Check the 2014 trend: plot the first 365 rows of the sales column.
# The x axis is built from the extracted values so x and y always have the
# same length (plotting 1:12 against 365 values fails).
sales_2014 <- head(vertica_xxx$sales, 365)
plot(seq_along(sales_2014), sales_2014, type = "l")

# Pre- and post-intervention periods, given as row indices of vertica_xxx.
pre.period <- c(1, 390)
post.period <- c(391, 415)

# Fit the model with 10000 MCMC iterations, a 7-season seasonal component and
# standardized data.
vertica_impact <- CausalImpact(
  vertica_xxx, pre.period, post.period,
  model.args = list(niter = 10000, nseasons = 7, standardize.data = TRUE)
)
## Alternative settings:
## model.args = list(niter = 10000, nseasons = 12, standardize.data = TRUE, dynamic.regression = TRUE))

summary(vertica_impact)
plot(vertica_impact)
# Coefficient plot of the underlying bsts model.
plot(vertica_impact$model$bsts.model, "coefficient")
## Time-series data is usually easier to read when it is indexed by dates
## rather than by sequence numbers.
## The query result had 742 rows; the index length is taken from the data so
## that it always matches the number of observations.
# NOTE(review): ytkim is not defined anywhere in this script; it is assumed to
# be a data set loaded beforehand -- confirm.
time.points <- seq.Date(
  as.Date("2015-01-01"),
  by = 1,
  length.out = NROW(ytkim)
)
ytkim <- zoo(ytkim, time.points)

# With a date-indexed series the periods are given as date ranges.
pre.period <- as.Date(c("2015-01-01", "2016-12-11"))
post.period <- as.Date(c("2016-12-12", "2016-12-13"))
head(ytkim)

## Fit model vertica_impact3 on ytkim with niter = 1000, a 365-season
## seasonal component, standardized data and alpha = 0.01.
vertica_impact3 <- CausalImpact(
  ytkim, pre.period, post.period,
  model.args = list(niter = 1000, nseasons = 365, standardize.data = TRUE),
  alpha = 0.01
)
## Alternative settings:
## , model.args = list(niter = 10000, nseasons = 12, standardize.data = TRUE, dynamic.regression = TRUE))

## Check the results.
summary(vertica_impact3)
plot(vertica_impact3)
# Refit on ytkim with dynamic regression enabled and 10000 MCMC iterations.
# The periods are the pre.period / post.period date ranges; the names `before`
# and `after` used previously are not defined anywhere in this script.
vertica_impact4 <- CausalImpact(
  ytkim, pre.period, post.period,
  model.args = list(
    niter = 10000,
    nseasons = 365,
    standardize.data = TRUE,
    dynamic.regression = TRUE
  )
)
summary(vertica_impact4)
plot(vertica_impact4)
plot(vertica_impact4, c("original", "pointwise"))
plot(vertica_impact4, c("original"))

## With dynamic.regression = TRUE the coefficients cannot be inspected, so the
## attempt is wrapped: a failure is reported as a message instead of aborting
## the script before the final report.
tryCatch(
  plot(vertica_impact4$model$bsts.model, "coefficient"),
  error = function(e) {
    message("Coefficient plot unavailable: ", conditionMessage(e))
  }
)

# Print the verbal report of the analysis.
summary(vertica_impact4, "report")