EK_simulations.R

# Eerste Kamer simulations ------------------------------------------------
# Tom Louwerse, 2019

setwd("../..")

# Load data ---------------------------------------------------------------

# Load output from Peilingwijzer
load("Last/outputs.RData")
load("Last/sourcedata.RData")

# Read libraries
library(rjags)

## Loading required package: coda

## Linked to JAGS 4.2.0

## Loaded modules: basemod,bugs

library(colorspace)
library(doSNOW)

## Loading required package: foreach

## Loading required package: iterators

## Loading required package: snow

library(car)

## Loading required package: carData

library(xlsx)
library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Warning: package 'survival' was built under R version 3.5.1

## Loading required package: Formula

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.5.1

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(rjson)
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.1

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(SDMTools)
library(truncnorm)

## Warning: package 'truncnorm' was built under R version 3.5.1

# Source code
source("Code/printgraph.R")
source("Code/makeGraph.R")
source("Code/seatDistribution.R")
source("Code/runModels.R")
source("Code/resultParty.R")
source("Code/resultPartyDate.R")
source("Code/houseEffects.R")

## Loading required package: stringr

source("Code/extractnames.R")
source("Code/getClosestPoll.R")
source("Code/compareTable.R")
source("Code/compareDates.R")

# Set some parameters -----------------------------------------------------
electionDate <- as.Date("2017-03-15")
electionDay <- julian(as.Date("2017-03-15"),origin=as.Date("2010-01-01"))
lastdate <- max(sourcedata[[1]]$dat$date)

partylist <- sapply(sourcedata, function(q) q$partyname)
partyabbr <- sapply(sourcedata, function(q) q$partyabbr)
n.parties <- length(partylist)
partycolours <- c(sapply(sourcedata, function(q) q$partycolour), "#7f8085")
partycolours.bright <- hex(mixcolor(0.4, hex2RGB(partycolours), RGB(1,1,1)))
partycolours.brightest <- hex(mixcolor(0.7, hex2RGB(partycolours), RGB(1,1,1)))

plotcolours <- partycolours
lastdate <- max(sourcedata[[1]]$dat$date)


# Read ratio data ---------------------------------------------------------
ratio_data <- rio::import("projects/2019 EK/Ratio_TK_EK_2019.xlsx")
ratios <- ratio_data %>%
  select(PW_Ratio_EK_2011, PW_Ratio_2011, PW_Ratio_EK_2015, PW_Ratio_2015)


ratio_source <- left_join(data.frame(party=partylist), data.frame(party=ratio_data$party,ratios))

## Joining, by = "party"

ratio_weights = c(1,1,1,1) / 4 * ncol(ratios)
ratio <- apply(ratios, 1, function(q) weighted.mean(q, ratio_weights,na.rm=TRUE))
ratio_min <- apply(ratios, 1, function(q) min(q, na.rm=TRUE))
ratio_max <- apply(ratios, 1, function(q) max(q, na.rm=TRUE))
ratio_sd <- apply(ratios, 1, function(q) wt.sd(q, ratio_weights))


# Show data used
ratio_df <- data.frame(party=ratio_data$party, Ratio=ratio, Ratio_SD=ratio_sd, 
           Ratio_Min=ratio_min, Ratio_Max=ratio_max)

# Add OSF ratio
ratio_df <- left_join(data.frame(party=partylist), ratio_df) %>%
  rbind(data.frame(party="OSF", Ratio=1,Ratio_SD=0.05, Ratio_Min=0.90, Ratio_Max=1.1))

## Joining, by = "party"

ratios <- ratio_source[,-1]

#Display ratios as used in nowcast model
ratio_df

##     party     Ratio   Ratio_SD Ratio_Min Ratio_Max
## 1     VVD 0.9664993 0.05860477 0.8960212 1.0373404
## 2     PVV 0.7802082 0.01878366 0.7540041 0.7952925
## 3     CDA 1.2365208 0.08232037 1.1608209 1.3281802
## 4     D66 0.9058463 0.06240140 0.8452586 0.9922335
## 5      GL 1.1616014 0.03282734 1.1303654 1.2066960
## 6      SP 0.9243595 0.03521430 0.8866963 0.9618485
## 7    PvdA 1.0484233 0.09464969 0.9518941 1.1526222
## 8      CU 0.9936987 0.11286002 0.8889668 1.1407023
## 9    PvdD 1.0592104 0.20363731 0.7625481 1.2119175
## 10 50PLUS 1.0383855 0.28086409 0.7850632 1.4035978
## 11    SGP 1.0964924 0.16729256 0.9443960 1.3113269
## 12   Denk 0.9000000 0.23094011 0.7000000 1.1000000
## 13    FvD 0.9000000 0.23094011 0.7000000 1.1000000
## 14    OSF 1.0000000 0.05000000 0.9000000 1.1000000

# Plot ratios -------------------------------------------------------------
ratioColours <- c("black", "grey","darkred","pink") #"orange","darkblue"
ratioPch <- c(15,16,17,18,19)  #23,25

windows(8,6)
par(mar=c(5.1,6.1,4.1,8.1),xpd=FALSE)
plot(ratios[1:11,1], 1:11, type="n", axes=F, xlim=c(0.6,1.5),
     ylab="", xlab="")
title("Verhouding tussen Peilingwijzer Tweede Kamer en \nuitslag Provinciale Staten/Eerste Kamer")
abline(h=1:11, col="grey80")
abline(v=1, lty=2, col="grey60")
for(i in 1:11)
 points(ratios[i,1:4], rep(i, ncol(ratios)),
        col=ratioColours, pch=ratioPch,bg=ratioColours)
axis(1, at=seq(0.5,1.5,.1), col="grey60")
axis(2, at=1:11, labels=partylist[1:11], las=1, col="grey60")

ratioNames <- c("2011 (EK)","2011 (PS)", "2015 (EK)","2015 (PS)")

par(xpd=TRUE)
legend("bottomright", inset=c(-0.30,0), legend=ratioNames, col=ratioColours,lwd=NA,pch=ratioPch,ncol=1, cex=.8, box.col="grey50", text.col="grey60", pt.bg=ratioColours)
text(.55,-1, "Scoort slechter bij\nPS/EK", pos=4)
text(1.55,-1, "Scoort beter bij\n PS/EK", pos=2)

 savePlot("projects/2019 EK/Ratio peilingen PS-TK", type="pdf")
 savePlot("projects/2019 EK/Ratio peilingen PS-TK", type="png")
 graphics.off()


# Calculate EK simulations based on TK poll -------------------------------

# Get data from Peilingwijzer output
alldata <- foreach(i=1:length(outputs), .combine=cbind) %do% {
  alphas <- outputs[[i]][,grep("^alpha",dimnames(outputs[[i]][[1]])[[2]])]
  unlist(alphas[,ncol(alphas[[1]])])
}

# Function to calculate EK score based on TK polls, using ratios
calcEK <- function(x) {
  # Add OSF score ~ uniform distribution 1%-2%
  x <- c(x, runif(1, 0.01,.02))
  names(x) <- c(partylist, "OSF")
  
  # # Sample ratio from truncated normal distribution
  # ratio_sample <- rtruncnorm(length(ratio_df$Ratio), 
  #                            a=ratio_df$Ratio_Min,
  #                            b=ratio_df$Ratio_Max,
  #                            ratio_df$Ratio, ratio_df$Ratio_SD)
  # 
  
  # Sample ratio from truncated normal distribution
  # Because we have no information on ratios from current polls yet, 
  # used wider range for truncation (i.e. more uncertainty)
  ratio_sample <- rtruncnorm(length(ratio_df$Ratio), 
                             a=0,
                             b=2,
                             ratio_df$Ratio, ratio_df$Ratio_SD)
  
  # Calculate original data times ratio and sum to 1
  return(x * ratio_sample/sum(x * ratio_sample))
}

# Get percentage scores for EK
results.perc <- foreach(i = 1:nrow(alldata), .combine=rbind) %do% calcEK(alldata[i,])

# Calculate seats for EK
results <- foreach(i = 1:nrow(results.perc), .combine=rbind) %do% {
  dhondt(results.perc[i,], n.seats=75)  
}

save(results.perc, results, file="projects/2019 EK/Results simulation.RData")

# Display some outputs ----------------------------------------------------
print("Kans op aantal zetels per partij")

## [1] "Kans op aantal zetels per partij"

apply(results, 2, function(q) table(q) / 2500)

## $VVD
## q
##     11     12     13     14     15     16     17     18 
## 0.0020 0.0232 0.2024 0.4124 0.2892 0.0668 0.0036 0.0004 
## 
## $PVV
## q
##      6      7      8 
## 0.2392 0.7460 0.0148 
## 
## $CDA
## q
##      6      7      8      9     10     11 
## 0.0012 0.0856 0.5140 0.3616 0.0364 0.0012 
## 
## $D66
## q
##      4      5      6      7 
## 0.0180 0.5764 0.3980 0.0076 
## 
## $GL
## q
##      9     10     11     12     13 
## 0.0024 0.2272 0.6552 0.1144 0.0008 
## 
## $SP
## q
##      5      6      7 
## 0.1844 0.7848 0.0308 
## 
## $PvdA
## q
##      3      4      5      6      7 
## 0.0004 0.0644 0.5792 0.3340 0.0220 
## 
## $CU
## q
##      2      3      4      5      6 
## 0.0316 0.6384 0.3244 0.0052 0.0004 
## 
## $PvdD
## q
##      0      1      2      3      4      5      6 
## 0.0004 0.0044 0.0960 0.4492 0.3712 0.0756 0.0032 
## 
## $`50PLUS`
## q
##      0      1      2      3      4      5 
## 0.0032 0.1196 0.4344 0.3604 0.0784 0.0040 
## 
## $SGP
## q
##      0      1      2      3 
## 0.0012 0.5180 0.4800 0.0008 
## 
## $Denk
## q
##      0      1      2      3 
## 0.0216 0.3912 0.5328 0.0544 
## 
## $FvD
## q
##      0      1      2      3      4      5      6      7      8      9 
## 0.0004 0.0032 0.0276 0.0772 0.1948 0.2752 0.2380 0.1320 0.0420 0.0092 
##     10 
## 0.0004 
## 
## $OSF
## q
##      0      1 
## 0.1984 0.8016

print("Mediaan aantal zetels + CI per partij")

## [1] "Mediaan aantal zetels + CI per partij"

apply(results, 2, function(q) c(median(q), quantile(q, .025), quantile(q, .975)))

##          VVD PVV CDA D66 GL SP PvdA CU PvdD 50PLUS SGP Denk FvD OSF
##       14.000   7   8   5 11  6    5  3    3      2   1    2   5   1
## 2.5%  12.475   6   7   5 10  5    4  2    2      1   1    1   2   0
## 97.5% 16.000   7  10   6 12  7    6  4    5      4   2    3   8   1

print("Percentage + CI per partij")

## [1] "Percentage + CI per partij"

round(apply(results.perc, 2, function(q) c(mean(q), quantile(q, .025), quantile(q, .975)))*100, 1)

##        VVD PVV  CDA D66   GL  SP PvdA  CU PvdD 50PLUS SGP Denk  FvD OSF
##       17.9 8.8 10.8 7.2 13.9 7.7  7.1 4.7  4.8    3.5 2.4  2.6  7.0 1.5
## 2.5%  15.9 8.1  9.3 6.2 12.8 6.9  5.8 3.6  3.1    1.7 1.7  1.3  3.5 1.0
## 97.5% 19.9 9.6 12.3 8.3 15.1 8.6  8.5 5.7  6.6    5.4 3.2  3.9 10.4 2.0

print("Aantal zetels bij gemiddeld percentage")

## [1] "Aantal zetels bij gemiddeld percentage"

dhondt(colMeans(results.perc), n.seats=75)

##    VVD    PVV    CDA    D66     GL     SP   PvdA     CU   PvdD 50PLUS 
##     14      7      8      6     11      6      5      3      3      2 
##    SGP   Denk    FvD    OSF 
##      2      2      5      1

# Largest parties
table(colnames(results)[apply(results, 1, which.max)]) / nrow(results)

## 
##     GL    VVD 
## 0.0004 0.9996

# Get coalition seats
coa_rowsums <- data.frame(results) %>%
  select(VVD,CDA,D66,CU) %>%
  rowSums

print("Mediaan zetelaantal en CI Coalitie")

## [1] "Mediaan zetelaantal en CI Coalitie"

c(median(coa_rowsums), quantile(coa_rowsums,.025), quantile(coa_rowsums, .975))

##        2.5% 97.5% 
##    31    29    34

print("Verdeling zetelaantallen Coalitie")

## [1] "Verdeling zetelaantallen Coalitie"

table(coa_rowsums) / length(coa_rowsums) * 100

## coa_rowsums
##    27    28    29    30    31    32    33    34    35    37 
##  0.08  0.96  6.80 19.72 32.08 25.04 11.96  2.96  0.36  0.04

print("Kans op meerderheid Coalitie")

## [1] "Kans op meerderheid Coalitie"

table(coa_rowsums > 37) / length(coa_rowsums) * 100

## 
## FALSE 
##   100

# Plot graph (parties) ----------------------------------------------------

graphSeats = function(x,...) {
  barplot(x/sum(x)*100,...)
}

windows(6.7,6.7)
par(mfrow=c(5,3),bg="transparent",mar=c(2.5,2.5,2.5,2.5))

v = apply(results, 2, function(x) table(x))
for(i in 1:length(v)) {
  uit = graphSeats(v[[i]], 
                   plot=T,axes=F,col="transparent",
                   border=NA, names.arg=F)
  abline(h=pretty(c(0,as.numeric(v[[i]]/sum(v[[i]]))) * 100,n.min=3),
         col="grey80")
  abline(h=c(0),col="grey40",lwd=1)
  colbars = rep(partycolours[i], length(names(v[[i]])))
  # Plot bars again to get on top of help lines
  graphSeats(v[[i]],main=names(v)[i],
             col=colbars, axisnames=FALSE,
             fg="grey60",border=NA, 
             col.main="grey50",add=TRUE,axes=FALSE)
  
  #col.axis="grey60",
  axis(2,at=pretty(c(0,as.numeric(v[[i]]/sum(v[[i]]))) * 100,n.min=3),
        labels=paste(pretty(c(0,as.numeric(v[[i]]/sum(v[[i]]))) * 100,n.min=3),
                     "%",sep=""),
       col.axis="grey60",col="grey80",tick=FALSE)
  axis(1,at=uit, labels=as.integer(names(v[[i]])), tick=FALSE, 
       col.axis="grey60", cex=.9)
} 
par(xpd=TRUE)
plot(c(0,1),c(0,1),type="n", axes=FALSE)
text(0,0.3, "Nowcast Eerste Kamer \nobv Peilingwijzer\nTom Louwerse,\nUniversiteit Leiden", cex=1, pos=4, col="grey")

savePlot("projects/2019 EK/Zetelaantallen Partijen", type="pdf")
savePlot("projects/2019 EK/Zetelaantallen Partijen", type="png")
graphics.off()

# Graph coalition + C3 ----------------------------------------------------

windows(6.2,6.2)
par(mfrow=c(1,1),bg="transparent",mar=c(3,3,3,3))

# Get table of outcomes for coalition parties combined
coac3 <- data.frame(results) %>%
  select(VVD,CDA,D66,CU) %>%
  rowSums %>%
  table

uit = graphSeats(coac3, plot=T,axes=F,
                 col="transparent",border=NA, names.arg=F,
                 xlab="Zetels", ylab="% van de simulaties", 
                 col.lab="grey80")
abline(h=pretty(coac3/sum(coac3) * 100,3),col="grey80")
abline(h=c(0),col="grey40",lwd=1)
colbars = c(rep("grey70",length(which(as.numeric(names(coac3))<38))), 
            rep("grey45", length(which(as.numeric(names(coac3))>37))))
graphSeats(coac3,main="Zetels Coalitie",
           col=colbars,col.axis="grey60",fg="grey60",border=NA, 
           col.main="grey50",add=TRUE,axes=F)
axis(2,at=pretty(coac3/sum(coac3) * 100,3),
     col.axis="grey60",col="grey80",tick=FALSE)

savePlot("projects/2019 EK/Zetelaantallen Coalitie", type="pdf")
savePlot("projects/2019 EK/Zetelaantallen Coalitie", type="png")
graphics.off()


# Graph Peilingwijzer TK - Percentages EK ---------------------------------

windows(7,7)
tk <- colMeans(alldata)
ek <- colMeans(results.perc)
ek <- ek[-14] # Remove OSF, not in TK data

plot(tk, ek, pch=16, xlim=c(0,.2), ylim=c(0,.2),
     xlab="Tweede Kamer (Peilingwijzer)", ylab="Eerste Kamer (Doorrekening)")
abline(a=0,b=1, lty=2, col="grey")
text(tk, ek, labels=names(ek), pos=3)

savePlot("projects/2019 EK/Vergelijking TK-EK", type="pdf")
savePlot("projects/2019 EK/Vergelijking TK-EK", type="png")
graphics.off()


# Met error

windows(7,7)
plot(tk, ek, pch=16, xlim=c(0,.2), ylim=c(0,.2), type="n",
     xlab="Tweede Kamer (Peilingwijzer)", ylab="Eerste Kamer (Doorrekening)")

for(i in 1:nrow(alldata)) {
  tk <- alldata[i,]
  ek <- results.perc[i,]
  ek <- ek[-14] # Remove OSF, not in TK data
  points(tk,ek,  col=partycolours.bright, pch=16, cex=.2)
}

tk <- colMeans(alldata)
ek <- colMeans(results.perc)
ek <- ek[-14] # Remove OSF, not in TK data
points(tk,ek,  col=partycolours, pch=16, cex=2)

abline(a=0,b=1, lty=2, col="grey", lwd=2)
text(tk, ek, labels=names(ek))

EK_simulations.R

louwersetp

Tue Oct 09 13:39:26 2018