Folder <- "C:/Laboratory/DraftKings/Impacts/"
SoBe <- read.csv(paste0(Folder,"SocialBehaviors.csv"),header=TRUE)
######################################################################
# Show some sample risk score calculations.                          #
# Our hypothetical user will play in three contests.                 #
# * The first contest is a 9-person contest.                         #
# -- The entry fee is $6.50.                                         #
# -- First place gets $27.00.                                        #
# -- Second place gets $16.20.                                       #
# -- Third place gets $10.80.                                        #
# -- Fourth through ninth get $0.00.                                 #
# * The second contest is a 45-person contest.                       #
# -- The entry fee is $3.25.                                         #
# -- First place gets $42.00.                                        #
# -- Second place gets $30.00.                                       #
# -- Third place gets $21.00.                                        #
# -- Fourth place gets $15.00.                                       #
# -- Fifth place gets $12.00.                                        #
# -- Sixth place gets $9.00.                                         #
# -- Seventh place gets $6.00.                                       #
# -- Eighth through forty-fifth get $0.00.                           #
# * The third contest is a 10-person 50/50 contest.                  #
# -- The entry fee is $5.20.                                         #
# -- First through fifth get $10.00.                                 #
# -- Sixth through tenth get $0.00.                                  #
######################################################################
# Make the table PayTab with the payouts.                  #
# * TNum is the tournament number (1, 2, 3).               #
# * MinPlace is the minimum place for that payout.         #
# * MaxPlace is the maximum place for that payout.         #
# * Places is the number of people that will win that      #
#   prize amount.                                          #
# * Payout is the prize amount.                            #
# We'll also create TData with some numbers for each of    #
#  the three tournaments.                                  #
############################################################
PayTab <- data.frame(TNum=rep(1:3,c(4,8,2)),
                     MinPlace=c(c(1,2,3,4),
                                c(1,2,3,4,5,6,7,8),
                                c(1,6)),
                     MaxPlace=c(c(1,2,3,9),
                                c(1,2,3,4,5,6,7,45),
                                c(5,10)),
                     Places=0,
                     Payout=c(c(27.00,16.20,10.80,0),
                              c(42,30,21,15,12,9,6,0),
                              c(10,0))
                    );
   PayTab$Places = PayTab$MaxPlace - PayTab$MinPlace + 1;
############################################################
# Scrap1 is the table I use to calculate the variance for  #
#  the probability distributions implied by the three      #
#  tournament's payout tables.                             #
# * I'm using the formula Var(X) = E(X^2)-(E(X))^2.        #
# * TNum is the tournament number (1, 2, 3).               #
# * x has the values the random variables can take.        #
# * p contains the corresponding probabilities.            #
# * xp has the terms I need to get E(X).                   #
# * x2p has the terms I need to get E(X^2).                #
# Scrap2 has E(X^2), E(X), Var(X), and SD(X) corresponding #
#  to each tournament.                                     #
# * TNum is the tournament number (1, 2, 3).               #
# * Entrants is the total number of entrants.              #
# * EX2 contains the expected squares E(X^2).              #
# * EX contains the expected values E(X).                  #
# * VarX contains the variances Var(X).                    #
# * SDX contains the standard deviations SD(X).            #
# * RiskScore contains the three contest risk scores.      #
############################################################
require(plyr);
Scrap2 <- ddply(PayTab,~TNum,summarise,
                Entrants=max(MaxPlace));
Scrap1 <- data.frame(TNum=PayTab$TNum,
                     x=PayTab$Payout,
                     p=PayTab$Places/Scrap2$Entrants[PayTab$TNum]);
   Scrap1$xp <- Scrap1$x * Scrap1$p;
   Scrap1$x2p <- Scrap1$x * Scrap1$xp;
Scrap2 <- merge(Scrap2,
                ddply(Scrap1,~TNum,summarise,
                      EX2=sum(x2p),EX=sum(xp)),
                by=c("TNum"),all.x=TRUE,all.y=TRUE);
   Scrap2$VarX <- Scrap2$EX2 - Scrap2$EX*Scrap2$EX;
   Scrap2$SDX <- sqrt(Scrap2$VarX);
   Scrap2$EntryFee <- c(6.5,3.25,5.2);
   Scrap2$RiskScore <- Scrap2$SDX / Scrap2$EntryFee;
PayTab
Scrap1
Scrap2
############################################################
# The three contest risk scores are                        #
#  1.436, 2.589, and 0.962.                                #
# * The risk score of the third contest is equal to the    #
#   proportion of the entry fees that went to the prize    #
#   pool ($50.00 in the prize pool, ten people at $5.20    #
#   per person is $52.00).                                 #
# The player's risk score is the average of the three:     #
#  1.662.                                                  #
############################################################
round(Scrap2$RiskScore,digits=3)
round(mean(Scrap2$RiskScore),digits=3)
######################################################################
# The data file has eight variables.                                 #
# * UserID is just a sequential indexing number.                     #
# * Group tells us which users are in the 80% group ("1") and which  #
#   are in the 20% group ("2").                                      #
# * HasRAF says whether or not this player was referred              #
#   by another player.                                               #
# * RAFBonusYes says whether a player received cash via the          #
#   Refer A Friend program.                                          #
# * RAFBonusCount is the number of different days the player         #
#   received cash through the Refer A Friend program.                #
# * RAFBonusCash is the total amount of cash the player received     #
#   through the Refer A Friend program.                              #
# * HasMutualFriend answers the question "Does this player have      #
#   another player who they have added as a friend who has also      #
#   added them as a friend as well (i.e., a mutual friend mapping)?  #
# * RiskScore is based only on contests where the player used cash   #
#   to pay for entries, and had a chance to win cash or something    #
#   else of value.                                                   #
# -- No free contests.                                               #
# -- No contests where everything was paid for with tickets.         #
# -- No contests where even first place gets nothing                 #
#    (i.e., charity mechanisms).                                     #
# -- A player's risk score is the average of the contest risk        #
#    scores, where we don't worry about how many entries the         #
#    player had in each contest.                                     #
######################################################################
# These sections are just setup.                           #
############################################################
# These are for the five-number summaries and    #
#  other tables.                                 #
##################################################
require(psych);
Every25N <- c(0.00,0.25,0.50,0.75,1.00);
Every25T <- c("Q000","Q025","Q050","Q075","Q100");
##################################################
# Make the two subsets, the 80% and the 20%.     #
# We randomly generated Group 1 and Group 2,     #
#  and then we hard-coded the groups so that     #
#  we'd get the same results each time.          #
# * It made debugging easier.                    #
# * Feel free to generate your own 80/20         #
#   partition to see if you get similar results. #
##################################################
SoBeA <- subset(SoBe,SoBe$Group==1); nSA = nrow(SoBeA); nSA
SoBeB <- subset(SoBe,SoBe$Group==2); nSB = nrow(SoBeB); nSB
##################################################
# This function calculates the effect size for a #
#  Wilcoxon rank-sum test.                       #
##################################################
rFromWilcox <- function(wilcoxModel,N) {
   z <- qnorm(wilcoxModel$p.value/2);
   r <- z/sqrt(N);
   cat(wilcoxModel$data.name,", Effect Size, r = ",r,"\n");
}
##################################################
# This function creates a box plot that goes on  #
#  top of the dot plots.                         #
##################################################
MyBox <- function(Height,HalfWhisker,Quantiles) {
   Marks <- Height + HalfWhisker * c(-1,0,1);
   i1 <- 5; while (i1 > 0) {
      lines(x=Quantiles[c(i1,i1)],y=Marks[c(1,3)],lty=1,lwd=2);
      i1 = i1 - 1;
   }
   lines(x=Quantiles[c(2,4)],y=Marks[c(1,1)],lty=1,lwd=2);
   lines(x=Quantiles[c(2,4)],y=Marks[c(3,3)],lty=1,lwd=2);
   lines(x=Quantiles[c(1,2)],y=Marks[c(2,2)],lty=2,lwd=2);
   lines(x=Quantiles[c(4,5)],y=Marks[c(2,2)],lty=2,lwd=2);
}
############################################################
# Do some preliminary exploratory data analysis.           #
############################################################
# Make some preliminary histograms.              #
# * Start with the big sample.                   #
# * Then do the small sample.                    #
# This section didn't make it into the paper.    #
##################################################
#Hist80RiskScore <- ggplot(SoBeA,aes(SoBeA$RiskScore)) +
#                                       geom_histogram(binwidth=0.4) +
#                             labs(x="User Risk Score",y="Frequency");
#   Hist80RiskScore
#Hist80HasMutualFriend <- ggplot(SoBeA,aes(SoBeA$HasMutualFriend)) +
#                                       geom_histogram(binwidth=0.4) +
#                           labs(x="Has Mutual Friend",y="Frequency");
#   Hist80HasMutualFriend
#Hist80HasRAF <- ggplot(SoBeA,aes(SoBeA$HasRAF)) +
#                                       geom_histogram(binwidth=0.4) +
#                           labs(x="Has Been Referred",y="Frequency");
#   Hist80HasRAF
#Hist80RAFBonusYes <- ggplot(SoBeA,aes(SoBeA$RAFBonusYes)) +
#                                       geom_histogram(binwidth=0.4) +
#                 labs(x="Refer a friend bonus binary",y="Frequency");
#   Hist80RAFBonusYes
#Hist80RAFBonusCount <- ggplot(SoBeA,aes(SoBeA$RAFBonusCount)) +
#                                       geom_histogram(binwidth=0.4) +
#                 labs(x="Refer a friend bonus number",y="Frequency");
#   Hist80RAFBonusCount
#Hist80RAFBonusCash <- ggplot(SoBeA,aes(SoBeA$RAFBonusCash)) +
#                                       geom_histogram(binwidth=0.4) +
#                  labs(x="Refer a friend bonus total",y="Frequency");
#   Hist80RAFBonusCash
#Hist20RiskScore <- ggplot(SoBeB,aes(SoBeB$RiskScore)) +
#                                       geom_histogram(binwidth=0.4) +
#                             labs(x="User Risk Score",y="Frequency");
#   Hist20RiskScore
#Hist20HasMutualFriend <- ggplot(SoBeB,aes(SoBeB$HasMutualFriend)) +
#                                       geom_histogram(binwidth=0.4) +
#                           labs(x="Has Mutual Friend",y="Frequency");
#   Hist20HasMutualFriend
#Hist20HasRAF <- ggplot(SoBeB,aes(SoBeB$HasRAF)) +
#                                       geom_histogram(binwidth=0.4) +
#                           labs(x="Has Been Referred",y="Frequency");
#   Hist20HasRAF
#Hist20RAFBonusYes <- ggplot(SoBeB,aes(SoBeB$RAFBonusYes)) +
#                                       geom_histogram(binwidth=0.4) +
#                 labs(x="Refer a friend bonus binary",y="Frequency");
#   Hist20RAFBonusYes
#Hist20RAFBonusCount <- ggplot(SoBeB,aes(SoBeB$RAFBonusCount)) +
#                                       geom_histogram(binwidth=0.4) +
#                 labs(x="Refer a friend bonus number",y="Frequency");
#   Hist20RAFBonusCount
#Hist20RAFBonusCash <- ggplot(SoBeB,aes(SoBeB$RAFBonusCash)) +
#                                       geom_histogram(binwidth=0.4) +
#                  labs(x="Refer a friend bonus total",y="Frequency");
#   Hist20RAFBonusCash
##################################################
# Calculate descriptive statistics for SoBeA.    #
# This section didn't make it into the paper.    #
##################################################
#Descriptives80 <- describe.by(SoBeA[,c("RiskScore","HasMutualFriend",
#                                       "HasRAF","RAFBonusYes",
#                                       "RAFBonusCount",
#                                       "RAFBonusCash")]);
#   Descriptives80
##                  vars    n  mean    sd median trimmed   mad
##  RiskScore          1 8904 16.17 15.31  12.14   13.65 10.38
##  HasMutualFriend    2 8904  0.25  0.43   0.00    0.18  0.00
##  HasRAF             3 8904  0.01  0.11   0.00    0.00  0.00
##  RAFBonusYes        4 8904  0.03  0.18   0.00    0.00  0.00
##  RAFBonusCount      5 8904  0.10  0.72   0.00    0.00  0.00
##  RAFBonusCash       6 8904  2.25 30.22   0.00    0.00  0.00
##                    min     max   range  skew kurtosis   se
##  RiskScore        0.49  181.31  180.81  2.71    12.75 0.16
##  HasMutualFriend  0.00    1.00    1.00  1.17    -0.62 0.00
##  HasRAF           0.00    1.00    1.00  9.18    82.29 0.00
##  RAFBonusYes      0.00    1.00    1.00  5.32    26.27 0.00
##  RAFBonusCount    0.00   14.00   14.00 10.99   148.01 0.01
##  RAFBonusCash     0.00 1627.12 1627.12 32.34  1371.91 0.32
#Descriptives20 <- describe.by(SoBeB[,c("RiskScore","HasMutualFriend",
#                                       "HasRAF","RAFBonusYes",
#                                       "RAFBonusCount",
#                                       "RAFBonusCash")]);
#   Descriptives20
#               vars    n  mean    sd median trimmed mad  min    max  range  skew kurtosis   se
#UserRiskScore     1 2226 15.77 15.33  11.79   13.11  10 0.49  129.73  129.23  2.62    10.71 0.32
#HasMutualFriend   2 2226  0.24  0.43   0.00    0.17   0 0.00    1.00    1.00  1.24    -0.47 0.01
#RAFBonusBinary    3 2226  0.03  0.17   0.00    0.00   0 0.00    1.00    1.00  5.54    28.73 0.00
#RAFBonusNumber    4 2226  0.09  0.75   0.00    0.00   0 0.00   15.00   15.00 12.74   195.59 0.02
#RAFBonusTotal     5 2226  3.13 64.76   0.00    0.00   0 0.00 2788.36 2788.36 37.65  1559.54 1.37
#                  min    max    range  skew kurtosis   se
#UserRiskScore    0.49  129.73  129.23  2.62    10.71 0.32
#HasMutualFriend  0.00    1.00    1.00  1.24    -0.47 0.01
#RAFBonusBinary   0.00    1.00    1.00  5.54    28.73 0.00
#RAFBonusNumber   0.00   15.00   15.00 12.74   195.59 0.02
#RAFBonusTotal    0.00 2788.36 2788.36 37.65  1559.54 1.37
############################################################
# Go through the paper and get some numbers.               #
############################################################
# Get the counts and percentages for             #
#  HasMutualFriend and RAFBonusYes for both the  #
#  big subgroup and the small subgroup.          #
##################################################
BinTable <- data.frame(Sample=c(rep("Exploratory Sample",3),
                                rep("Confirmatory Sample",3)),
                       Variable=c("Reciprocal Friendship",
                                  "Referred by a friend",
                                  "Received RAF Bonus Cash",
                                  "Reciprocal Friendship",
                                  "Referred by a friend",
                                  "Received RAF Bonus Cash"),
                       Count=c(sum(SoBeA$HasMutualFriend),
                               sum(SoBeA$HasRAF),
                               sum(SoBeA$RAFBonusYes),
                               sum(SoBeB$HasMutualFriend),
                               sum(SoBeB$HasRAF),
                               sum(SoBeB$RAFBonusYes)),
                       Pct=0,stringsAsFactors=FALSE);
   BinTable$Pct = 100 * BinTable$Count / c(nSA,nSA,nSA,nSB,nSB,nSB);
   BinTable$Pct = round(BinTable$Pct,digits=2);
   BinTable
#                Sample                Variable Count   Pct
# 1  Exploratory Sample   Reciprocal Friendship  2199 24.70
# 2  Exploratory Sample    Referred by a friend   102  1.15
# 3  Exploratory Sample Received RAF Bonus Cash   285  3.20
# 4 Confirmatory Sample   Reciprocal Friendship   527 23.67
# 5 Confirmatory Sample    Referred by a friend    26  1.17
# 6 Confirmatory Sample Received RAF Bonus Cash    66  2.96
##################################################
# Get the five-number summaries for              #
#  RAFBonusCount and RiskScore.                  #
# Get the numbers of users with positive values  #
#  of RAFBonusCount.                             #
##################################################
ConTable <- data.frame(Sample=c(rep("Exploratory Sample",2),
                                rep("Confirmatory Sample",2)),
                       Variable=c("Player Risk Score",
                                  "Number of RAF Bonuses",
                                  "Player Risk Score",
                                  "Number of RAF Bonuses"),
                       Count=c(0,sum(SoBeA$RAFBonusCount>0),
                               0,sum(SoBeB$RAFBonusCount>0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       stringsAsFactors=FALSE);
   ConTable$Pct = 100 * ConTable$Count / c(nSA,nSA,nSB,nSB);
   ConTable$Pct = round(ConTable$Pct,digits=2);
   ConTable[1,Every25T] = quantile(SoBeA$RiskScore,Every25N);
   ConTable[2,Every25T] = quantile(SoBeA$RAFBonusCount,Every25N);
   ConTable[3,Every25T] = quantile(SoBeB$RiskScore,Every25N);
   ConTable[4,Every25T] = quantile(SoBeB$RAFBonusCount,Every25N);
   ConTable[,Every25T] = round(ConTable[,Every25T],digits=2);
   ConTable
# E.S.: Exploratory Sample
# C.S.: Confirmatory Sample
# P.R.S.: Player Risk Score
# N.RAF.B.: Number of RAF Bonuses
#   Sample Variable Count  Pct Q000 Q025  Q050  Q075   Q100
# 1   E.S. P.R.S.       0 0.00 0.49 6.12 12.14 21.12 181.31
# 2   E.S. N.RAF.B.   285 3.20 0.00 0.00  0.00  0.00  14.00
# 3   C.S. P.R.S.       0 0.00 0.49 5.74 11.79 20.15 129.73
# 4   C.S. N.RAF.B.    66 2.96 0.00 0.00  0.00  0.00  15.00
############################################################
# Perform the hypothesis tests on the larger subgroup.     #
############################################################
# Define some parameters for the plots.          #
##################################################
DotSpread <- 0.1;
MyColor <- "#FF3399";
MyColor <- "grey45";
RiskScoreLabel <- "Player Risk Score";
##################################################
# H1: Players referred to DraftKings by a friend #
#  (HasRAF) will have different risk scores      #
#  (RiskScore) from those who weren't referred.  #
# Findings: There were no significant            #
#           differences in risk scores between   #
#           people referred by a friend and      #
#           others.                              #
# * We retain the null hypothesis.               #
# * Being referred by others does not influence  #
#   propensity for engaging in risky gameplay.   #
##################################################
# Get the five-number summaries.       #
# Do the hypothesis test.              #
########################################
H1A <- wilcox.test(RiskScore~HasRAF,data=SoBeA); H1A
   rFromWilcox(H1A,nSA)
H1ATable <- data.frame(Count=c(sum(SoBeA$HasRAF==1),
                               sum(SoBeA$HasRAF==0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       W=0,pValue=c(H1A$p.value,0),
                       rEffectSize=0);
   rownames(H1ATable) = c("HasRAF","NoRAF");
   H1ATable$Pct = 100 * H1ATable$Count / nSA;
   H1ATable[1,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$HasRAF==1),
                                   Every25N);
   H1ATable[2,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$HasRAF==0),
                                   Every25N);
   H1ATable$W[1] = H1A$statistic;
   H1ATable$rEffectSize[1] = qnorm(H1A$p.value[1]/2)/sqrt(nSA);
########################################
# Plot RiskScore versus HasRAF.        #
########################################
par(mar=c(5.1,9.1,4.1,2.1))
plot(x=SoBeA$RiskScore,
     y=SoBeA$HasRAF+1+rnorm(nSA,mean=0,sd=DotSpread),
     xlab=RiskScoreLabel,yaxt="n",ylab="",
     col=c(MyColor,MyColor)[SoBeA$HasRAF+1],pch=".",cex=3)
MyBox(2,DotSpread,H1ATable[1,Every25T])
MyBox(1,DotSpread,H1ATable[2,Every25T])
axis(2,at=c(2,1),lab=c("Referred","Not referred"),
     las=2,cex.axis=1.3)
#title(main=paste0("Figure 1. Boxplots of Player Risk Score\n",
#                  "by Referred by a Friend"))
par(mar=c(5.1,4.1,4.1,2.1))
#savePlot(filename=paste0(Folder,"H1-80pct",sep=""),
#         type="jpg");
########################################
# Round the values in the table        #
#  for display purposes.               #
########################################
H1ATable$Pct = round(H1ATable$Pct,digits=2);
   H1ATable[,Every25T] = round(H1ATable[,Every25T],digits=2);
   H1ATable$pValue = round(H1ATable$pValue,digits=3);
   H1ATable$rEffectSize = round(H1ATable$rEffectSize,digits=3);
   H1ATable
##################################################
# H2: Players who have successfully referred a   #
#     friend (RAFBonusYes) will have different   #
#     risk behaviors from those (a) who have not #
#     not attempted to refer a friend or (b)     #
#     attempted to refer a friend and failed.    #
# Findings: Wilcoxon rank-sum test was           #
#           conducted to compare risk scores     #
#           among people who have successfully   #
#           referred a friend to others.         #
# * We reject the null hypothesis.               #
# * People who refer friends to play DFS have    #
#   significantly higher risk propensity than    #
#   others.                                      #
##################################################
# Get the five-number summaries.       #
# Do the hypothesis test.              #
########################################
H2A <- wilcox.test(RiskScore~RAFBonusYes,data=SoBeA); H2A
   rFromWilcox(H2A,nSA)
H2ATable <- data.frame(Count=c(sum(SoBeA$RAFBonusYes==1),
                               sum(SoBeA$RAFBonusYes==0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       W=0,pValue=c(H2A$p.value,0),
                       rEffectSize=0);
   rownames(H2ATable) = c("HasRAFBonus","NoRAFBonus");
   H2ATable$Pct = 100 * H2ATable$Count / nSA;
   H2ATable[1,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$RAFBonusYes==1),
                                   Every25N);
   H2ATable[2,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$RAFBonusYes==0),
                                   Every25N);
   H2ATable$W[1] = H2A$statistic;
   H2ATable$rEffectSize[1] = qnorm(H2A$p.value[1]/2)/sqrt(nSA);
########################################
# Plot RiskScore versus RAFBonusYes.   #
########################################
par(mar=c(5.1,7.1,4.1,2.1))
plot(x=SoBeA$RiskScore,
     y=SoBeA$RAFBonusYes+1+rnorm(nSA,mean=0,sd=DotSpread),
     xlab=RiskScoreLabel,yaxt="n",ylab="",
     col=c(MyColor,MyColor)[SoBeA$RAFBonusYes+1],pch=".",cex=3)
MyBox(2,DotSpread,H2ATable[1,Every25T])
MyBox(1,DotSpread,H2ATable[2,Every25T])
axis(2,at=c(2,1),lab=c("Successful\nReferral","No Referral"),
     las=2,cex.axis=1.3)
#title(main=paste0("Figure 2. Boxplots of Player Risk Score\n",
#                  "by Referred a Friend"))
par(mar=c(5.1,4.1,4.1,2.1))
#savePlot(filename=paste0(Folder,"H2-80pct",sep=""),
#         type="jpg");
########################################
# Round the values in the table        #
#  for display purposes.               #
########################################
H2ATable$Pct = round(H2ATable$Pct,digits=2);
   H2ATable[,Every25T] = round(H2ATable[,Every25T],digits=2);
   H2ATable$pValue = round(H2ATable$pValue,digits=3);
   H2ATable$rEffectSize = round(H2ATable$rEffectSize,digits=3);
   H2ATable
##################################################
# H3: People who have a reciprocal friendship    #
#     (HasMutualFriend) will have significantly  #
#     different risk scores from others.         #
# Findings: Wilcoxon rank-sum test was conducted #
#           to compare risk scores among people  #
#           with reciprocal friendship.          #
# * We reject the null hypothesis.               #
# * People with reciprocal friendships on DFS    #
#   websites engage in contests with higher risk #
#   propensity than others.                      #
##################################################
H3A <- wilcox.test(RiskScore~HasMutualFriend,data=SoBeA); H3A
   rFromWilcox(H3A,nSA)
H3ATable <- data.frame(Count=c(sum(SoBeA$HasMutualFriend==1),
                               sum(SoBeA$HasMutualFriend==0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       W=0,pValue=c(H3A$p.value,0),
                       rEffectSize=0);
   rownames(H3ATable) = c("HasMutualFriend","NoMutualFriend");
   H3ATable$Pct = 100 * H3ATable$Count / nSA;
   H3ATable[1,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$HasMutualFriend==1),
                                   Every25N);
   H3ATable[2,Every25T] = quantile(subset(SoBeA$RiskScore,
                                          SoBeA$HasMutualFriend==0),
                                   Every25N);
   H3ATable$W[1] = H3A$statistic;
   H3ATable$rEffectSize[1] = qnorm(H3A$p.value[1]/2)/sqrt(nSA);
########################################
# Plot RiskScore versus                #
#  HasMutualFriend.                    #
########################################
par(mar=c(5.1,9.1,4.1,2.1))
plot(x=SoBeA$RiskScore,
     y=SoBeA$HasMutualFriend+1+rnorm(nSA,mean=0,sd=DotSpread),
     xlab=RiskScoreLabel,yaxt="n",ylab="",
     col=c(MyColor,MyColor)[SoBeA$HasMutualFriend+1],
     pch=".",cex=3)
MyBox(2,DotSpread,H3ATable[1,Every25T])
MyBox(1,DotSpread,H3ATable[2,Every25T])
axis(2,at=c(2,1),lab=c("Reciprocal\nFriendship",
                       "No Reciprocal\nFriendship"),
     las=2,cex.axis=1.3)
#title(main=paste0("Figure 3. Boxplot of Player Risk Score\n",
#                  "by Reciprocal Friendship"))
par(mar=c(5.1,4.1,4.1,2.1))
#savePlot(filename=paste0(Folder,"H3-80pct",sep=""),
#         type="jpg");
########################################
# Round the values in the table        #
#  for display purposes.               #
########################################
H3ATable$Pct = round(H3ATable$Pct,digits=2);
   H3ATable[,Every25T] = round(H3ATable[,Every25T],digits=2);
   H3ATable$pValue = round(H3ATable$pValue,digits=3);
   H3ATable$rEffectSize = round(H3ATable$rEffectSize,digits=3);
   H3ATable
##################################################
# H4: Among players with reciprocal friendships, #
#     there will be statistically significant    #
#     differences on risk scores between those   #
#     who have successfully referred a friend    #
#     (HasMutualFriend=1,RAFBonusYes=1) and      #
#     those who have not (HasMutualFriend=1,     #
#     RAFBonusYes=1).                            #
# Findings: Wilcoxon rank-sum test was conducted #
#           to compare risk scores among people  #
#           with both reciprocal friendship and  #
#           successful referrals to people with  #
#           reciprocal friendship but no         #
#           successful referrals.                #
# * We reject the null hypothesis.               #
# * Among players with reciprocal friendships,   #
#   those who have successfully referred a       #
#   friend have significantly higher risk scores #
#   than others.                                 #
##################################################
SoBe4A <- subset(SoBeA,SoBeA$HasMutualFriend>0);
H4A <- wilcox.test(RiskScore~RAFBonusYes,data=SoBe4A); H4A
   rFromWilcox(H4A,nSA)
H4ATable <- data.frame(Count=c(sum(SoBe4A$RAFBonusYes==1),
                               sum(SoBe4A$RAFBonusYes==0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       W=0,pValue=c(H4A$p.value,0),
                       rEffectSize=0);
   rownames(H4ATable) = c("HasMutualFriendHasRAFBonus",
                          "HasMutualFriendNoRAFBonus");
   H4ATable$Pct = 100 * H4ATable$Count / nSA;
   H4ATable[1,Every25T] = quantile(subset(SoBe4A$RiskScore,
                                          SoBe4A$RAFBonusYes==1),
                                   Every25N);
   H4ATable[2,Every25T] = quantile(subset(SoBe4A$RiskScore,
                                          SoBe4A$RAFBonusYes==0),
                                   Every25N);
   H4ATable$W[1] = H4A$statistic;
   H4ATable$rEffectSize[1] = qnorm(H4A$p.value[1]/2)/sqrt(nSA);
########################################
# Plot RiskScore versus HasRAF for     #
#  those with HasMutualFriend = 1.     #
########################################
par(mar=c(5.1,9.1,4.1,2.1))
Scrap1 <- 2 * SoBe4A$HasMutualFriend + SoBe4A$HasRAF + 1;
   Scrap1 = Scrap1 + rnorm(nrow(SoBe4A),mean=0,sd=DotSpread);
plot(x=SoBe4A$RiskScore,y=Scrap1,
     xlab=RiskScoreLabel,ylab="",yaxt="n",
     xlim=quantile(SoBeA$RiskScore,c(0,1)),ylim=c(2.5,4.5),
     col=rep(MyColor,4)[SoBeA$HasMutualFriend+1],
     pch=".",cex=3)
MyBox(4,DotSpread,H4ATable[1,Every25T])
MyBox(3,DotSpread,H4ATable[2,Every25T])
axis(2,at=c(4,3),lab=c("Successful\nReferral",
                       "No Referral"),
     las=2,cex.axis=1.3)
#title(main=paste0("Figure 4. Boxplots of Player Risk Score\n",
#                  "by Referred by a Friend\n",
#                  "(only those with reciprocal friendships)"))
par(mar=c(5.1,4.1,4.1,2.1))
#savePlot(filename=paste0(Folder,"H4-80pct",sep=""),
#         type="jpg");
########################################
# Round the values in the table        #
#  for display purposes.               #
########################################
H4ATable$Pct = round(H4ATable$Pct,digits=2);
   H4ATable[,Every25T] = round(H4ATable[,Every25T],digits=2);
   H4ATable$pValue = round(H4ATable$pValue,digits=3);
   H4ATable$rEffectSize = round(H4ATable$rEffectSize,digits=3);
   H4ATable
##################################################
# H5: Number of friend referrals (RAFBonusCount) #
#     will relate to risk scores.                #
# Findings: Spearman's rank order correlation    #
#           was conducted to determine the       #
#           relation between number of           #
#           successful referrals and risk        #
#           scores.                              #
# * We reject the null hypothesis.               #
# * The more a person refers friends to play     #
#   online DFS, the greater their risk scores.   #
##################################################
# Check the Spearman correlations.     #
########################################
H5ATable <- data.frame(Q000=c(0,0),Q025=0,Q050=0,Q075=0,Q100=0);
   H5ATable[1,1:5] = quantile(SoBeA$RAFBonusCount,Every25N);
   H5ATable[2,1:5] = quantile(SoBeA$RiskScore,Every25N);
   H5ATable <- round(H5ATable,digits=2); H5ATable
H5A <- corr.test(SoBeA[,c("RiskScore","HasMutualFriend","HasRAF",
                          "RAFBonusCash","RAFBonusCount",
                          "RAFBonusYes")],
                 method="spearman"); H5A
   H5A$r[4,1]
   H5A$p[4,1]
#  Correlation matrix 
#                        (1)   (2)    (3)    (4)   (5)    (6)
#  RiskScore       (1)  1.00
#  HasMutualFriend (2) -0.02  1.00
#  HasRAF          (3)  0.00  0.17** 1.00
#  RAFBonusCash    (4)  0.02* 0.30** 0.06** 1.00
#  RAFBonusCount   (5)  0.02* 0.30** 0.06** 1.00* 1.00
#  RAFBonusYes     (6)  0.02* 0.30** 0.06** 1.00* 1.00** 1.00
#  * = sig. < 0.05; ** = sig. < 0.01
########################################
# Plot RiskScore versus RAFBonusCount. #
# Plot RiskScore versus                #
#  round(RAFBonusCash/10,digits=0).    #
########################################
plot(x=SoBeA$RAFBonusCount,y=SoBeA$RiskScore,
     xlab="Number of days receiving a Refer a Friend bonus",
     ylab="Player Risk Score",
     pch=19,col=MyColor);
#title(main=paste0("Figure 5. Player Risk Score\n",
#                  "by Number of Refer a Friend Bonus Rewards"))
plot(x=round(SoBeA$RAFBonusCash/10,digits=0),y=SoBeA$RiskScore,
     xlab="Estimated number of successful referrals",
     ylab="Player Risk Score",
     pch=19,col=MyColor);
#savePlot(filename=paste0(Folder,"H5-80pct",sep=""),
#         type="jpg");
########################################
# We wanted the Spearman and Pearson   #
#  correlations between player risk    #
#  score and estimated number of       #
#  referrals, but only when looking at #
#  the people we think had at least    #
#  one referral.                       #
########################################
Scrap1 <- data.frame(RiskScore=SoBeA$RiskScore,
                     Referrals=round(SoBeA$RAFBonusCash/10,digits=0));
Scrap2 <- subset(Scrap1,Scrap1$Referrals>0);
nrow(Scrap2)
cor(x=Scrap2$RiskScore,y=Scrap2$Referrals,method="pearson")
# -0.001775578
corr.test(Scrap2,method="pearson")
# Correlation matrix 
#            RiskScore Referrals
#  RiskScore         1         0
#  Referrals         0         1
# Sample Size 
#  285
# Probability values (Entries above the diagonal are adjusted
#  for multiple tests.) 
#            RiskScore Referrals
#  RiskScore      0.00      0.98
#  Referrals      0.98      0.00
cor(x=Scrap2$RiskScore,y=Scrap2$Referrals,method="spearman")
# -0.03346069
corr.test(Scrap2,method="spearman")
# Correlation matrix 
#            RiskScore Referrals
#  RiskScore      1.00     -0.03
#  Referrals     -0.03      1.00
# Sample Size 
#  285
# Probability values (Entries above the diagonal are adjusted
#  for multiple tests.) 
#            RiskScore Referrals
#  RiskScore      0.00      0.57
#  Referrals      0.57      0.00
############################################################
# Perform the hypothesis tests on the smaller subgroup.    #
############################################################
# H1: Players referred to DraftKings by a friend #
#  (HasRAF) will have different risk scores      #
#  (RiskScore) from those who weren't referred.  #
##################################################
H1B <- wilcox.test(RiskScore~HasRAF,data=SoBeB); H1B
   rFromWilcox(H1B,nSB)
   H1B$p.value
##################################################
# H2: Players who have successfully referred a   #
#     friend (RAFBonusYes) will have different   #
#     risk behaviors from those (a) who have not #
#     not attempted to refer a friend or (b)     #
#     attempted to refer a friend and failed.    #
##################################################
H2B <- wilcox.test(RiskScore~RAFBonusYes,data=SoBeB); H2B
   rFromWilcox(H2B,nSB)
   H2B$p.value
##################################################
# H3: People who have a reciprocal friendship    #
#     (HasMutualFriend) will have significantly  #
#     different risk scores from others.         #
# Findings: Wilcoxon rank-sum test was conducted #
#           to compare risk scores among people  #
#           with reciprocal friendship.          #
# * We fail to reject the null hypothesis.       #
# * People with reciprocal friendships on DFS    #
#   websites engage in contests with higher risk #
#   propensity than others.                      #
# However, when we combine H3A and H3B,          #
#  we reject the null.                           #
##################################################
H3B <- wilcox.test(RiskScore~HasMutualFriend,data=SoBeB); H3B
   rFromWilcox(H3B,nSB)
H3BTable <- data.frame(Count=c(sum(SoBeB$HasMutualFriend==1),
                               sum(SoBeB$HasMutualFriend==0)),
                       Pct=0,Q000=0,Q025=0,Q050=0,Q075=0,Q100=0,
                       W=0,pValue=c(H3B$p.value,0),
                       rEffectSize=0);
   rownames(H3BTable) = c("HasMutualFriend","NoMutualFriend");
   H3BTable$Pct = 100 * H3BTable$Count / nSB;
   H3BTable[1,Every25T] = quantile(subset(SoBeB$RiskScore,
                                          SoBeB$HasMutualFriend==1),
                                   Every25N);
   H3BTable[2,Every25T] = quantile(subset(SoBeB$RiskScore,
                                          SoBeB$HasMutualFriend==0),
                                   Every25N);
   H3BTable$W[1] = H3B$statistic;
   H3BTable$rEffectSize[1] = qnorm(H3B$p.value[1]/2)/sqrt(nSB);
########################################
# Plot RiskScore versus                #
#  HasMutualFriend.                    #
########################################
par(mar=c(5.1,9.1,4.1,2.1))
plot(x=SoBeB$RiskScore,
     y=SoBeB$HasMutualFriend+1+rnorm(nSB,mean=0,sd=DotSpread),
     xlab=RiskScoreLabel,yaxt="n",ylab="",
     col=c(MyColor,MyColor)[SoBeB$HasMutualFriend+1],
     pch=".",cex=3)
MyBox(2,DotSpread,H3BTable[1,Every25T])
MyBox(1,DotSpread,H3BTable[2,Every25T])
axis(2,at=c(2,1),lab=c("Reciprocal\nFriendship",
                       "No Reciprocal\nFriendship"),
     las=2,cex.axis=1.3)
#title(main=paste0("Figure 3. Boxplot of Player Risk Score\n",
#                  "by Reciprocal Friendship"))
par(mar=c(5.1,4.1,4.1,2.1))
#savePlot(filename=paste0(Folder,"H3-20pct",sep=""),
#         type="jpg");
########################################
# Round the values in the table        #
#  for display purposes.               #
########################################
H3BTable$Pct = round(H3BTable$Pct,digits=2);
   H3BTable[,Every25T] = round(H3BTable[,Every25T],digits=2);
   H3BTable$pValue = round(H3BTable$pValue,digits=3);
   H3BTable$rEffectSize = round(H3BTable$rEffectSize,digits=3);
   H3BTable
########################################
# Do we reject based on p1*p2? No.     #
########################################
H3A$p.value * H3B$p.value
##################################################
# H4: Among players with reciprocal friendships, #
#     there will be statistically significant    #
#     differences on risk scores between those   #
#     who have successfully referred a friend    #
#     (HasMutualFriend=1,RAFBonusYes=1) and      #
#     those who have not (HasMutualFriend=1,     #
#     RAFBonusYes=1).                            #
##################################################
SoBe4B <- subset(SoBeB,SoBeB$HasMutualFriend>0);
H4B <- wilcox.test(RiskScore~RAFBonusYes,data=SoBe4B); H4B
   rFromWilcox(H4B,nSB)
   H4B$p.value
##################################################
# H5: Number of friend referrals, estimated by   #
#     RAFBonusCount/10, is associated with risk  #
#     score.                                     #
##################################################
# Check the Spearman correlations.     #
########################################
H5BTable <- data.frame(Q000=c(0,0),Q025=0,Q050=0,Q075=0,Q100=0);
   H5BTable[1,1:5] = quantile(SoBeB$RAFBonusCount,Every25N);
   H5BTable[2,1:5] = quantile(SoBeB$RiskScore,Every25N);
   H5BTable <- round(H5BTable,digits=2); H5BTable
H5B <- corr.test(SoBeB[,c("RiskScore","HasMutualFriend","HasRAF",
                          "RAFBonusCash","RAFBonusCount",
                          "RAFBonusYes")],
                 method="spearman"); H5B
   H5B$r[4,1]
   H5B$p[4,1]
############################################################
# We are working from                                      #
#  Bauer, P., & Köhne, K. (1994).                          #
#  Evaluation of Experiments with Adaptive Interim         #
#   Analyses.                                              #
#  Biometrics, 50(4), 1029–1041.                           #
# * We are using an overall significance level of          #
#   alpha = 0.05.                                          #
# * We stop and fail to reject if the first test run has a #
#   p-value above alpha1 = 0.10.                           #
# * Since we are combining the two test runs, whether we   #
#   reject the null might depend on whether or not the     #
#   product of the two test runs' p-values (p1*p2) is less #
#   than or equal to                                       #
#   c1 = exp(-0.5*qchisq(1-alpha,df=4)) = 0.008704941.     #
# * Since we have an "immediately fail to reject" value    #
#   alpha0, we need an "immediately reject" threshold      #
#   below which we will reject the null without doing the  #
#   second test.                                           #
# -- This value is alpha1 = 0.0425647.                     #
############################################################
# Lock in the initial constants.                 #
##################################################
alpha = 0.05;
alpha0 <- 0.10;
c1 <- exp(-0.5*qchisq(1-alpha,df=4)); c1
##################################################
# Calculate alpha1 using Equation 3              #
#  in the paper.                                 #
##################################################
Digits <- 6;
Test1 <- data.frame(alpha1=alpha0*(1:10^Digits)/10^Digits,
                    area=0);
Test1$area = Test1$alpha1 + c1*(log(alpha0)-log(Test1$alpha1));
Test1 <- subset(Test1,c1<Test1$alpha1);
Test1 <- subset(Test1,0.99*alpha<Test1$area);
Test1 <- subset(Test1,Test1$area<1.01*alpha);
plot(x=Test1$alpha1,y=Test1$area,pch=".",
     xlab="Threshold alpha1",ylab="Size of rejection region");
abline(h=alpha,col="red");
Scrap1 <- subset(Test1,Test1$area<alpha);
alpha1 <- Scrap1[nrow(Scrap1),1]; alpha1
text(x=quantile(Test1$alpha,0.55),y=quantile(Test1$area,0.45),
     col="red",pos=4,lab=paste0("alpha1: ",alpha1));
abline(v=alpha1,col="red");
##################################################
# Run a Monte Carlo simulation to see if the     #
#  rejection region has the size it's supposed   #
#  to.                                           #
##################################################
# Start with some graphics parameters. #
########################################
Mag <- 6;
Points <- 10^Mag;
Highlights <- 1000;
Curve <- data.frame(p1=(1:10^(Digits-2))/10^(Digits-2),p2=0);
   Curve$p2 = c1/Curve$p1;
   Curve <- subset(Curve,Curve$p1<=1);
   Curve <- subset(Curve,Curve$p2<=1);
Drop <- 0.01;
High <- 0.90;
Offset <- 0.05;
xCrit <- 0.20
yCrit <- High + Offset - (1:4)*Offset;
#Colors <- c("red","darkolivegreen","blue","black");
Colors <- c("gray65","gray45","black","black");
########################################
# Plot the unit square.                #
# Color in the rejection regions.      #
########################################
plot(x=c(0,0,1,1,0),y=c(0,1,1,0,0),type="l",
     xlab=expression(paste(p[1],
                           ": p-value from the exploratory sample")),
     ylab=expression(paste(p[2],
                          ": p-value from the confirmatory sample")),
    );
polygon(x=c(0,0,alpha1,alpha1),y=c(0,1,1,0),
        col=Colors[1],border=Colors[1]);
Scrap4 <- data.frame(p1=c(alpha0,alpha0,alpha1,alpha1),
                     p2=c(c1/alpha0,0,0,c1/alpha1));
Scrap3 <- subset(Curve,Curve$p1>alpha1);
   Scrap3 <- subset(Scrap3,Scrap3$p1<alpha0);
   Scrap3 <- rbind(Scrap3,Scrap4);
polygon(x=Scrap3$p1,y=Scrap3$p2,
        col=Colors[2],border=Colors[2]);
########################################
# Graph the borders of the             #
#  rejection regions.                  #
########################################
lines(x=c(alpha0,alpha0),y=c(0,1),col=Colors[2]);
lines(x=c(alpha1,alpha1),y=c(0,1),col=Colors[1]);
lines(x=Curve$p1,y=Curve$p2,col=Colors[3])
text(col=Colors[3],x=0.5,y=0.05,pos=4,
     lab=substitute(paste(p[1],p[2]," = ",Scrap2),
                    list(Scrap2=round(c1,digits=4))));
text(col=Colors[1],x=xCrit,y=yCrit[1],pos=4,
     lab=substitute(paste("reject the null if ",Scrap2 <= p[1],
                          " < ",Scrap3),
                    list(Scrap2=sprintf("%.3f",0),
                         Scrap3=sprintf("%.3f",alpha1)
                        )
                   )
    );
text(col=Colors[2],x=xCrit,y=yCrit[2],pos=4,
     lab=substitute(paste("reject the null if ",Scrap2 <= p[1],
                          " < ",Scrap3," and ",p[1],p[2] < Scrap4),
                    list(Scrap2=sprintf("%.3f",alpha1),
                         Scrap3=sprintf("%.3f",alpha0),
                         Scrap4=sprintf("%.4f",c1)
                        )
                   )
    );
text(col=Colors[3],x=xCrit,y=yCrit[3],pos=4,
     lab=substitute(paste("retain the null if ",Scrap2 <= p[1],
                          " < ",Scrap3," and ",p[1],p[2] >= Scrap4),
                    list(Scrap2=sprintf("%.3f",alpha1),
                         Scrap3=sprintf("%.3f",alpha0),
                         Scrap4=sprintf("%.4f",c1)
                        )
                   )
    );
text(col=Colors[3],x=xCrit,y=yCrit[4],pos=4,
     lab=substitute(paste("retain the null if ",Scrap2 <= p[1],
                          " < ",Scrap3),
                    list(Scrap2=sprintf("%.3f",alpha0),
                         Scrap3=sprintf("%.3f",1)
                        )
                   )
    );
#Height <- 1; while (Height > 0) {
#   lines(col=Colors[1],x=c(0,alpha1),y=c(Height,Height));
#   Height = Height - Drop;
#}
#Height <- c1/alpha1; while (Height > c1/alpha0) {
#   lines(col=Colors[3],x=c(alpha1,c1/Height),y=c(Height,Height));
#   Height <- Height - Drop;
#}
#while (Height > 0) {
#   lines(col=Colors[3],x=c(alpha1,alpha0),y=c(Height,Height));
#   Height <- Height - Drop;
#}
########################################
# Make random ordered pairs (p1,p2).   #
# Color the locations based            #
#  on where they land.                 #
########################################
#p1 <- runif(Points); p2 <- runif(Points);
#Zone <- rep(Colors[4],Points);
#   Zone[p1*p2<c1] = Colors[3];
#   Zone[alpha0<p1] = Colors[4];
#   Zone[p1<alpha1] = Colors[1];
#Success <- sum(Zone==Colors[1]) + sum(Zone==Colors[3]);
#   Proportion <- Success / Points;
#points(x=p1[1:Highlights],y=p2[1:Highlights],
#       pch=18,cex=0.25,col=Zone[1:Highlights]);
########################################
# Print the results of the simulation. #
########################################
#text(x=0.5,y=0.5,pos=4,col=Colors[4],
#     lab=paste0("Test run:\n",
#                "Rejection region: ",Success,"\n",
#                "Total p1/p2 pairs: ",sprintf("%.0f",Points),"\n",
#                "Resulting proportion: ",
#                round(Proportion,digits=5)));
########################################
# Add the locations for the five pairs #
#  of hypothesis tests.                #
########################################
Hypes <- data.frame(Hypothesis=c("H1","H2","H3","H4","H5"),
                    p1=c(H1A$p.value,H2A$p.value,H3A$p.value,
                         H4A$p.value,H5A$p[4,1]),
                    p2=c(H1B$p.value,H2B$p.value,H3B$p.value,
                         H4B$p.value,H5B$p[4,1]),
                    stringsAsFactors=FALSE);
points(x=Hypes$p1,y=Hypes$p2,pch=15);
text(x=Hypes$p1,y=Hypes$p2,lab=Hypes$Hypothesis,pos=4);
#savePlot(filename=paste0(Folder,"HypeLocations",sep=""),
#         type="jpg");