######################################################################
# Read in the data files.                                            #
# * After you download the data files, remember to change Folder so  #
#   that R can find the files.                                       #
######################################################################
# Read in Data1 with the codes for states and nations.     #
# * IDNumber is the integers used as ID numbers.           #
# -- "Unknown" corresponds to 0 or -1.                     #
# * Name is the name of the state, province, or nation.    #
# -- DraftKings has more states, provinces, and nations in #
#    their codebook. The ones listed here are a shorter    #
#    list for all the codes we need for this cohort.       #
# * Level tells us which name/number combinations are for  #
#   states and which ones are for nations.                 #
# -- For example, State #1 is Alabama, but Nation #1 is    #
#    the U.S.A.                                            #
# Data1S will have the codes for states and provinces.     #
# * Data1N will have the codes for nations.                #
############################################################
Folder <- "C:/Laboratory/DraftKings/Impacts/";
Data1 <- read.csv(file=paste0(Folder,"TacklingData1Codes.csv"),
                  header=TRUE,stringsAsFactors=FALSE);
Data1S <- subset(Data1,Data1$Level=="State");
Data1N <- subset(Data1,Data1$Level=="Nation");
############################################################
# Read in the data for geographic location, birth year,    #
#  and player risk score.                                  #
# * UserID are sequential numbers denoting the players in  #
#   this cohort. The ID numbers for this package are only  #
#   for this package. For example, UserID = 1 in this      #
#   package does not necesarily denote the same player as  #
#   UserID = 1 in the Social Behaviors package.            #
# * RegStateID lists who listed which state/province as    #
#   their state/province of residence.                     #
# * RegCountryID lists who listed which nation as          #
#   their nation of residence.                             #
# * BirthYear has the users' birth years, if known.        #
# -- If DraftKings did not provide a birth year, then      #
#    BirthYear = 0.                                        #
# * RiskScore is the Player Risk Score, based on the       #
#   contests the players entered during the 2014 NFL       #
#   season (between 2014-08-22 and 2015-01-25).            #
# -- RiskScore = 0 for free contests.                      #
# Data2All has all 12041 users.                            #
# * Data2Cut has the 10385 users in the analytic sample.   #
############################################################
Data2All <- read.csv(file=paste0(Folder,"TacklingData2Cohort.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
   Data2Cut <- subset(Data2All,!is.na(Data2All$RiskScore));
   nD2A <- nrow(Data2All); nD2A
   nD2C <- nrow(Data2Cut); nD2C
############################################################
# Read in the metrics for players' entries into NFL-based  #
#  contests, non-NFL-based contests, and all contests      #
#  (Data3NFL, Data4Non, and Data5All, respectively).       #
# * Date1st is the start date for the first contest the    #
#   user entered.                                          #
# * DateLst is defined differently based on whether the    #
#   player has any activity after the 2014 NFL season.     #
# -- If the player has activity after the 2014 NFL season, #
#    then DateLst is 2015-01-25, the last date of          #
#    DraftKings' 2014 NFL season.                          #
# -- If the player only has activity during the 2014 NFL   #
#    season, then DateLst is the start date of the         #
#    player's last contest.                                #
# * nDays is the number of unique contest start dates.     #
# * nCont is the number of contests the player entered.    #
# * nEntries is the total number of entries over the       #
#   contests during the 2014 NFL season.                   #
# * nLineups is the total number of lineups over the       #
#   contests during the 2014 NFL season.                   #
# * TotFees is the total amount spent on entry fees.       #
# * AvgBuyIn is the average entry fee over the contests    #
#   the player entered.                                    #
# * TotWinnings is the player's total winnings.            #
# * nUserUp is the number of contests where the player     #
#   made a profit (i.e., the amount won in prizes exceeds  #
#   the amount spent on entry fees).                       #
# * UserID is the ID number that links this data to the    #
#   demographic data in Data2All and Data2Cut.             #
############################################################
Data3NFL <- read.csv(file=paste0(Folder,"TacklingData3NFL.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
Data4Not <- read.csv(file=paste0(Folder,"TacklingData4Not.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
Data5All <- read.csv(file=paste0(Folder,"TacklingData5All.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
############################################################
# Read in data on the types of contests the players        #
#  entered, both in terms of sport and payout structure.   #
# * UserID is the ID number that links this data to the    #
#   demographic data in Data2All and Data2Cut.             #
# * DidNFL is TRUE if the player played in at least one    #
#   NFL-based contest.                                     #
# * DidNBA is TRUE if the player played in at least one    #
#   NBA-based contest.                                     #
# * DidOth is TRUE if the player played in at least one    #
#   contest based on some other league.                    #
# * Cnt1 has the numbers of 50/50 contests.                #
# * Cnt2 has the numbers of head-to-head contests.         #
# * Cnt3 has the numbers of multiplier contests.           #
# * Cnt4 has the numbers of league contests.               #
# * Cnt5 has the numbers of "move your way up" contests.   #
# * Cnt6 has the numbers of other contests.                #
############################################################
Data6Play <- read.csv(file=paste0(Folder,"TacklingData6Play.csv"),
                      header=TRUE,stringsAsFactors=FALSE);
######################################################################
# Get the results for the "Player Characteristics" section of the    #
#  paper.                                                            #
######################################################################
# Among the initial sample of 12,041 daily fantasy sports  #
#  players who deposited money with DraftKings between     #
#  August 1st and September 30th, 2014, 86.2% (10,385)     #
#  made at least one paid entry into an NFL contest during #
#  the 2014 NFL season.                                    #
############################################################
round(100*nD2C/nD2A,digits=1)
nD2C
############################################################
# In their records, DraftKings had birth years for 3,990   #
#  out of the 12,041, and 3,795 out of the 10,385.         #
# To the nearest tenth of a year, the ranges and mean of   #
#  the birth years were the same for the 3,990 and the     #
#  3,795 -- 1932 to 1999, with a mean of 1980.5 and a      #
#  standard deviation of 9.4 years.                        #
############################################################
Scrap1 <- data.frame(Cohort=rep(0,6),AnalyticSample=0);
   Scrap2 <- subset(Data2All,Data2All$BirthYear>0);
   Scrap3 <- subset(Scrap2,!is.na(Scrap2$RiskScore));
   Scrap1[1,1] = nrow(Scrap2);
   Scrap1[2,1] = nD2A;
   Scrap1[3,1] = min(Scrap2$BirthYear);
   Scrap1[4,1] = max(Scrap2$BirthYear);
   Scrap1[5,1] = mean(Scrap2$BirthYear);
   Scrap1[6,1] = sd(Scrap2$BirthYear);
   Scrap1[1,2] = nrow(Scrap3);
   Scrap1[2,2] = nD2C;
   Scrap1[3,2] = min(Scrap3$BirthYear);
   Scrap1[4,2] = max(Scrap3$BirthYear);
   Scrap1[5,2] = mean(Scrap3$BirthYear);
   Scrap1[6,2] = sd(Scrap3$BirthYear);
   rownames(Scrap1) =c("Count","All","Min","Max","Mean","SD");
round(Scrap1,digits=1)
#       Cohort AnalyticSample
#Count  3990.0         3795.0
#All   12041.0        10385.0
#Min    1932.0         1932.0
#Max    1999.0         1999.0
#Mean   1980.5         1980.5
#SD        9.4            9.4
############################################################
# When they initially registered, players had the          #
#  opportunity to list a country and a state.              #
# * All 12,041 players were from North America.            #
# * Almost all of the 12,041 listed either the United      #
#   States or Canada.                                      #
# * Of the 10,385 in Data2Cut, 9,815 listed the United     #
#   States, 180 listed Canada,                             #
#   and 390 did not provide a country.                     #
# * Of the other 1,656, 1,534 listed the United States,    #
#   80 listed Canada, and 42 did not provide a country.    #
############################################################
Scrap1 <- cbind(xtabs(rep(1,nD2C)~Data2Cut$RegCountryID),
                c(0,0,0),
                xtabs(rep(1,nD2A)~Data2All$RegCountryID))
   Scrap1[1:3,2] = Scrap1[1:3,3] - Scrap1[1:3,1];
   colnames(Scrap1) = c("InD2C","NotD2C","Total");
   rownames(Scrap1) = c("None","USA","CAN");
   Scrap1 <- Scrap1[c(2,3,1),]; Scrap1
#      InD2C NotD2C Total
# USA   9815   1534 11349
# CAN    180     80   260
# None   390     42   432
############################################################
# Most listed the United States (11,349 out of the 12,041; #
#  9,815 out of the 10,385).                               #
# * They were distributed across 49 states (no one listed  #
#   Montana), the District of Columbia, and armed forces   #
#   stations in the Americas, Europe, and the Pacific.     #
# * Looking at the 9,815, the states with the most players #
#   were California (9.0%), Texas (9.0%), and New York     #
#   (6.9%).                                                #
############################################################
PlayUSA <- subset(Data2All,Data2All$RegCountryID==1)
   nUSA <- length(PlayUSA$UserID);
   nStates <- length(unique(PlayUSA$RegStateID));
Scrap1 <- xtabs(rep(1,nUSA)~(!is.na(PlayUSA$RiskScore))
                            +PlayUSA$RegStateID);
   StateTab <- data.frame(#Name=rep("a",nStates+1),
                          IDNumber=rep(0,nStates+1),
                          AnaInCnt=rep(0,nStates+1),
                          AnaInPct=rep(0,nStates+1),
                          AnaOutCnt=rep(0,nStates+1),
                          AnaOutPct=rep(0,nStates+1),
                          AnaTotCnt=rep(0,nStates+1),
                          AnaTotPct=rep(0,nStates+1),
                          stringsAsFactors=FALSE);
   StateTab$AnaInCnt[1:nStates] = Scrap1[2,];
   StateTab$AnaOutCnt[1:nStates] = Scrap1[1,];
   StateTab$IDNumber[1:nStates] = sort(unique(PlayUSA$RegStateID));
   StateTab <- merge(StateTab,Data1S[,c("IDNumber","Name")],
                     by=c("IDNumber"),all.x=TRUE);
   StateTab$Name[1] = "Total";
   StateTab$AnaInCnt[1] = -1;
   StateTab <- StateTab[,c(8,2,3,4,5,6,7,1)];
   StateTab <- StateTab[order(-StateTab$AnaInCnt),];
   StateTab$AnaInCnt[nStates+1] = sum(StateTab$AnaInCnt[1:nStates]);
   StateTab$AnaOutCnt[nStates+1] = sum(StateTab$AnaOutCnt[1:nStates]);
   StateTab$AnaTotCnt = StateTab$AnaInCnt + StateTab$AnaOutCnt;
   StateTab$AnaInPct = 100 * StateTab$AnaInCnt /
                                         StateTab$AnaInCnt[nStates+1];
   StateTab$AnaOutPct = 100 * StateTab$AnaOutCnt /
                                        StateTab$AnaOutCnt[nStates+1];
   StateTab$AnaTotPct = 100 * StateTab$AnaTotCnt /
                                        StateTab$AnaTotCnt[nStates+1];
   StateTab$AnaInPct = round(StateTab$AnaInPct,digits=1);
   StateTab$AnaOutPct = round(StateTab$AnaOutPct,digits=1);
   StateTab$AnaTotPct = round(StateTab$AnaTotPct,digits=1);
   StateTab
#                      Ana   Ana   Ana    Ana    Ana    Ana
#                Name InCnt InPct OutCnt OutPct TotCnt TotPct IDNumber
#          California   885   9.0    187   12.2   1072    9.4        5
#               Texas   885   9.0    148    9.6   1033    9.1       44
#            New York   680   6.9    104    6.8    784    6.9       33
#       Massachusetts   558   5.7     61    4.0    619    5.5       22
#             Florida   530   5.4     75    4.9    605    5.3       10
#        Pennsylvania   523   5.3     63    4.1    586    5.2       39
#            Illinois   511   5.2     81    5.3    592    5.2       14
#          New Jersey   413   4.2     53    3.5    466    4.1       31
#                Ohio   395   4.0     39    2.5    434    3.8       36
#             Alabama   321   3.3     95    6.2    416    3.7        1
#            Michigan   296   3.0     46    3.0    342    3.0       23
#ArmedForces:Americas   272   2.8     28    1.8    300    2.6       65
#             Georgia   239   2.4     36    2.3    275    2.4       11
#            Virginia   239   2.4     44    2.9    283    2.5       47
#            Maryland   237   2.4     29    1.9    266    2.3       21
#      North Carolina   218   2.2     28    1.8    246    2.2       34
#            Colorado   215   2.2     31    2.0    246    2.2        6
#           Minnesota   210   2.1     43    2.8    253    2.2       24
#             Indiana   198   2.0     26    1.7    224    2.0       15
#           Wisconsin   196   2.0     28    1.8    224    2.0       50
#            Missouri   162   1.7     22    1.4    184    1.6       26
#           Tennessee   156   1.6     22    1.4    178    1.6       43
#            Kentucky   133   1.4     22    1.4    155    1.4       18
#         Connecticut   132   1.3     18    1.2    150    1.3        7
#      South Carolina   105   1.1     14    0.9    119    1.0       41
#              Nevada    99   1.0     20    1.3    119    1.0       29
#            Oklahoma    95   1.0     10    0.7    105    0.9       37
#              Kansas    81   0.8     13    0.8     94    0.8       17
#                Utah    71   0.7      7    0.5     78    0.7       45
#              Oregon    70   0.7     29    1.9     99    0.9       38
#       New Hampshire    68   0.7      7    0.5     75    0.7       30
#            Arkansas    63   0.6     13    0.8     76    0.7        4
#            Nebraska    58   0.6     12    0.8     70    0.6       28
#        Rhode Island    54   0.6      6    0.4     60    0.5       40
#         Mississippi    52   0.5      5    0.3     57    0.5       25
#        North Dakota    51   0.5      8    0.5     59    0.5       35
#              Hawaii    40   0.4      9    0.6     49    0.4       12
#               Maine    40   0.4      4    0.3     44    0.4       20
#       West Virginia    39   0.4      2    0.1     41    0.4       49
#          New Mexico    38   0.4      6    0.4     44    0.4       32
#        South Dakota    38   0.4      2    0.1     40    0.4       42
#            Delaware    34   0.3      6    0.4     40    0.4        8
#               Idaho    29   0.3      9    0.6     38    0.3       13
#District of Columbia    26   0.3      6    0.4     32    0.3        9
#             Vermont    16   0.2      1    0.1     17    0.1       46
#             Wyoming    16   0.2      0    0.0     16    0.1       51
#              Alaska    14   0.1      3    0.2     17    0.1        2
# ArmedForces: Europe     7   0.1      1    0.1      8    0.1       66
#ArmedForces: Pacific     3   0.0      0    0.0      3    0.0       67
#                Iowa     2   0.0      0    0.0      2    0.0       16
#           Louisiana     1   0.0      4    0.3      5    0.0       19
#          Washington     1   0.0      3    0.2      4    0.0       48
#             Arizona     0   0.0      5    0.3      5    0.0        3
#               Total  9815 100.0   1534  100.0  11349  100.0        0
############################################################
# A few players listed Canada (260 out of the 12,041;      #
#  180 out of the 10,385).                                 #
############################################################
PlayCAN <- subset(Data2All,Data2All$RegCountryID==2)
   nCAN <- length(PlayCAN$UserID);
   nProv <- length(unique(PlayCAN$RegStateID));
Scrap1 <- xtabs(rep(1,nCAN)~(!is.na(PlayCAN$RiskScore))
                            +PlayCAN$RegStateID);
   StateTab <- data.frame(IDNumber=rep(0,nProv+1),
                          AnaInCnt=rep(0,nProv+1),
                          AnaInPct=rep(0,nProv+1),
                          AnaOutCnt=rep(0,nProv+1),
                          AnaOutPct=rep(0,nProv+1),
                          AnaTotCnt=rep(0,nProv+1),
                          AnaTotPct=rep(0,nProv+1),
                          stringsAsFactors=FALSE);
   StateTab$AnaInCnt[1:nProv] = Scrap1[2,];
   StateTab$AnaOutCnt[1:nProv] = Scrap1[1,];
   StateTab$IDNumber[1:nProv] = sort(unique(PlayCAN$RegStateID));
   StateTab <- merge(StateTab,Data1S[,c("IDNumber","Name")],
                     by=c("IDNumber"),all.x=TRUE);
   StateTab$Name[1] = "Total";
   StateTab$AnaInCnt[1] = -1;
   StateTab <- StateTab[,c(8,2,3,4,5,6,7,1)];
   StateTab <- StateTab[order(-StateTab$AnaInCnt),];
   StateTab$AnaInCnt[nProv+1] = sum(StateTab$AnaInCnt[1:nProv]);
   StateTab$AnaOutCnt[nProv+1] = sum(StateTab$AnaOutCnt[1:nProv]);
   StateTab$AnaTotCnt = StateTab$AnaInCnt + StateTab$AnaOutCnt;
   StateTab$AnaInPct = 100 * StateTab$AnaInCnt /
                                         StateTab$AnaInCnt[nProv+1];
   StateTab$AnaOutPct = 100 * StateTab$AnaOutCnt /
                                        StateTab$AnaOutCnt[nProv+1];
   StateTab$AnaTotPct = 100 * StateTab$AnaTotCnt /
                                        StateTab$AnaTotCnt[nProv+1];
   StateTab$AnaInPct = round(StateTab$AnaInPct,digits=1);
   StateTab$AnaOutPct = round(StateTab$AnaOutPct,digits=1);
   StateTab$AnaTotPct = round(StateTab$AnaTotPct,digits=1);
   StateTab
#                  Ana   Ana   Ana    AnA    Ana    Ana
#            Name InCnt InPct OutCnt OutPct TotCnt TotPct IDNumber
#         Ontario    82  45.6     47   58.8    129   49.6       60
#British Columbia    38  21.1     14   17.5     52   20.0       53
#         Alberta    34  18.9      9   11.2     43   16.5       52
#        Manitoba    10   5.6      0    0.0     10    3.8       54
#   New Brunswick     6   3.3      1    1.2      7    2.7       55
#     Nova Scotia     6   3.3      1    1.2      7    2.7       58
#    Saskatchewan     4   2.2      7    8.8     11    4.2       63
#          Quebec     0   0.0      1    1.2      1    0.4       62
#           Total   180 100.0     80  100.0    260  100.0        0
######################################################################
# We start with Data3NFL, Data4Not, and Data5All, and then calculate #
#  more metrics based on the data we read in.                        #
# * Duration is DateLst minus Date1st plus 1.                        #
# * Freq = 100*nDays/Duration is the percentage of days in Duration  #
#   where at least one of the player's contests started.             #
# * ContPED = nCont/nDays is the number of contests per entry day.   #
# * EntPCont = nEntries/nCont is the average number of entries       #
#   per contest.                                                     #
# * FeesPCont = TotFees/nCont is the average amount spent on entry   #
#   fees per contest.                                                #
# * LineupPE = nLineups/nEntries is the number of distinct lineups   #
#   per entry. For most, it'll be 1.00. For others, it'll be less.   #
# * FeesPDay = TotFees/nDays is the average amount the player spends #
#   on a day he actually plays.                                      #
# * NetLoss = TotFees-TotWinnings is the total amount lost over the  #
#   2004 NFL season.                                                 #
# * PctLoss = 100*NetLoss/TotFees tells us what percent of the total #
#   entry fees the player lost.                                      #
# * PctProfit = 100*nUserUp/nCont is the percentage of contests      #
#   where the user made a profit (meaning that the total prizes      #
#   were greater than the total amount paid in entry fees.           #
# Make Table 1 and Table 2.                                          #
######################################################################
For1Pt1 <- data.frame(nDays=Data3NFL$nDays,
                      Duration=difftime(time1=Data3NFL$DateLst,
                                        time2=Data3NFL$Date1st,
                                        units=c("days")),
                      Freq=0,
                      nCont=Data3NFL$nCont,
                      ContPED=Data3NFL$nCont/Data3NFL$nDays,
                      EntPCont=Data3NFL$nEntries/Data3NFL$nCont,
                      FeesPCont=Data3NFL$TotFees/Data3NFL$nCont,
                      AvgBuyIn=Data3NFL$AvgBuyIn,
                      LineupPE=Data3NFL$nLineups/Data3NFL$nEntries,
                      TotFees=Data3NFL$TotFees,
                      FeesPDay=Data3NFL$TotFees/Data3NFL$nDays,
                      NetLoss=Data3NFL$TotFees-Data3NFL$TotWinnings,
                      PctLoss=0,
                      PctProfit=100*Data3NFL$nUserUp/Data3NFL$nCont,
                      UserID=Data3NFL$UserID,
                      stringsAsFactors=FALSE);
   For1Pt1$Duration = round(as.numeric(For1Pt1$Duration),digits=0);
   For1Pt1$Duration = For1Pt1$Duration + 1;
   For1Pt1$Freq = 100 * For1Pt1$nDays / For1Pt1$Duration;
   For1Pt1$PctLoss = 100 * For1Pt1$NetLoss / For1Pt1$TotFees;
Data4Not <- read.csv(file=paste0(Folder,"TacklingData4Not.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
For1Pt2 <- data.frame(nDays=Data4Not$nDays,
                      Duration=difftime(time1=Data4Not$DateLst,
                                        time2=Data4Not$Date1st,
                                        units=c("days")),
                      Freq=0,
                      nCont=Data4Not$nCont,
                      ContPED=Data4Not$nCont/Data4Not$nDays,
                      EntPCont=Data4Not$nEntries/Data4Not$nCont,
                      FeesPCont=Data4Not$TotFees/Data4Not$nCont,
                      AvgBuyIn=Data4Not$AvgBuyIn,
                      LineupPE=Data4Not$nLineups/Data4Not$nEntries,
                      TotFees=Data4Not$TotFees,
                      FeesPDay=Data4Not$TotFees/Data4Not$nDays,
                      NetLoss=Data4Not$TotFees-Data4Not$TotWinnings,
                      PctLoss=0,
                      PctProfit=100*Data4Not$nUserUp/Data4Not$nCont,
                      UserID=Data4Not$UserID,
                      stringsAsFactors=FALSE);
   For1Pt2$Duration = round(as.numeric(For1Pt2$Duration),digits=0);
   For1Pt2$Duration = For1Pt2$Duration + 1;
   For1Pt2$Freq = 100 * For1Pt2$nDays / For1Pt2$Duration;
   For1Pt2$PctLoss = 100 * For1Pt2$NetLoss / For1Pt2$TotFees;
Data5All <- read.csv(file=paste0(Folder,"TacklingData5All.csv"),
                     header=TRUE,stringsAsFactors=FALSE);
For1Pt3 <- data.frame(nDays=Data5All$nDays,
                      Duration=difftime(time1=Data5All$DateLst,
                                        time2=Data5All$Date1st,
                                        units=c("days")),
                      Freq=0,
                      nCont=Data5All$nCont,
                      ContPED=Data5All$nCont/Data5All$nDays,
                      EntPCont=Data5All$nEntries/Data5All$nCont,
                      FeesPCont=Data5All$TotFees/Data5All$nCont,
                      AvgBuyIn=Data5All$AvgBuyIn,
                      LineupPE=Data5All$nLineups/Data5All$nEntries,
                      TotFees=Data5All$TotFees,
                      FeesPDay=Data5All$TotFees/Data5All$nDays,
                      NetLoss=Data5All$TotFees-Data5All$TotWinnings,
                      PctLoss=0,
                      PctProfit=100*Data5All$nUserUp/Data5All$nCont,
                      UserID=Data5All$UserID,
                      stringsAsFactors=FALSE);
   For1Pt3$Duration = round(as.numeric(For1Pt3$Duration),digits=0);
   For1Pt3$Duration = For1Pt3$Duration + 1;
   For1Pt3$Freq = 100 * For1Pt3$nDays / For1Pt3$Duration;
   For1Pt3$PctLoss = 100 * For1Pt3$NetLoss / For1Pt3$TotFees;
EpiTab1 <- data.frame(VarLab=c("Number of Active Days",
                               "Duration -- Numbers of Days",
                               "Percentage of Entry Days",
                               "Number of Contests",
                               "Contests Per Entry Day",
                               "Entries Per Contest",
                               "Entry Fees Per Contest",
                               "Average Entry Fee Size",
                               "Distinct Lineups Per Entry",
                               "Total Entry Fees",
                               "Entry Fees Per Day",
                               "Net Loss",
                               "Percent Lost",
                               "Percent of Contests Won"),
                      NFLMeans=rep(0,14),
                      NFLSDs=rep(0,14),
                      NFLP50s=rep(0,14),
                      OthMeans=rep(0,14),
                      OthSDs=rep(0,14),
                      OthP50s=rep(0,14),
                      AllMeans=rep(0,14),
                      AllSDs=rep(0,14),
                      AllP50s=rep(0,14),
                      stringsAsFactors=FALSE);
i1 <- 1; while (i1 <= 14) {
   EpiTab1$NFLMeans[i1] = mean(For1Pt1[,i1]);
   EpiTab1$NFLSDs[i1] = sd(For1Pt1[,i1]);
   EpiTab1$NFLP50s[i1] = median(For1Pt1[,i1]);
   EpiTab1$OthMeans[i1] = mean(For1Pt2[,i1],na.rm=TRUE);
   EpiTab1$OthSDs[i1] = sd(For1Pt2[,i1],na.rm=TRUE);
   EpiTab1$OthP50s[i1] = median(For1Pt2[,i1],na.rm=TRUE);
   EpiTab1$AllMeans[i1] = mean(For1Pt3[,i1]);
   EpiTab1$AllSDs[i1] = sd(For1Pt3[,i1]);
   EpiTab1$AllP50s[i1] = median(For1Pt3[,i1]);
   i1 = i1 + 1;
}
Scrap1 <- EpiTab1; Scrap1[,2:10] = round(Scrap1[,2:10],digits=1);
Scrap1[,c(1,2,3,4)]
#                        VarLab NFLMeans NFLSDs NFLP50s
#1        Number of Active Days     11.4    9.7     8.0
#2  Duration -- Numbers of Days    109.3   47.1   133.0
#3     Percentage of Entry Days     16.2   21.0    10.6
#4           Number of Contests     35.4   82.8    16.0
#5       Contests Per Entry Day      2.7    3.3     2.0
#6          Entries Per Contest      1.2    0.6     1.0
#7       Entry Fees Per Contest      8.8   22.5     4.5
#8       Average Entry Fee Size      7.6   20.6     4.1
#9   Distinct Lineups Per Entry      1.0    0.1     1.0
#10            Total Entry Fees    369.5 3668.7    71.5
#11          Entry Fees Per Day     22.9  118.3     9.7
#12                    Net Loss    -25.1 9894.4    28.0
#13                Percent Lost     46.3  142.3    55.0
#14     Percent of Contests Won     18.9   16.6    17.6
Scrap1[,c(1,5,6,7)]
#                        VarLab OthMeans OthSDs OthP50s
#1        Number of Active Days     15.3   23.0     5.0
#2  Duration -- Numbers of Days     91.2   54.1   109.0
#3     Percentage of Entry Days     28.5   33.4    12.2
#4           Number of Contests     41.9  139.1     7.0
#5       Contests Per Entry Day      1.9    2.1     1.3
#6          Entries Per Contest      1.1    0.4     1.0
#7       Entry Fees Per Contest      5.3   15.3     2.1
#8       Average Entry Fee Size      4.8   13.3     2.0
#9   Distinct Lineups Per Entry      1.0    0.1     1.0
#10            Total Entry Fees    297.4 2057.5    19.0
#11          Entry Fees Per Day     10.7   33.3     3.3
#12                    Net Loss     46.5  764.6     8.2
#13                Percent Lost     50.4  136.7    70.0
#14     Percent of Contests Won     15.2   19.5    10.7
Scrap1[,c(1,8,9,10)]
#                        VarLab AllMeans AllSDs AllP50s
#1        Number of Active Days     18.0   21.6    10.0
#2  Duration -- Numbers of Days    114.4   45.0   134.0
#3     Percentage of Entry Days     19.8   22.6    12.3
#4           Number of Contests     56.7  155.3    20.0
#5       Contests Per Entry Day      2.6    3.2     2.0
#6          Entries Per Contest      1.2    0.5     1.0
#7       Entry Fees Per Contest      8.4   21.9     4.3
#8       Average Entry Fee Size      7.3   20.1     4.0
#9   Distinct Lineups Per Entry      1.0    0.1     1.0
#10            Total Entry Fees    520.0 4154.0    87.0
#11          Entry Fees Per Day     21.4  115.6     9.2
#12                    Net Loss     -1.6 9895.9    30.7
#13                Percent Lost     46.5  124.3    53.0
#14     Percent of Contests Won     18.6   15.3    17.6
EpiTab2 = round(cor(For1Pt3[,1:14],method="spearman"),digits=2);
EpiTab2[,1:7]
#          nDays Duration  Freq nCont ContPED EntPCont FeesPCont
#nDays      1.00     0.40  0.56  0.92    0.24     0.42     -0.01
#Duration   0.40     1.00 -0.23  0.36    0.05     0.13     -0.01
#Freq       0.56    -0.23  1.00  0.53    0.21     0.28     -0.03
#nCont      0.92     0.36  0.53  1.00    0.56     0.50     -0.06
#ContPED    0.24     0.05  0.21  0.56    1.00     0.39     -0.12
#EntPCont   0.42     0.13  0.28  0.50    0.39     1.00      0.08
#FeesPCont -0.01    -0.01 -0.03 -0.06   -0.12     0.08      1.00
#AvgBuyIn  -0.06    -0.02 -0.06 -0.10   -0.15    -0.05      0.98
#LineupPE  -0.26    -0.08 -0.19 -0.36   -0.37    -0.55      0.02
#TotFees    0.77     0.30  0.41  0.80    0.40     0.47      0.51
#FeesPDay   0.08     0.01  0.07  0.22    0.38     0.26      0.84
#NetLoss    0.47     0.17  0.25  0.50    0.27     0.34      0.42
#PctLoss   -0.53    -0.23 -0.26 -0.53   -0.21    -0.22     -0.08
#PctProfit  0.39     0.19  0.14  0.38    0.13     0.10      0.18
EpiTab2[,8:14]
#         AvgBuyIn LineupPE TotFees FeesPDay NetLoss PctLoss PctProfit
#nDays       -0.06    -0.26    0.77     0.08    0.47   -0.53      0.39
#Duration    -0.02    -0.08    0.30     0.01    0.17   -0.23      0.19
#Freq        -0.06    -0.19    0.41     0.07    0.25   -0.26      0.14
#nCont       -0.10    -0.36    0.80     0.22    0.50   -0.53      0.38
#ContPED     -0.15    -0.37    0.40     0.38    0.27   -0.21      0.13
#EntPCont    -0.05    -0.55    0.47     0.26    0.34   -0.22      0.10
#FeesPCont    0.98     0.02    0.51     0.84    0.42   -0.08      0.18
#AvgBuyIn     1.00     0.08    0.46     0.80    0.39   -0.06      0.17
#LineupPE     0.08     1.00   -0.29    -0.17   -0.20    0.13     -0.05
#TotFees      0.46    -0.29    1.00     0.67    0.69   -0.51      0.45
#FeesPDay     0.80    -0.17    0.67     1.00    0.52   -0.17      0.23
#NetLoss      0.39    -0.20    0.69     0.52    1.00    0.09      0.01
#PctLoss     -0.06     0.13   -0.51    -0.17    0.09    1.00     -0.82
#PctProfit    0.17    -0.05    0.45     0.23    0.01   -0.82      1.00
######################################################################
# Get the data on participation by sport league and by contest type. #
######################################################################
# Construct Table 3 with the leagues.                      #
############################################################
Data6Play <- read.csv(file=paste0(Folder,"TacklingData6Play.csv"),
                      header=TRUE,stringsAsFactors=FALSE);
Scrap1 <- Data6Play[,c(1:4)];
Scrap1$Group = 1 + 1*(Scrap1$DidNBA) + 2*(Scrap1$DidOth);
EpiTab3 <- data.frame(Sport=c("NFL only","NFL & NBA only",
                              "NFL & Other only","NFL & NBA & Other",
                              "Total"),
                      Count=0,Percentage=0,
                      stringsAsFactors=FALSE);
   EpiTab3$Count[1:4] = xtabs(rep(1,nD2C)~Scrap1$Group);
   EpiTab3$Count[5] = nD2C;
   EpiTab3$Percentage[1:5] = 100 * EpiTab3$Count / nD2C;
Scrap1 <- EpiTab3;
   Scrap1$Percentage <- round(Scrap1$Percentage,digits=1); Scrap1
#             Sport Count Percentage
#          NFL only  5131       49.4
#    NFL & NBA only   825        7.9
#  NFL & Other only  2189       21.1
# NFL & NBA & Other  2240       21.6
#             Total 10385      100.0
############################################################
# Construct Table 4 with the payout structure types.       #
# * We were unable to recreate the exact numbers of users  #
#   in the "Number with more than half of their contests   #
#   within contest type" column.                           #
# -- The differences were so small that the resulting      #
#    percentage (rounded to the nearest tenth of a         #
#    percentage point) did not change.                     #
# -- The column should have said "at least half" rather    #
#    than "more than half."                                #
#                       Class Manuscript This Run          #
# 50/50 (except Head-to-Head)        727      724          #
#                Head-to-Head        395      397          #
#                  Multiplier        238      234          #
#       League (except 50/50)        301      296          #
#            Move your way up        156      154          #
#              Other contests       6761     6757          #
############################################################
Scrap1 <- Data6Play[,c(1,5:10)];
   Scrap1$CntT <- apply(Scrap1[,2:7],FUN=sum,1);
   Scrap1$CntM = apply(Scrap1[,2:6],1,max);
   Scrap1$Pct1 = Scrap1$Cnt1 / Scrap1$CntT;
   Scrap1$Pct2 = Scrap1$Cnt2 / Scrap1$CntT;
   Scrap1$Pct3 = Scrap1$Cnt3 / Scrap1$CntT;
   Scrap1$Pct4 = Scrap1$Cnt4 / Scrap1$CntT;
   Scrap1$Pct5 = Scrap1$Cnt5 / Scrap1$CntT;
   Scrap1$Pct6 = Scrap1$Cnt6 / Scrap1$CntT;
Scrap1$Pref = 7;
   Scrap2 = 1.1 * (Scrap1$Cnt1==Scrap1$CntM);
   Scrap2 = Scrap2 + 2.1 * (Scrap1$Cnt2==Scrap1$CntM);
   Scrap2 = Scrap2 + 3.1 * (Scrap1$Cnt3==Scrap1$CntM);
   Scrap2 = Scrap2 + 4.1 * (Scrap1$Cnt4==Scrap1$CntM);
   Scrap2 = Scrap2 + 5.1 * (Scrap1$Cnt5==Scrap1$CntM);
   Scrap2 = Scrap2 + 6.1 * (Scrap1$Cnt6==Scrap1$CntM);
   Scrap1$Pref = Scrap1$Pref - 6*(Scrap2==1.1);
   Scrap1$Pref = Scrap1$Pref - 5*(Scrap2==2.1);
   Scrap1$Pref = Scrap1$Pref - 4*(Scrap2==3.1);
   Scrap1$Pref = Scrap1$Pref - 3*(Scrap2==4.1);
   Scrap1$Pref = Scrap1$Pref - 2*(Scrap2==5.1);
   Scrap1$Pref = Scrap1$Pref - 1*(Scrap2==6.1);
EpiTab4 <- data.frame(Class=c("50/50 (except Head-to-Head)",
                              "Head-to-Head",
                              "Multiplier",
                              "League (except 50/50)",
                              "Move your way up",
                              "Other contests"),
                      AtLeastOnceCnt=rep(0,6),
                      AtLeastOncePct=rep(0,6),
                      AtLeastHalfCnt=rep(0,6),
                      AtLeastHalfPct=rep(0,6),
                      stringsAsFactors=FALSE);
   EpiTab4$AtLeastOnceCnt[1] = sum(Scrap1$Cnt1>0);
   EpiTab4$AtLeastOnceCnt[2] = sum(Scrap1$Cnt2>0);
   EpiTab4$AtLeastOnceCnt[3] = sum(Scrap1$Cnt3>0);
   EpiTab4$AtLeastOnceCnt[4] = sum(Scrap1$Cnt4>0);
   EpiTab4$AtLeastOnceCnt[5] = sum(Scrap1$Cnt5>0);
   EpiTab4$AtLeastOnceCnt[6] = sum(Scrap1$Cnt6>0);
   EpiTab4$AtLeastOncePct = 100 * EpiTab4$AtLeastOnceCnt / nD2C;
   EpiTab4$AtLeastHalfCnt[1] = sum(Scrap1$Pct1>=0.5);
   EpiTab4$AtLeastHalfCnt[2] = sum(Scrap1$Pct2>=0.5);
   EpiTab4$AtLeastHalfCnt[3] = sum(Scrap1$Pct3>=0.5);
   EpiTab4$AtLeastHalfCnt[4] = sum(Scrap1$Pct4>=0.5);
   EpiTab4$AtLeastHalfCnt[5] = sum(Scrap1$Pct5>=0.5);
   EpiTab4$AtLeastHalfCnt[6] = sum(Scrap1$Pct6>=0.5);
   EpiTab4$AtLeastHalfPct = 100 * EpiTab4$AtLeastHalfCnt / nD2C;
Scrap3 <- EpiTab4;
   Scrap3[,2:5] = round(Scrap3[,2:5],digits=1); Scrap3
#                              AtLeast AtLeast AtLeast AtLeast
#                        Class OnceCnt OncePct HalfCnt HalfPct
#1 50/50 (except Head-to-Head)    6034    58.1     724     7.0
#2                Head-to-Head    3676    35.4     397     3.8
#3                  Multiplier    5111    49.2     234     2.3
#4       League (except 50/50)    5367    51.7     296     2.9
#5            Move your way up    5861    56.4     154     1.5
#6              Other contests    9962    95.9    6757    65.1
######################################################################
# Use Data5All to partition off and work with the groups of most     #
#  involved players.                                                 #
# * Make the Venn diagram with the information from Table 3.         #
######################################################################
# Order the players in the analytic sample by TotFees, and #
#  then get the sums for a centile histogram plot.         #
############################################################
Data5All <- Data5All[order(Data5All$TotFees),];
Scrap1 <- data.frame(Centile=ceiling(100*(1:nD2C)/nD2C),
                     TotFees=Data5All$TotFees);
Scrap2 <- xtabs(TotFees~Centile,data=Scrap1);
plot(x=1:100,y=as.numeric(Scrap2),
     main=paste0("Fig. 1. Total entry fees by centile groups, ",
                 "2014 NFL Season"),
     xlab="Percentile",ylab="Total paid in entry fees",
     pch="*",col="red");
############################################################
# Create the data set for the last few tables.             #
# * Merge For1Pt3 and Data2Cut$RiskScore.                  #
# * Create columns T01nC, T01NL, and T01EF to note who is  #
#   in the top 1% by nCont, NetLoss, and TotFees,          #
#   respectively.                                          #
############################################################
nTop01 <- ceiling(nD2C/100);
nBot99 <- nD2C - nTop01;
For5Pt0 <- merge(For1Pt3,Data2Cut[,c("UserID","RiskScore")],
                 all.x=TRUE,all.y=TRUE,by=c("UserID"));
   For5Pt0 <- For5Pt0[order(-For5Pt0$nCont),];
   For5Pt0$T01nC <- rep(c(1,0),c(nTop01,nBot99));
   For5Pt0 <- For5Pt0[order(-For5Pt0$NetLoss),];
   For5Pt0$T01NL <- rep(c(1,0),c(nTop01,nBot99));
   For5Pt0 <- For5Pt0[order(-For5Pt0$TotFees),];
   For5Pt0$T01EF <- rep(c(1,0),c(nTop01,nBot99));
   For5Pt0$NotT1 <- 1*(For5Pt0$T01EF+For5Pt0$T01NL+For5Pt0$T01nC==0);
##################################################
# Create a Venn diagram that shows the overlap   #
#  between the three Top 1% group and the group  #
3  of remaining players.                         #
##################################################
For5Pt6 <- xtabs(rep(1,nD2C)~T01EF+T01NL+T01nC,data=For5Pt0);
nNotT1 <- For5Pt6[1,1,1]; For5Pt6
#     T01nC = 0              T01nC = 1
#     T01NL                  T01NL
#T01EF     0     1     T01EF     0     1
#    0 10167    44         0    64     6
#    1    29    41         1    21    13
########################################
# Start with some preparation.         #
########################################
Space1 <- 5;
Space2 <- 0.1;
Space3 <- 1 - Space2;
For5Pt1 <- data.frame(x=rep(0,6),y=rep(0,6));
   For5Pt1$y[1] = 0;
   For5Pt1$x[2] = 0;
   For5Pt1$y[6] = sqrt(2*nTop01-For5Pt6[1,2,2]-For5Pt6[2,2,2]);
      For5Pt1$y[6] = For5Pt1$y[6] * sqrt(2);
   For5Pt1$x[4] = For5Pt1$y[6] / 2;
   For5Pt1$y[3] = (For5Pt6[1,1,2]+For5Pt6[2,1,2]) / For5Pt1$x[4];
   For5Pt1$y[4] = For5Pt1$y[6] - For5Pt1$y[3];
   For5Pt1$x[3] = For5Pt6[2,2,2] / (For5Pt1$y[4]-For5Pt1$y[3]);
   For5Pt1$y[2] = For5Pt1$y[3] - For5Pt6[2,1,2]/For5Pt1$x[3];
   For5Pt1$y[5] = For5Pt1$y[4] + For5Pt6[2,2,1]/For5Pt1$x[3];
   For5Pt1$x[1] = -For5Pt6[2,1,1] / (For5Pt1$y[5]-For5Pt1$y[2]);
For5Pt2 <- For5Pt1;
   For5Pt2$x = For5Pt2$x - Space1 - 0.50*(For5Pt1$x[4]-For5Pt1$x[1]);
For5Pt3 <- For5Pt1;
   For5Pt3$x = For5Pt3$x + Space1 + 0.50*(For5Pt1$x[4]-For5Pt1$x[1]);
   For5Pt3$y = For5Pt3$y + Space1 + 0.25*(For5Pt1$y[6]-For5Pt1$y[1]);
For5Pt4 <- For5Pt1;
   For5Pt4$x = For5Pt4$x + Space1 + 0.50*(For5Pt1$x[4]-For5Pt1$x[1]);
   For5Pt4$y = For5Pt4$y - Space1 - 0.25*(For5Pt1$y[6]-For5Pt1$y[1]);
For5Pt5 <- data.frame(x0=rep(0,3),x1=rep(0,3),
                      y0=rep(0,3),y1=rep(0,3),
                      xt=rep(0,3),yt=rep(0,3),pos=c(4,4,4));
   For5Pt5$x0[1] = For5Pt1$x[1];
   For5Pt5$x1[1] = For5Pt2$x[3];
   For5Pt5$y0[1] = 0.5*(For5Pt1$y[1]+For5Pt2$y[6]);
   For5Pt5$y1[1] = 0.5*(For5Pt1$y[1]+For5Pt2$y[6]);
   For5Pt5$x0[2] = 0.5*(For5Pt1$x[1]+For5Pt1$x[4]);
   For5Pt5$x1[2] = For5Pt3$x[2];
   For5Pt5$y0[2] = For5Pt1$y[6];
   For5Pt5$y1[2] = 0.5*(For5Pt3$y[4]+For5Pt3$y[5]);
   For5Pt5$x0[3] = 0.5*(For5Pt1$x[1]+For5Pt1$x[4]);
   For5Pt5$x1[3] = For5Pt4$x[2];
   For5Pt5$y0[3] = For5Pt1$y[1];
   For5Pt5$y1[3] = For5Pt4$y[2];
   For5Pt5$xt[1] = For5Pt2$x[1];
   For5Pt5$xt[2] = For5Pt3$x[2];
   For5Pt5$xt[3] = For5Pt4$x[2];
   For5Pt5$yt[1] = For5Pt2$y[5] + 0.50*(For5Pt2$y[6]-For5Pt2$y[5]);
   For5Pt5$yt[2] = For5Pt3$y[2] + 0.40*(For5Pt3$y[3]-For5Pt3$y[2]);
   For5Pt5$yt[3] = For5Pt4$y[4] + 0.25*(For5Pt4$y[5]-For5Pt4$y[4]);
LabText <- c("Top 1% by\nTotal Entry Fees,\nn = 104",
             "Top 1% by\nNet Loss,\nn = 104",
             "Top 1% by\nNumber of Contests,\nn = 104");
WallL <- min(For5Pt1$x,For5Pt2$x,For5Pt3$x,For5Pt4$x) - 0.00*Space1;
WallR <- max(For5Pt1$x,For5Pt2$x,For5Pt3$x,For5Pt4$x) + 0.00*Space1;
WallB <- min(For5Pt1$y,For5Pt2$y,For5Pt3$y,For5Pt4$y) - 0.00*Space1;
WallT <- max(For5Pt1$y,For5Pt2$y,For5Pt3$y,For5Pt4$y) + 0.00*Space1;
########################################
# Prepare the canvas.                  #
########################################
par(mar=c(0,0,0,0));
plot(x=c(For5Pt1$x[2],For5Pt1$x[3]),
     y=c(For5Pt1$y[2],For5Pt1$y[2]),type="l",
     xlab=" ",xaxt="n",xlim=c(WallL,WallR),
     ylab=" ",yaxt="n",ylim=c(WallB,WallT)
    );
########################################
# Draw the main Venn diagram.          #
########################################
polygon(x=c(For5Pt1$x[1],For5Pt1$x[1],For5Pt1$x[3],For5Pt1$x[3]),
        y=c(For5Pt1$y[2],For5Pt1$y[5],For5Pt1$y[5],For5Pt1$y[2]),
       );
polygon(x=c(For5Pt1$x[2],For5Pt1$x[2],For5Pt1$x[4],For5Pt1$x[4]),
        y=c(For5Pt1$y[3],For5Pt1$y[6],For5Pt1$y[6],For5Pt1$y[3]),
       );
polygon(x=c(For5Pt1$x[2],For5Pt1$x[2],For5Pt1$x[4],For5Pt1$x[4]),
        y=c(For5Pt1$y[1],For5Pt1$y[4],For5Pt1$y[4],For5Pt1$y[1]),
       );
text(lab=paste(For5Pt6[1,1,2]," (",
               round(100*For5Pt6[1,1,2]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[2]+For5Pt1$x[4]),
     y=0.5*(For5Pt1$y[1]+For5Pt1$y[2]));
text(lab=paste(For5Pt6[1,2,1]," (",
               round(100*For5Pt6[1,2,1]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[2]+For5Pt1$x[4]),
     y=0.5*(For5Pt1$y[5]+For5Pt1$y[6]));
text(lab=paste(For5Pt6[1,2,2]," (",
               round(100*For5Pt6[1,2,2]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[3]+For5Pt1$x[4]),
     y=0.5*(For5Pt1$y[3]+For5Pt1$y[4]));
text(lab=paste(For5Pt6[2,1,1],"\n(",
               round(100*For5Pt6[2,1,1]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[1]+For5Pt1$x[2]),
     y=0.5*(For5Pt1$y[5]+For5Pt1$y[2]));
text(lab=paste(For5Pt6[2,1,2]," (",
               round(100*For5Pt6[2,1,2]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]),
     y=0.5*(For5Pt1$y[2]+For5Pt1$y[3]));
text(lab=paste(For5Pt6[2,2,1]," (",
               round(100*For5Pt6[2,2,1]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]),
     y=0.5*(For5Pt1$y[4]+For5Pt1$y[5]));
text(lab=paste(For5Pt6[2,2,2]," (",
               round(100*For5Pt6[2,2,2]/(nD2C-For5Pt6[1,1,1]),
                     digits=1),
               ".0%)",sep=""),
     x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]),
     y=0.5*(For5Pt1$y[3]+For5Pt1$y[4]));
########################################
# Add the T01EF part.                  #
########################################
polygon(x=c(For5Pt2$x[1],For5Pt2$x[1],For5Pt2$x[3],For5Pt2$x[3]),
        y=c(For5Pt2$y[2],For5Pt2$y[5],For5Pt2$y[5],For5Pt2$y[2]),
       );
polygon(x=c(For5Pt2$x[2],For5Pt2$x[2],For5Pt2$x[3],For5Pt2$x[3]),
        y=c(For5Pt2$y[3],For5Pt2$y[5],For5Pt2$y[5],For5Pt2$y[3]),
       );
polygon(x=c(For5Pt2$x[2],For5Pt2$x[2],For5Pt2$x[3],For5Pt2$x[3]),
        y=c(For5Pt2$y[2],For5Pt2$y[4],For5Pt2$y[4],For5Pt2$y[2]),
       );
text(lab=paste(For5Pt6[2,1,1],"\n(",
               round(100*For5Pt6[2,1,1]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt2$x[1]+For5Pt2$x[2]),
     y=0.5*(For5Pt2$y[5]+For5Pt2$y[2]));
text(lab=paste(For5Pt6[2,1,2]," (",
               round(100*For5Pt6[2,1,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
     y=0.5*(For5Pt2$y[2]+For5Pt2$y[3]));
text(lab=paste(For5Pt6[2,2,1]," (",
               round(100*For5Pt6[2,2,1]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
     y=0.5*(For5Pt2$y[4]+For5Pt2$y[5]));
text(lab=paste(For5Pt6[2,2,2]," (",
               round(100*For5Pt6[2,2,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
     y=0.5*(For5Pt2$y[3]+For5Pt2$y[4]));
#text(lab=For5Pt6[2,1,1],x=0.5*(For5Pt2$x[1]+For5Pt2$x[2]),
#                        y=0.5*(For5Pt2$y[5]+For5Pt2$y[2]));
#text(lab=For5Pt6[2,1,2],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
#                        y=0.5*(For5Pt2$y[2]+For5Pt2$y[3]));
#text(lab=For5Pt6[2,2,1],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
#                        y=0.5*(For5Pt2$y[4]+For5Pt2$y[5]));
#text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]),
#                        y=0.5*(For5Pt2$y[3]+For5Pt2$y[4]));
########################################
# Add the T01NL part.                  #
########################################
polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[3],For5Pt3$x[3]),
        y=c(For5Pt3$y[3],For5Pt3$y[5],For5Pt3$y[5],For5Pt3$y[3]),
       );
polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[4],For5Pt3$x[4]),
        y=c(For5Pt3$y[3],For5Pt3$y[6],For5Pt3$y[6],For5Pt3$y[3]),
       );
polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[4],For5Pt3$x[4]),
        y=c(For5Pt3$y[3],For5Pt3$y[4],For5Pt3$y[4],For5Pt3$y[3]),
       );
text(lab=paste(For5Pt6[1,2,1]," (",
               round(100*For5Pt6[1,2,1]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt3$x[2]+For5Pt3$x[4]),
     y=0.5*(For5Pt3$y[5]+For5Pt3$y[6]));
text(lab=paste(For5Pt6[1,2,2]," (",
               round(100*For5Pt6[1,2,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt3$x[3]+For5Pt3$x[4]),
     y=0.5*(For5Pt3$y[3]+For5Pt3$y[4]));
text(lab=paste(For5Pt6[2,2,1]," (",
               round(100*For5Pt6[2,2,1]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]),
     y=0.5*(For5Pt3$y[4]+For5Pt3$y[5]));
text(lab=paste(For5Pt6[2,2,2]," (",
               round(100*For5Pt6[2,2,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]),
     y=0.5*(For5Pt3$y[3]+For5Pt3$y[4]));
#text(lab=For5Pt6[1,2,1],x=0.5*(For5Pt3$x[2]+For5Pt3$x[4]),
#                        y=0.5*(For5Pt3$y[5]+For5Pt3$y[6]));
#text(lab=For5Pt6[1,2,2],x=0.5*(For5Pt3$x[3]+For5Pt3$x[4]),
#                        y=0.5*(For5Pt3$y[3]+For5Pt3$y[4]));
#text(lab=For5Pt6[2,2,1],x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]),
#                        y=0.5*(For5Pt3$y[4]+For5Pt3$y[5]));
#text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]),
#                        y=0.5*(For5Pt3$y[3]+For5Pt3$y[4]));
########################################
# Add the T01nC part.                  #
########################################
polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[3],For5Pt4$x[3]),
        y=c(For5Pt4$y[2],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[2]),
       );
polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[4],For5Pt4$x[4]),
        y=c(For5Pt4$y[3],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[3]),
       );
polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[4],For5Pt4$x[4]),
        y=c(For5Pt4$y[1],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[1]),
       );
text(lab=paste(For5Pt6[1,1,2]," (",
               round(100*For5Pt6[1,1,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt4$x[2]+For5Pt4$x[4]),
     y=0.5*(For5Pt4$y[1]+For5Pt4$y[2]));
text(lab=paste(For5Pt6[1,2,2]," (",
               round(100*For5Pt6[1,2,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt4$x[3]+For5Pt4$x[4]),
     y=0.5*(For5Pt4$y[3]+For5Pt4$y[4]));
text(lab=paste(For5Pt6[2,1,2]," (",
               round(100*For5Pt6[2,1,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]),
     y=0.5*(For5Pt4$y[2]+For5Pt4$y[3]));
text(lab=paste(For5Pt6[2,2,2]," (",
               round(100*For5Pt6[2,2,2]/nTop01,digits=1),
               "%)",sep=""),
     x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]),
     y=0.5*(For5Pt4$y[3]+For5Pt4$y[4]));
#text(lab=For5Pt6[1,1,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[4]),
#                        y=0.5*(For5Pt4$y[1]+For5Pt4$y[2]));
#text(lab=For5Pt6[1,2,2],x=0.5*(For5Pt4$x[3]+For5Pt4$x[4]),
#                        y=0.5*(For5Pt4$y[3]+For5Pt4$y[4]));
#text(lab=For5Pt6[2,1,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]),
#                        y=0.5*(For5Pt4$y[2]+For5Pt4$y[3]));
#text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]),
#                        y=0.5*(For5Pt4$y[3]+For5Pt4$y[4]));
########################################
# Add arrows and labels.               #
########################################
i1 <- 1; while (i1 <= 3) {
   text(lab=LabText[i1],pos=For5Pt5$pos[i1],
        x=For5Pt5$xt[i1],y=For5Pt5$yt[i1]);
   arrows(x0=For5Pt5$x0[i1]+(For5Pt5$x1[i1]-For5Pt5$x0[i1])*Space2,
          x1=For5Pt5$x0[i1]+(For5Pt5$x1[i1]-For5Pt5$x0[i1])*Space3,
          y0=For5Pt5$y0[i1]+(For5Pt5$y1[i1]-For5Pt5$y0[i1])*Space2,
          y1=For5Pt5$y0[i1]+(For5Pt5$y1[i1]-For5Pt5$y0[i1])*Space3,
          length=0.05);
   i1 = i1 + 1;
}
text(x=For5Pt1$x[1],y=For5Pt1$y[6]+0.40*(For5Pt1$y[6]-For5Pt1$y[5]),
     lab="All three Top 1% groups,\nn = 218",pos=4);
par(mar=c(5.1,4.1,4.1,2.1));
######################################################################
# Table 6 looks at the same variables as Table 1, only for each of   #
#  the 1% groups and for those majority that didn't fall into any of #
#  the 1% groups.                                                    #
# * That's means, standard deviations, and medians for all of the    #
#   measures of interest listed in the Methods section.              #
######################################################################
For5Pt0 <- For5Pt0[,c("nDays","Duration","Freq","nCont","ContPED",
                      "EntPCont","FeesPCont","AvgBuyIn","LineupPE",
                      "TotFees","FeesPDay","NetLoss","PctLoss",
                      "PctProfit","RiskScore",
                      "UserID","T01EF","T01NL","T01nC","NotT1")];
For6Pt1 <- subset(For5Pt0,For5Pt0$T01EF==1);
For6Pt2 <- subset(For5Pt0,For5Pt0$T01NL==1);
For6Pt3 <- subset(For5Pt0,For5Pt0$T01nC==1);
For6Pt4 <- subset(For5Pt0,For5Pt0$NotT1==1);
############################################################
# Construct Table 6.                                       #
# * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT.           #
# * Some of the numbers in the "Top 1% on number of        #
#   contests" section are not exactly equal to what is in  #
#   the manuscript.                                        #
#           Manuscript             This Run                #
#              Mean      SD    Med    Mean      SD    Med  #
# EntryFees 11207.8 34771.8 3617.9 11205.9 34772.3 3617.9  #
# E.F.P.D.    208.1  1003.8   45.0   208.0  1003.8   45.0  #
# Net Loss    554.6  4998.5  271.1   553.8  4998.6  264.1  #
# Pct. Lost    10.7    53.5   15.9    10.6    53.5   15.9  #
# Pct. Won     29.3    15.0   26.0    29.3    14.9   26.0  #
# * Two of the numbers in the "Remainder of the player     #
#   pool" section don't exactly match what is in the       #
#   manuscript.                                            #
#                        Manuscript       This Run         #
#                         Mean    SD  Med  Mean    SD  Med #
# Number of Active Days   16.9  19.8 10.0  16.9  19.7 10.0 #
#      Total Entry Fees  278.7 560.1 83.3 278.7 560.1 83.2 #
############################################################
EpiTab6 <- data.frame(VarLab=c("Number of Active Days",
                               "Duration -- Numbers of Days",
                               "Percentage of Entry Days",
                               "Number of Contests",
                               "Contests Per Entry Day",
                               "Entries Per Contest",
                               "Entry Fees Per Contest",
                               "Average Entry Fee Size",
                               "Distinct Lineups Per Entry",
                               "Total Entry Fees",
                               "Entry Fees Per Day",
                               "Net Loss",
                               "Percent Lost",
                               "Percent of Contests Won",
                               "Player Risk Score"),
                      T01EFMeans=0,T01EFSDs=0,T01EFP50s=0,
                      T01NLMeans=0,T01NLSDs=0,T01NLP50s=0,
                      T01nCMeans=0,T01nCSDs=0,T01nCP50s=0,
                      NotT1Means=0,NotT1SDs=0,NotT1P50s=0,
                      stringsAsFactors=FALSE);
i1 <- nrow(EpiTab6); while (i1 > 0) {
   EpiTab6$T01EFMeans[i1] = mean(For6Pt1[,i1]);
   EpiTab6$T01EFSDs[i1] = sd(For6Pt1[,i1]);
   EpiTab6$T01EFP50s[i1] = median(For6Pt1[,i1]);
   EpiTab6$T01NLMeans[i1] = mean(For6Pt2[,i1]);
   EpiTab6$T01NLSDs[i1] = sd(For6Pt2[,i1]);
   EpiTab6$T01NLP50s[i1] = median(For6Pt2[,i1]);
   EpiTab6$T01nCMeans[i1] = mean(For6Pt3[,i1]);
   EpiTab6$T01nCSDs[i1] = sd(For6Pt3[,i1]);
   EpiTab6$T01nCP50s[i1] = median(For6Pt3[,i1]);
   EpiTab6$NotT1Means[i1] = mean(For6Pt4[,i1]);
   EpiTab6$NotT1SDs[i1] = sd(For6Pt4[,i1]);
   EpiTab6$NotT1P50s[i1] = median(For6Pt4[,i1]);
   i1 = i1 - 1;
}
Scrap1 <- EpiTab6[,c(1,2:4)];
   Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1
#                      VarLab T01EFMeans T01EFSDs T01EFP50s
#       Number of Active Days       70.3     36.1      67.5
# Duration -- Numbers of Days      139.7     11.7     141.0
#    Percentage of Entry Days       50.1     24.5      49.2
#          Number of Contests      651.7    967.1     320.5
#      Contests Per Entry Day       10.3     16.3       4.6
#         Entries Per Contest        2.5      2.4       1.6
#      Entry Fees Per Contest       81.0    150.8      49.3
#      Average Entry Fee Size       59.8    146.8      22.2
#  Distinct Lineups Per Entry        0.9      0.2       1.0
#            Total Entry Fees    21281.6  35457.8   11692.9
#          Entry Fees Per Day      434.9   1043.3     207.5
#                    Net Loss    -7494.0  97876.0    1791.9
#                Percent Lost      -72.0    840.7      19.2
#     Percent of Contests Won       28.5     12.2      26.3
#           Player Risk Score       10.8      7.6       9.7
Scrap1 <- EpiTab6[,c(1,5:7)];
   Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1
#                      VarLab T01NLMeans T01NLSDs T01NLP50s
#       Number of Active Days       58.4     34.6      52.0
# Duration -- Numbers of Days      137.3     14.3     141.0
#    Percentage of Entry Days       42.3     24.0      37.4
#          Number of Contests      380.4    718.5     191.0
#      Contests Per Entry Day        5.8      7.7       3.6
#         Entries Per Contest        2.2      1.7       1.5
#      Entry Fees Per Contest       82.6    161.5      42.9
#      Average Entry Fee Size       62.0    158.1      24.9
#  Distinct Lineups Per Entry        0.9      0.1       1.0
#            Total Entry Fees    13036.5  16457.2    6375.0
#          Entry Fees Per Day      284.5    411.9     140.8
#                    Net Loss     4475.9   4748.1    2667.5
#                Percent Lost       43.5     17.2      42.1
#     Percent of Contests Won       22.9      9.4      20.4
#           Player Risk Score       14.8     10.7      13.5
Scrap1 <- EpiTab6[,c(1,8:10)];
   Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1
#                      VarLab T01nCMeans T01nCSDs T01nCP50s
#       Number of Active Days       85.8     33.7      87.0
# Duration -- Numbers of Days      138.1     14.4     141.0
#    Percentage of Entry Days       62.2     23.1      63.6
#          Number of Contests     1091.9    927.7     729.5
#      Contests Per Entry Day       15.6     16.2       9.9
#         Entries Per Contest        1.6      1.0       1.3
#      Entry Fees Per Contest        8.3     13.1       3.8
#      Average Entry Fee Size        6.2     11.8       2.7
#  Distinct Lineups Per Entry        0.9      0.1       1.0
#            Total Entry Fees    11205.9  34772.3    3617.9
#          Entry Fees Per Day      208.0   1003.8      45.0
#                    Net Loss      553.8   4998.6     264.1
#                Percent Lost       10.6     53.5      15.9
#     Percent of Contests Won       29.3     14.9      26.0
#           Player Risk Score        5.2      2.9       4.9
Scrap1 <- EpiTab6[,c(1,11:13)];
   Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1
#The median for Entry Fees Per Contest is slightly off.
#                      VarLab NotT1Means NotT1SDs NotT1P50s
#       Number of Active Days       16.9     19.7      10.0
# Duration -- Numbers of Days      113.9     45.3     134.0
#    Percentage of Entry Days       19.1     22.1      11.9
#          Number of Contests       44.5     66.4      19.0
#      Contests Per Entry Day        2.5      2.4       2.0
#         Entries Per Contest        1.2      0.4       1.0
#      Entry Fees Per Contest        7.4     12.1       4.2
#      Average Entry Fee Size        6.6     10.7       3.9
#  Distinct Lineups Per Entry        1.0      0.1       1.0
#            Total Entry Fees      278.7    560.1      83.2
#          Entry Fees Per Day       16.5     27.1       8.9
#                    Net Loss       64.4   1523.6      30.0
#                Percent Lost       47.9     91.9      53.8
#     Percent of Contests Won       18.5     15.3      17.5
#           Player Risk Score       12.5     12.1       9.7
############################################################
# Perform the Mann-Whitney U-tests comparing each of the   #
#  1% groups to the NotT1 group.                           #
# * We used an alpha of 0.001.                             #
# * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT.           #
# -- The median of the risk scores for the T01EF was not   #
#    significantly different from the median of the risk   #
#    scores for the NotT1 group (Both are listed as 9.7.). #
# -- The median of the risk score for the T01NL group is   #
#    BARELY significantly different from that of the NotT1 #
#    group (p = 0.0008675694).                             #
############################################################
Scrap1 <- data.frame(VarLab=c("Number of Active Days",
                              "Duration -- Numbers of Days",
                              "Percentage of Entry Days",
                              "Number of Contests",
                              "Contests Per Entry Day",
                              "Entries Per Contest",
                              "Entry Fees Per Contest",
                              "Average Entry Fee Size",
                              "Distinct Lineups Per Entry",
                              "Total Entry Fees",
                              "Entry Fees Per Day",
                              "Net Loss",
                              "Percent Lost",
                              "Percent of Contests Won",
                              "Player Risk Score"),
                      pEF=0,pNL=0,pnC=0,
                      SigEF="Accept",SigNL="Accept",SignC="Accept",
                      stringsAsFactors=FALSE);
i1 <- nrow(Scrap1); while (i1 > 0) {
   Scrap2 <- data.frame(Measure=c(For6Pt1[,i1],For6Pt4[,i1]),
                        Group=rep(c("Top01","NotT1"),
                                  c(nTop01,nNotT1)),
                        stringsAsFactors=FALSE);
   Scrap3 <- wilcox.test(Measure~Group,data=Scrap2);
      Scrap1[i1,2] = Scrap3$p.value;
   Scrap2 <- data.frame(Measure=c(For6Pt2[,i1],For6Pt4[,i1]),
                        Group=rep(c("Top01","NotT1"),
                                  c(nTop01,nNotT1)),
                        stringsAsFactors=FALSE);
   Scrap3 <- wilcox.test(Measure~Group,data=Scrap2);
      Scrap1[i1,3] = Scrap3$p.value;
   Scrap2 <- data.frame(Measure=c(For6Pt3[,i1],For6Pt4[,i1]),
                        Group=rep(c("Top01","NotT1"),
                                  c(nTop01,nNotT1)),
                        stringsAsFactors=FALSE);
   Scrap3 <- wilcox.test(Measure~Group,data=Scrap2);
      Scrap1[i1,4] = Scrap3$p.value;
   i1 = i1 - 1;
}
Scrap1$SigEF[Scrap1$pEF<0.001] = "Reject";
Scrap1$SigNL[Scrap1$pNL<0.001] = "Reject";
Scrap1$SignC[Scrap1$pnC<0.001] = "Reject";
Scrap4 <- Scrap1; Scrap4[,2:4] = round(Scrap4[,2:4],digits=3); Scrap4
#                      VarLab   pEF   pNL   pnC  SigEF  SigNL  SignC
#       Number of Active Days 0.000 0.000 0.000 Reject Reject Reject
# Duration -- Numbers of Days 0.000 0.000 0.000 Reject Reject Reject
#    Percentage of Entry Days 0.000 0.000 0.000 Reject Reject Reject
#          Number of Contests 0.000 0.000 0.000 Reject Reject Reject
#      Contests Per Entry Day 0.000 0.000 0.000 Reject Reject Reject
#         Entries Per Contest 0.000 0.000 0.000 Reject Reject Reject
#      Entry Fees Per Contest 0.000 0.000 0.303 Reject Reject Accept
#      Average Entry Fee Size 0.000 0.000 0.001 Reject Reject Accept
#  Distinct Lineups Per Entry 0.000 0.000 0.000 Reject Reject Reject
#            Total Entry Fees 0.000 0.000 0.000 Reject Reject Reject
#          Entry Fees Per Day 0.000 0.000 0.000 Reject Reject Reject
#                    Net Loss 0.000 0.000 0.000 Reject Reject Reject
#                Percent Lost 0.000 0.000 0.000 Reject Reject Reject
#     Percent of Contests Won 0.000 0.000 0.000 Reject Reject Reject
#           Player Risk Score 0.579 0.001 0.000 Accept Reject Reject
######################################################################
# Table 7 looks at the same variables as Table 3, only for each of   #
#  the 1% groups and for those majority that didn't fall into any of #
#  the 1% groups.                                                    #
# * Here, we're looking at participation in NFL contests, NBA        #
#   contests, and other sports' contests, all within the 2014 NFL    #
#   Season.                                                          #
######################################################################
For7Pt0 <- merge(Data6Play[,c("UserID","DidNFL","DidNBA","DidOth")],
                For5Pt0[,c("UserID","T01EF","T01NL","T01nC","NotT1")],
                all.x=TRUE,all.y=TRUE,by=c("UserID"));
   For7Pt0$Group = 1 + 1*(For7Pt0$DidNBA) + 2*(For7Pt0$DidOth);
   For7Pt0 <- For7Pt0[,c("Group","T01EF","T01NL","T01nC","NotT1",
                         "UserID","DidNFL","DidNBA","DidOth")];
   For7Pt1 <- subset(For7Pt0,For7Pt0$T01EF==1);
   For7Pt2 <- subset(For7Pt0,For7Pt0$T01NL==1);
   For7Pt3 <- subset(For7Pt0,For7Pt0$T01nC==1);
   For7Pt4 <- subset(For7Pt0,For7Pt0$NotT1==1);
############################################################
# Construct Table 7.                                       #
############################################################
EpiTab7 <- data.frame(Sport=c("NFL only","NFL & NBA only",
                              "NFL & Other only","NFL & NBA & Other",
                              "Total"),
                      T01EFCnt=rep(0,5),T01EFPct=rep(0,5),
                      T01NLCnt=rep(0,5),T01NLPct=rep(0,5),
                      T01nCCnt=rep(0,5),T01nCPct=rep(0,5),
                      NotT1Cnt=rep(0,5),NotT1Pct=rep(0,5),
                      stringsAsFactors=FALSE);
   EpiTab7$T01EFCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt1);
   EpiTab7$T01EFCnt[5] = nTop01;
   EpiTab7$T01EFPct[1:5] = 100 * EpiTab7$T01EFCnt / nTop01;
   EpiTab7$T01NLCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt2);
   EpiTab7$T01NLCnt[5] = nTop01;
   EpiTab7$T01NLPct[1:5] = 100 * EpiTab7$T01NLCnt / nTop01;
   EpiTab7$T01nCCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt3);
   EpiTab7$T01nCCnt[5] = nTop01;
   EpiTab7$T01nCPct[1:5] = 100 * EpiTab7$T01nCCnt / nTop01;
   EpiTab7$NotT1Cnt[1:4] = xtabs(rep(1,nNotT1)~Group,data=For7Pt4);
   EpiTab7$NotT1Cnt[5] = nNotT1;
   EpiTab7$NotT1Pct[1:5] = 100 * EpiTab7$NotT1Cnt / nNotT1;
Scrap1 <- EpiTab7; Scrap1[,2:9] = round(Scrap1[,2:9],digits=1);
   Scrap1
#                     T01EF     T01NL     T01nC       NotT1
#             Sport Cnt   Pct Cnt   Pct Cnt   Pct   Cnt   Pct
#          NFL only   9   8.7  14  13.5   6   5.8  5108  50.2
#    NFL & NBA only  12  11.5  16  15.4   3   2.9   801   7.9
#  NFL & Other only   9   8.7  14  13.5   7   6.7  2168  21.3
# NFL & NBA & Other  74  71.2  60  57.7  88  84.6  2090  20.6
#             Total 104 100.0 104 100.0 104 100.0 10167 100.0
############################################################
# Perform the Fisher's exact tests to see where the        #
#  percentages are significantly different.                #
# * For the "NFL & Other only" category, the proportions   #
#   of the T01EF group and the NotT1 group are barely      #
#   significantly different.                               #
# * We used an alpha of 0.001.                             #
############################################################
Scrap1 <- data.frame(Sport=c("NFL only","NFL & NBA only",
                             "NFL & Other only","NFL & NBA & Other"),
                      pEF=0,pNL=0,pnC=0,
                      SigEF="Accept",SigNL="Accept",SignC="Accept",
                      stringsAsFactors=FALSE);
i1 <- 4; while (i1 > 0) {
   Scrap2 <- matrix(c(EpiTab7$T01EFCnt[i1],
                      nTop01-EpiTab7$T01EFCnt[i1],
                      EpiTab7$NotT1Cnt[i1],
                      nNotT1-EpiTab7$NotT1Cnt[i1]),2,2);
   Scrap3 <- fisher.test(Scrap2);
      Scrap1$pEF[i1] = Scrap3$p.value;
   Scrap2 <- matrix(c(EpiTab7$T01NLCnt[i1],
                      nTop01-EpiTab7$T01NLCnt[i1],
                      EpiTab7$NotT1Cnt[i1],
                      nNotT1-EpiTab7$NotT1Cnt[i1]),2,2);
   Scrap3 <- fisher.test(Scrap2);
      Scrap1$pNL[i1] = Scrap3$p.value;
   Scrap2 <- matrix(c(EpiTab7$T01nCCnt[i1],
                      nTop01-EpiTab7$T01nCCnt[i1],
                      EpiTab7$NotT1Cnt[i1],
                      nNotT1-EpiTab7$NotT1Cnt[i1]),2,2);
   Scrap3 <- fisher.test(Scrap2);
      Scrap1$pnC[i1] = Scrap3$p.value;
   i1 = i1 - 1;
}
Scrap1$SigEF[Scrap1$pEF<0.001] = "Reject";
Scrap1$SigNL[Scrap1$pNL<0.001] = "Reject";
Scrap1$SignC[Scrap1$pnC<0.001] = "Reject";
Scrap4 <- Scrap1; Scrap4[,2:4] = round(Scrap4[,2:4],digits=3); Scrap4
#             Sport   pEF   pNL   pnC  SigEF  SigNL  SignC
#          NFL only 0.000 0.000 0.000 Reject Reject Reject
#    NFL & NBA only 0.197 0.010 0.064 Accept Accept Accept
#  NFL & Other only 0.001 0.054 0.000 Reject Accept Reject
# NFL & NBA & Other 0.000 0.000 0.000 Reject Reject Reject
######################################################################
# Table 8 looks at the same variables as Table 4, only for each of   #
#  the 1% groups and for those majority that didn't fall into any of #
#  the 1% groups.                                                    #
# * Here, we're looking at participation in the different categories #
#   of contests, all within the 2014 NFL Season.                     #
######################################################################
Scrap1 <- Data6Play[,c(1,5:10)];
   Scrap1$CntT <- apply(Scrap1[,2:7],FUN=sum,1);
   Scrap1$CntM = apply(Scrap1[,2:6],1,max);
   Scrap1$Pct1 = Scrap1$Cnt1 / Scrap1$CntT;
   Scrap1$Pct2 = Scrap1$Cnt2 / Scrap1$CntT;
   Scrap1$Pct3 = Scrap1$Cnt3 / Scrap1$CntT;
   Scrap1$Pct4 = Scrap1$Cnt4 / Scrap1$CntT;
   Scrap1$Pct5 = Scrap1$Cnt5 / Scrap1$CntT;
   Scrap1$Pct6 = Scrap1$Cnt6 / Scrap1$CntT;
Scrap1$Pref = 7;
   Scrap2 = 1.1 * (Scrap1$Cnt1==Scrap1$CntM);
   Scrap2 = Scrap2 + 2.1 * (Scrap1$Cnt2==Scrap1$CntM);
   Scrap2 = Scrap2 + 3.1 * (Scrap1$Cnt3==Scrap1$CntM);
   Scrap2 = Scrap2 + 4.1 * (Scrap1$Cnt4==Scrap1$CntM);
   Scrap2 = Scrap2 + 5.1 * (Scrap1$Cnt5==Scrap1$CntM);
   Scrap2 = Scrap2 + 6.1 * (Scrap1$Cnt6==Scrap1$CntM);
   Scrap1$Pref = Scrap1$Pref - 6*(Scrap2==1.1);
   Scrap1$Pref = Scrap1$Pref - 5*(Scrap2==2.1);
   Scrap1$Pref = Scrap1$Pref - 4*(Scrap2==3.1);
   Scrap1$Pref = Scrap1$Pref - 3*(Scrap2==4.1);
   Scrap1$Pref = Scrap1$Pref - 2*(Scrap2==5.1);
   Scrap1$Pref = Scrap1$Pref - 1*(Scrap2==6.1);
For8Pt0 <- merge(Scrap1[,c("Cnt1","Cnt2","Cnt3","Cnt4","Cnt5","Cnt6",
                           "Pct1","Pct2","Pct3","Pct4","Pct5","Pct6",
                           "UserID")],
                For5Pt0[,c("UserID","T01EF","T01NL","T01nC","NotT1")],
                all.x=TRUE,all.y=TRUE,by=c("UserID"));
   For8Pt0 <- For8Pt0[,c("Cnt1","Cnt2","Cnt3","Cnt4","Cnt5","Cnt6",
                         "Pct1","Pct2","Pct3","Pct4","Pct5","Pct6",
                         "T01EF","T01NL","T01nC","NotT1","UserID")];
   For8Pt1 <- subset(For8Pt0,For8Pt0$T01EF==1);
   For8Pt2 <- subset(For8Pt0,For8Pt0$T01NL==1);
   For8Pt3 <- subset(For8Pt0,For8Pt0$T01nC==1);
   For8Pt4 <- subset(For8Pt0,For8Pt0$NotT1==1);
############################################################
# Construct Table 8.                                       #
# * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT.           #
# * In this run, we found one fewer person from the "1% by #
#   number of contests" group who played in at least one   #
#   50/50 contest.                                         #
#            Manuscript             This Run               #
#            AtLeastOne AtLeastHalf AtLeastOne AtLeastHalf #
#            Count  Pct Count  Pct  Count  Pct Count  Pct  #
# 50/50        101 97.1    15 14.4    102 98.1    15 14.4  #
# * Many of the counts for the "Remainder of the player    #
#   pool" group do not match the counts in the manuscript, #
#   but they are not different enough for the listed       #
#   percentages to change.                                 #
#            Manuscript             This Run               #
#            AtLeastOne AtLeastHalf AtLeastOne AtLeastHalf #
#            Count  Pct Count  Pct  Count  Pct Count  Pct  #
#      50/50  5848 57.5   702  6.9   5849 57.5   699  6.9  #
#   Heads Up  3528 34.7   386  3.8   3528 34.7   385  3.8  #
#      Mults  4948 48.7   234  2.3   4948 48.7   234  2.3  #
#     League  5191 51.1   295  2.9   5191 51.1   295  2.9  #
#    Move up  5659 55.7   153  1.5   5659 55.7   151  1.5  #
#     Others  9744 95.8  6639 65.3   9744 95.8  6644 65.3  #
############################################################
EpiTab8 <- data.frame(Class=c("50/50 (except Head-to-Head)",
                              "Head-to-Head","Multiplier",
                              "League (except 50/50)",
                              "Move your way up","Other contests"),
                     ALOCT01EF=0,ALOPT01EF=0,ALHCT01EF=0,ALHPT01EF=0,
                     ALOCT01NL=0,ALOPT01NL=0,ALHCT01NL=0,ALHPT01NL=0,
                     ALOCT01nC=0,ALOPT01nC=0,ALHCT01nC=0,ALHPT01nC=0,
                     ALOCNotT1=0,ALOPNotT1=0,ALHCNotT1=0,ALHPNotT1=0,
                     stringsAsFactors=FALSE);
   # Counts and percentages for the top 1% by Total Entry Fees
   EpiTab8$ALOCT01EF[1] = sum(For8Pt1$Cnt1>0);
   EpiTab8$ALOCT01EF[2] = sum(For8Pt1$Cnt2>0);
   EpiTab8$ALOCT01EF[3] = sum(For8Pt1$Cnt3>0);
   EpiTab8$ALOCT01EF[4] = sum(For8Pt1$Cnt4>0);
   EpiTab8$ALOCT01EF[5] = sum(For8Pt1$Cnt5>0);
   EpiTab8$ALOCT01EF[6] = sum(For8Pt1$Cnt6>0);
   EpiTab8$ALOPT01EF = 100 * EpiTab8$ALOCT01EF / nTop01;
   EpiTab8$ALHCT01EF[1] = sum(For8Pt1$Pct1>=0.5);
   EpiTab8$ALHCT01EF[2] = sum(For8Pt1$Pct2>=0.5);
   EpiTab8$ALHCT01EF[3] = sum(For8Pt1$Pct3>=0.5);
   EpiTab8$ALHCT01EF[4] = sum(For8Pt1$Pct4>=0.5);
   EpiTab8$ALHCT01EF[5] = sum(For8Pt1$Pct5>=0.5);
   EpiTab8$ALHCT01EF[6] = sum(For8Pt1$Pct6>=0.5);
   EpiTab8$ALHPT01EF = 100 * EpiTab8$ALHCT01EF / nTop01;
   # Counts and percentages for the top 1% by Net Loss
   EpiTab8$ALOCT01NL[1] = sum(For8Pt2$Cnt1>0);
   EpiTab8$ALOCT01NL[2] = sum(For8Pt2$Cnt2>0);
   EpiTab8$ALOCT01NL[3] = sum(For8Pt2$Cnt3>0);
   EpiTab8$ALOCT01NL[4] = sum(For8Pt2$Cnt4>0);
   EpiTab8$ALOCT01NL[5] = sum(For8Pt2$Cnt5>0);
   EpiTab8$ALOCT01NL[6] = sum(For8Pt2$Cnt6>0);
   EpiTab8$ALOPT01NL = 100 * EpiTab8$ALOCT01NL / nTop01;
   EpiTab8$ALHCT01NL[1] = sum(For8Pt2$Pct1>=0.5);
   EpiTab8$ALHCT01NL[2] = sum(For8Pt2$Pct2>=0.5);
   EpiTab8$ALHCT01NL[3] = sum(For8Pt2$Pct3>=0.5);
   EpiTab8$ALHCT01NL[4] = sum(For8Pt2$Pct4>=0.5);
   EpiTab8$ALHCT01NL[5] = sum(For8Pt2$Pct5>=0.5);
   EpiTab8$ALHCT01NL[6] = sum(For8Pt2$Pct6>=0.5);
   EpiTab8$ALHPT01NL = 100 * EpiTab8$ALHCT01NL / nTop01;
   # Counts and percentages for the top 1% by Number of Contests
   EpiTab8$ALOCT01nC[1] = sum(For8Pt3$Cnt1>0);
   EpiTab8$ALOCT01nC[2] = sum(For8Pt3$Cnt2>0);
   EpiTab8$ALOCT01nC[3] = sum(For8Pt3$Cnt3>0);
   EpiTab8$ALOCT01nC[4] = sum(For8Pt3$Cnt4>0);
   EpiTab8$ALOCT01nC[5] = sum(For8Pt3$Cnt5>0);
   EpiTab8$ALOCT01nC[6] = sum(For8Pt3$Cnt6>0);
   EpiTab8$ALOPT01nC = 100 * EpiTab8$ALOCT01nC / nTop01;
   EpiTab8$ALHCT01nC[1] = sum(For8Pt3$Pct1>=0.5);
   EpiTab8$ALHCT01nC[2] = sum(For8Pt3$Pct2>=0.5);
   EpiTab8$ALHCT01nC[3] = sum(For8Pt3$Pct3>=0.5);
   EpiTab8$ALHCT01nC[4] = sum(For8Pt3$Pct4>=0.5);
   EpiTab8$ALHCT01nC[5] = sum(For8Pt3$Pct5>=0.5);
   EpiTab8$ALHCT01nC[6] = sum(For8Pt3$Pct6>=0.5);
   EpiTab8$ALHPT01nC = 100 * EpiTab8$ALHCT01nC / nTop01;
   # Counts and percentages for those not in the top 1% groups
   EpiTab8$ALOCNotT1[1] = sum(For8Pt4$Cnt1>0);
   EpiTab8$ALOCNotT1[2] = sum(For8Pt4$Cnt2>0);
   EpiTab8$ALOCNotT1[3] = sum(For8Pt4$Cnt3>0);
   EpiTab8$ALOCNotT1[4] = sum(For8Pt4$Cnt4>0);
   EpiTab8$ALOCNotT1[5] = sum(For8Pt4$Cnt5>0);
   EpiTab8$ALOCNotT1[6] = sum(For8Pt4$Cnt6>0);
   EpiTab8$ALOPNotT1 = 100 * EpiTab8$ALOCNotT1 / nNotT1;
   EpiTab8$ALHCNotT1[1] = sum(For8Pt4$Pct1>=0.5);
   EpiTab8$ALHCNotT1[2] = sum(For8Pt4$Pct2>=0.5);
   EpiTab8$ALHCNotT1[3] = sum(For8Pt4$Pct3>=0.5);
   EpiTab8$ALHCNotT1[4] = sum(For8Pt4$Pct4>=0.5);
   EpiTab8$ALHCNotT1[5] = sum(For8Pt4$Pct5>=0.5);
   EpiTab8$ALHCNotT1[6] = sum(For8Pt4$Pct6>=0.5);
   EpiTab8$ALHPNotT1 = 100 * EpiTab8$ALHCNotT1 / nNotT1;
Scrap2 <- EpiTab8;
   Scrap2[,c(2:17)] = round(Scrap2[,c(2:17)],digits=1);
Scrap2[,c(1,2:5)]
#                             T01EF
#                       Class ALOC  ALOP ALHC ALHP
# 50/50 (except Head-to-Head)   92  88.5   10  9.6
#                Head-to-Head   69  66.3    8  7.7
#                  Multiplier   76  73.1    0  0.0
#       League (except 50/50)   79  76.0    0  0.0
#            Move your way up   99  95.2    0  0.0
#              Other contests  104 100.0   56 53.8
Scrap2[,c(1,6:9)]
#                             T01NL
#                       Class ALOC  ALOP ALHC  ALHP
# 50/50 (except Head-to-Head)   79  76.0   10   9.6
#                Head-to-Head   64  61.5    3   2.9
#                  Multiplier   68  65.4    0   0.0
#       League (except 50/50)   75  72.1    0   0.0
#            Move your way up   94  90.4    2   1.9
#              Other contests  104 100.0   64  61.5
Scrap2[,c(1,10:13)]
#                             T01nC
#                       Class ALOC  ALOP ALHC  ALHP
# 50/50 (except Head-to-Head)  101  97.1   15  14.4
#                Head-to-Head   86  82.7    9   8.7
#                  Multiplier   97  93.3    0   0.0
#       League (except 50/50)  100  96.2    1   1.0
#            Move your way up   99  95.2    1   1.0
#              Other contests  104 100.0   35  33.7
Scrap2[,c(1,14:17)]
#                              NotT1
#                        Class ALOC ALOP ALHC ALHP
# 50/50 (except Head-to-Head)  5849 57.5  699  6.9
#                Head-to-Head  3528 34.7  385  3.8
#                  Multiplier  4948 48.7  234  2.3
#       League (except 50/50)  5191 51.1  295  2.9
#            Move your way up  5659 55.7  151  1.5
#              Other contests  9744 95.8 6644 65.3
############################################################
# Perform the Fisher's exact tests.                        #
############################################################
FisherTab8 <- data.frame(Sport=EpiTab8$Class[1:6],
                         pValueALOEF=0,pValueALHEF=0,
                         pValueALONL=0,pValueALHNL=0,
                         pValueALOnC=0,pValueALHnC=0,
                         SigALOEF="Accept",SigALHEF="Accept",
                         SigALONL="Accept",SigALHNL="Accept",
                         SigALOnC="Accept",SigALHnC="Accept",
                         stringsAsFactors=FALSE);
i1 <- 6; while (i1 > 0) {
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01EF,
                  subset=(For7Pt0$T01EF+For7Pt0$NotT1==1));
   FisherTab8$pValueALOEF[i1] = fisher.test(Scrap3)$p.value;
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01NL,
                  subset=(For7Pt0$T01NL+For7Pt0$NotT1==1));
   FisherTab8$pValueALONL[i1] = fisher.test(Scrap3)$p.value;
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01nC,
                  subset=(For7Pt0$T01nC+For7Pt0$NotT1==1));
   FisherTab8$pValueALOnC[i1] = fisher.test(Scrap3)$p.value;
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01EF,
                  subset=(For7Pt0$T01EF+For7Pt0$NotT1==1))
   FisherTab8$pValueALHEF[i1] = fisher.test(Scrap3)$p.value;
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01NL,
                  subset=(For7Pt0$T01NL+For7Pt0$NotT1==1));
   FisherTab8$pValueALHNL[i1] = fisher.test(Scrap3)$p.value;
   Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01nC,
                  subset=(For7Pt0$T01nC+For7Pt0$NotT1==1));
   FisherTab8$pValueALHnC[i1] = fisher.test(Scrap3)$p.value;
   i1 = i1 - 1;
}
FisherTab8$SigALOEF[FisherTab8$pValueALOEF<0.001] = "Reject";
FisherTab8$SigALHEF[FisherTab8$pValueALHEF<0.001] = "Reject";
FisherTab8$SigALONL[FisherTab8$pValueALONL<0.001] = "Reject";
FisherTab8$SigALHNL[FisherTab8$pValueALHNL<0.001] = "Reject";
FisherTab8$SigALOnC[FisherTab8$pValueALOnC<0.001] = "Reject";
FisherTab8$SigALHnC[FisherTab8$pValueALHnC<0.001] = "Reject";
Scrap4 <- FisherTab8;
   Scrap4[,2:7] = round(Scrap4[,2:7],digits=3);
Scrap4[,c(1,2:7)]
#                             pValue
#                       Sport ALOEF ALHEF ALONL ALHNL ALOnC ALHnC
# 50/50 (except Head-to-Head) 0.000 0.245 0.000 0.245 0.000 0.006
#                Head-to-Head 0.000 0.063 0.000 1.000 0.000 0.018
#                  Multiplier 0.000 0.179 0.001 0.179 0.000 0.179
#       League (except 50/50) 0.000 0.077 0.000 0.077 0.000 0.375
#            Move your way up 0.000 0.410 0.000 0.669 0.000 1.000
#              Other contests 0.023 0.017 0.023 0.410 0.023 0.000
Scrap4[,c(1,8:13)]
#                        Result
#                 Sport   ALOEF  ALHEF  ALONL  ALHNL  ALOnC  ALHnC
#    50/50 (except H2H)  Reject Accept Reject Accept Reject Accept
#          Head-to-Head  Reject Accept Reject Accept Reject Accept
#            Multiplier  Reject Accept Reject Accept Reject Accept
# League (except 50/50)  Reject Accept Reject Accept Reject Accept
#      Move your way up  Reject Accept Reject Accept Reject Accept
#        Other contests  Accept Accept Accept Accept Accept Reject