###################################################################### # Read in the data files. # # * After you download the data files, remember to change Folder so # # that R can find the files. # ###################################################################### # Read in Data1 with the codes for states and nations. # # * IDNumber is the integers used as ID numbers. # # -- "Unknown" corresponds to 0 or -1. # # * Name is the name of the state, province, or nation. # # -- DraftKings has more states, provinces, and nations in # # their codebook. The ones listed here are a shorter # # list for all the codes we need for this cohort. # # * Level tells us which name/number combinations are for # # states and which ones are for nations. # # -- For example, State #1 is Alabama, but Nation #1 is # # the U.S.A. # # Data1S will have the codes for states and provinces. # # * Data1N will have the codes for nations. # ############################################################ Folder <- "C:/Laboratory/DraftKings/Impacts/"; Data1 <- read.csv(file=paste0(Folder,"TacklingData1Codes.csv"), header=TRUE,stringsAsFactors=FALSE); Data1S <- subset(Data1,Data1$Level=="State"); Data1N <- subset(Data1,Data1$Level=="Nation"); ############################################################ # Read in the data for geographic location, birth year, # # and player risk score. # # * UserID are sequential numbers denoting the players in # # this cohort. The ID numbers for this package are only # # for this package. For example, UserID = 1 in this # # package does not necesarily denote the same player as # # UserID = 1 in the Social Behaviors package. # # * RegStateID lists who listed which state/province as # # their state/province of residence. # # * RegCountryID lists who listed which nation as # # their nation of residence. # # * BirthYear has the users' birth years, if known. # # -- If DraftKings did not provide a birth year, then # # BirthYear = 0. # # * RiskScore is the Player Risk Score, based on the # # contests the players entered during the 2014 NFL # # season (between 2014-08-22 and 2015-01-25). # # -- RiskScore = 0 for free contests. # # Data2All has all 12041 users. # # * Data2Cut has the 10385 users in the analytic sample. # ############################################################ Data2All <- read.csv(file=paste0(Folder,"TacklingData2Cohort.csv"), header=TRUE,stringsAsFactors=FALSE); Data2Cut <- subset(Data2All,!is.na(Data2All$RiskScore)); nD2A <- nrow(Data2All); nD2A nD2C <- nrow(Data2Cut); nD2C ############################################################ # Read in the metrics for players' entries into NFL-based # # contests, non-NFL-based contests, and all contests # # (Data3NFL, Data4Non, and Data5All, respectively). # # * Date1st is the start date for the first contest the # # user entered. # # * DateLst is defined differently based on whether the # # player has any activity after the 2014 NFL season. # # -- If the player has activity after the 2014 NFL season, # # then DateLst is 2015-01-25, the last date of # # DraftKings' 2014 NFL season. # # -- If the player only has activity during the 2014 NFL # # season, then DateLst is the start date of the # # player's last contest. # # * nDays is the number of unique contest start dates. # # * nCont is the number of contests the player entered. # # * nEntries is the total number of entries over the # # contests during the 2014 NFL season. # # * nLineups is the total number of lineups over the # # contests during the 2014 NFL season. # # * TotFees is the total amount spent on entry fees. # # * AvgBuyIn is the average entry fee over the contests # # the player entered. # # * TotWinnings is the player's total winnings. # # * nUserUp is the number of contests where the player # # made a profit (i.e., the amount won in prizes exceeds # # the amount spent on entry fees). # # * UserID is the ID number that links this data to the # # demographic data in Data2All and Data2Cut. # ############################################################ Data3NFL <- read.csv(file=paste0(Folder,"TacklingData3NFL.csv"), header=TRUE,stringsAsFactors=FALSE); Data4Not <- read.csv(file=paste0(Folder,"TacklingData4Not.csv"), header=TRUE,stringsAsFactors=FALSE); Data5All <- read.csv(file=paste0(Folder,"TacklingData5All.csv"), header=TRUE,stringsAsFactors=FALSE); ############################################################ # Read in data on the types of contests the players # # entered, both in terms of sport and payout structure. # # * UserID is the ID number that links this data to the # # demographic data in Data2All and Data2Cut. # # * DidNFL is TRUE if the player played in at least one # # NFL-based contest. # # * DidNBA is TRUE if the player played in at least one # # NBA-based contest. # # * DidOth is TRUE if the player played in at least one # # contest based on some other league. # # * Cnt1 has the numbers of 50/50 contests. # # * Cnt2 has the numbers of head-to-head contests. # # * Cnt3 has the numbers of multiplier contests. # # * Cnt4 has the numbers of league contests. # # * Cnt5 has the numbers of "move your way up" contests. # # * Cnt6 has the numbers of other contests. # ############################################################ Data6Play <- read.csv(file=paste0(Folder,"TacklingData6Play.csv"), header=TRUE,stringsAsFactors=FALSE); ###################################################################### # Get the results for the "Player Characteristics" section of the # # paper. # ###################################################################### # Among the initial sample of 12,041 daily fantasy sports # # players who deposited money with DraftKings between # # August 1st and September 30th, 2014, 86.2% (10,385) # # made at least one paid entry into an NFL contest during # # the 2014 NFL season. # ############################################################ round(100*nD2C/nD2A,digits=1) nD2C ############################################################ # In their records, DraftKings had birth years for 3,990 # # out of the 12,041, and 3,795 out of the 10,385. # # To the nearest tenth of a year, the ranges and mean of # # the birth years were the same for the 3,990 and the # # 3,795 -- 1932 to 1999, with a mean of 1980.5 and a # # standard deviation of 9.4 years. # ############################################################ Scrap1 <- data.frame(Cohort=rep(0,6),AnalyticSample=0); Scrap2 <- subset(Data2All,Data2All$BirthYear>0); Scrap3 <- subset(Scrap2,!is.na(Scrap2$RiskScore)); Scrap1[1,1] = nrow(Scrap2); Scrap1[2,1] = nD2A; Scrap1[3,1] = min(Scrap2$BirthYear); Scrap1[4,1] = max(Scrap2$BirthYear); Scrap1[5,1] = mean(Scrap2$BirthYear); Scrap1[6,1] = sd(Scrap2$BirthYear); Scrap1[1,2] = nrow(Scrap3); Scrap1[2,2] = nD2C; Scrap1[3,2] = min(Scrap3$BirthYear); Scrap1[4,2] = max(Scrap3$BirthYear); Scrap1[5,2] = mean(Scrap3$BirthYear); Scrap1[6,2] = sd(Scrap3$BirthYear); rownames(Scrap1) =c("Count","All","Min","Max","Mean","SD"); round(Scrap1,digits=1) # Cohort AnalyticSample #Count 3990.0 3795.0 #All 12041.0 10385.0 #Min 1932.0 1932.0 #Max 1999.0 1999.0 #Mean 1980.5 1980.5 #SD 9.4 9.4 ############################################################ # When they initially registered, players had the # # opportunity to list a country and a state. # # * All 12,041 players were from North America. # # * Almost all of the 12,041 listed either the United # # States or Canada. # # * Of the 10,385 in Data2Cut, 9,815 listed the United # # States, 180 listed Canada, # # and 390 did not provide a country. # # * Of the other 1,656, 1,534 listed the United States, # # 80 listed Canada, and 42 did not provide a country. # ############################################################ Scrap1 <- cbind(xtabs(rep(1,nD2C)~Data2Cut$RegCountryID), c(0,0,0), xtabs(rep(1,nD2A)~Data2All$RegCountryID)) Scrap1[1:3,2] = Scrap1[1:3,3] - Scrap1[1:3,1]; colnames(Scrap1) = c("InD2C","NotD2C","Total"); rownames(Scrap1) = c("None","USA","CAN"); Scrap1 <- Scrap1[c(2,3,1),]; Scrap1 # InD2C NotD2C Total # USA 9815 1534 11349 # CAN 180 80 260 # None 390 42 432 ############################################################ # Most listed the United States (11,349 out of the 12,041; # # 9,815 out of the 10,385). # # * They were distributed across 49 states (no one listed # # Montana), the District of Columbia, and armed forces # # stations in the Americas, Europe, and the Pacific. # # * Looking at the 9,815, the states with the most players # # were California (9.0%), Texas (9.0%), and New York # # (6.9%). # ############################################################ PlayUSA <- subset(Data2All,Data2All$RegCountryID==1) nUSA <- length(PlayUSA$UserID); nStates <- length(unique(PlayUSA$RegStateID)); Scrap1 <- xtabs(rep(1,nUSA)~(!is.na(PlayUSA$RiskScore)) +PlayUSA$RegStateID); StateTab <- data.frame(#Name=rep("a",nStates+1), IDNumber=rep(0,nStates+1), AnaInCnt=rep(0,nStates+1), AnaInPct=rep(0,nStates+1), AnaOutCnt=rep(0,nStates+1), AnaOutPct=rep(0,nStates+1), AnaTotCnt=rep(0,nStates+1), AnaTotPct=rep(0,nStates+1), stringsAsFactors=FALSE); StateTab$AnaInCnt[1:nStates] = Scrap1[2,]; StateTab$AnaOutCnt[1:nStates] = Scrap1[1,]; StateTab$IDNumber[1:nStates] = sort(unique(PlayUSA$RegStateID)); StateTab <- merge(StateTab,Data1S[,c("IDNumber","Name")], by=c("IDNumber"),all.x=TRUE); StateTab$Name[1] = "Total"; StateTab$AnaInCnt[1] = -1; StateTab <- StateTab[,c(8,2,3,4,5,6,7,1)]; StateTab <- StateTab[order(-StateTab$AnaInCnt),]; StateTab$AnaInCnt[nStates+1] = sum(StateTab$AnaInCnt[1:nStates]); StateTab$AnaOutCnt[nStates+1] = sum(StateTab$AnaOutCnt[1:nStates]); StateTab$AnaTotCnt = StateTab$AnaInCnt + StateTab$AnaOutCnt; StateTab$AnaInPct = 100 * StateTab$AnaInCnt / StateTab$AnaInCnt[nStates+1]; StateTab$AnaOutPct = 100 * StateTab$AnaOutCnt / StateTab$AnaOutCnt[nStates+1]; StateTab$AnaTotPct = 100 * StateTab$AnaTotCnt / StateTab$AnaTotCnt[nStates+1]; StateTab$AnaInPct = round(StateTab$AnaInPct,digits=1); StateTab$AnaOutPct = round(StateTab$AnaOutPct,digits=1); StateTab$AnaTotPct = round(StateTab$AnaTotPct,digits=1); StateTab # Ana Ana Ana Ana Ana Ana # Name InCnt InPct OutCnt OutPct TotCnt TotPct IDNumber # California 885 9.0 187 12.2 1072 9.4 5 # Texas 885 9.0 148 9.6 1033 9.1 44 # New York 680 6.9 104 6.8 784 6.9 33 # Massachusetts 558 5.7 61 4.0 619 5.5 22 # Florida 530 5.4 75 4.9 605 5.3 10 # Pennsylvania 523 5.3 63 4.1 586 5.2 39 # Illinois 511 5.2 81 5.3 592 5.2 14 # New Jersey 413 4.2 53 3.5 466 4.1 31 # Ohio 395 4.0 39 2.5 434 3.8 36 # Alabama 321 3.3 95 6.2 416 3.7 1 # Michigan 296 3.0 46 3.0 342 3.0 23 #ArmedForces:Americas 272 2.8 28 1.8 300 2.6 65 # Georgia 239 2.4 36 2.3 275 2.4 11 # Virginia 239 2.4 44 2.9 283 2.5 47 # Maryland 237 2.4 29 1.9 266 2.3 21 # North Carolina 218 2.2 28 1.8 246 2.2 34 # Colorado 215 2.2 31 2.0 246 2.2 6 # Minnesota 210 2.1 43 2.8 253 2.2 24 # Indiana 198 2.0 26 1.7 224 2.0 15 # Wisconsin 196 2.0 28 1.8 224 2.0 50 # Missouri 162 1.7 22 1.4 184 1.6 26 # Tennessee 156 1.6 22 1.4 178 1.6 43 # Kentucky 133 1.4 22 1.4 155 1.4 18 # Connecticut 132 1.3 18 1.2 150 1.3 7 # South Carolina 105 1.1 14 0.9 119 1.0 41 # Nevada 99 1.0 20 1.3 119 1.0 29 # Oklahoma 95 1.0 10 0.7 105 0.9 37 # Kansas 81 0.8 13 0.8 94 0.8 17 # Utah 71 0.7 7 0.5 78 0.7 45 # Oregon 70 0.7 29 1.9 99 0.9 38 # New Hampshire 68 0.7 7 0.5 75 0.7 30 # Arkansas 63 0.6 13 0.8 76 0.7 4 # Nebraska 58 0.6 12 0.8 70 0.6 28 # Rhode Island 54 0.6 6 0.4 60 0.5 40 # Mississippi 52 0.5 5 0.3 57 0.5 25 # North Dakota 51 0.5 8 0.5 59 0.5 35 # Hawaii 40 0.4 9 0.6 49 0.4 12 # Maine 40 0.4 4 0.3 44 0.4 20 # West Virginia 39 0.4 2 0.1 41 0.4 49 # New Mexico 38 0.4 6 0.4 44 0.4 32 # South Dakota 38 0.4 2 0.1 40 0.4 42 # Delaware 34 0.3 6 0.4 40 0.4 8 # Idaho 29 0.3 9 0.6 38 0.3 13 #District of Columbia 26 0.3 6 0.4 32 0.3 9 # Vermont 16 0.2 1 0.1 17 0.1 46 # Wyoming 16 0.2 0 0.0 16 0.1 51 # Alaska 14 0.1 3 0.2 17 0.1 2 # ArmedForces: Europe 7 0.1 1 0.1 8 0.1 66 #ArmedForces: Pacific 3 0.0 0 0.0 3 0.0 67 # Iowa 2 0.0 0 0.0 2 0.0 16 # Louisiana 1 0.0 4 0.3 5 0.0 19 # Washington 1 0.0 3 0.2 4 0.0 48 # Arizona 0 0.0 5 0.3 5 0.0 3 # Total 9815 100.0 1534 100.0 11349 100.0 0 ############################################################ # A few players listed Canada (260 out of the 12,041; # # 180 out of the 10,385). # ############################################################ PlayCAN <- subset(Data2All,Data2All$RegCountryID==2) nCAN <- length(PlayCAN$UserID); nProv <- length(unique(PlayCAN$RegStateID)); Scrap1 <- xtabs(rep(1,nCAN)~(!is.na(PlayCAN$RiskScore)) +PlayCAN$RegStateID); StateTab <- data.frame(IDNumber=rep(0,nProv+1), AnaInCnt=rep(0,nProv+1), AnaInPct=rep(0,nProv+1), AnaOutCnt=rep(0,nProv+1), AnaOutPct=rep(0,nProv+1), AnaTotCnt=rep(0,nProv+1), AnaTotPct=rep(0,nProv+1), stringsAsFactors=FALSE); StateTab$AnaInCnt[1:nProv] = Scrap1[2,]; StateTab$AnaOutCnt[1:nProv] = Scrap1[1,]; StateTab$IDNumber[1:nProv] = sort(unique(PlayCAN$RegStateID)); StateTab <- merge(StateTab,Data1S[,c("IDNumber","Name")], by=c("IDNumber"),all.x=TRUE); StateTab$Name[1] = "Total"; StateTab$AnaInCnt[1] = -1; StateTab <- StateTab[,c(8,2,3,4,5,6,7,1)]; StateTab <- StateTab[order(-StateTab$AnaInCnt),]; StateTab$AnaInCnt[nProv+1] = sum(StateTab$AnaInCnt[1:nProv]); StateTab$AnaOutCnt[nProv+1] = sum(StateTab$AnaOutCnt[1:nProv]); StateTab$AnaTotCnt = StateTab$AnaInCnt + StateTab$AnaOutCnt; StateTab$AnaInPct = 100 * StateTab$AnaInCnt / StateTab$AnaInCnt[nProv+1]; StateTab$AnaOutPct = 100 * StateTab$AnaOutCnt / StateTab$AnaOutCnt[nProv+1]; StateTab$AnaTotPct = 100 * StateTab$AnaTotCnt / StateTab$AnaTotCnt[nProv+1]; StateTab$AnaInPct = round(StateTab$AnaInPct,digits=1); StateTab$AnaOutPct = round(StateTab$AnaOutPct,digits=1); StateTab$AnaTotPct = round(StateTab$AnaTotPct,digits=1); StateTab # Ana Ana Ana AnA Ana Ana # Name InCnt InPct OutCnt OutPct TotCnt TotPct IDNumber # Ontario 82 45.6 47 58.8 129 49.6 60 #British Columbia 38 21.1 14 17.5 52 20.0 53 # Alberta 34 18.9 9 11.2 43 16.5 52 # Manitoba 10 5.6 0 0.0 10 3.8 54 # New Brunswick 6 3.3 1 1.2 7 2.7 55 # Nova Scotia 6 3.3 1 1.2 7 2.7 58 # Saskatchewan 4 2.2 7 8.8 11 4.2 63 # Quebec 0 0.0 1 1.2 1 0.4 62 # Total 180 100.0 80 100.0 260 100.0 0 ###################################################################### # We start with Data3NFL, Data4Not, and Data5All, and then calculate # # more metrics based on the data we read in. # # * Duration is DateLst minus Date1st plus 1. # # * Freq = 100*nDays/Duration is the percentage of days in Duration # # where at least one of the player's contests started. # # * ContPED = nCont/nDays is the number of contests per entry day. # # * EntPCont = nEntries/nCont is the average number of entries # # per contest. # # * FeesPCont = TotFees/nCont is the average amount spent on entry # # fees per contest. # # * LineupPE = nLineups/nEntries is the number of distinct lineups # # per entry. For most, it'll be 1.00. For others, it'll be less. # # * FeesPDay = TotFees/nDays is the average amount the player spends # # on a day he actually plays. # # * NetLoss = TotFees-TotWinnings is the total amount lost over the # # 2004 NFL season. # # * PctLoss = 100*NetLoss/TotFees tells us what percent of the total # # entry fees the player lost. # # * PctProfit = 100*nUserUp/nCont is the percentage of contests # # where the user made a profit (meaning that the total prizes # # were greater than the total amount paid in entry fees. # # Make Table 1 and Table 2. # ###################################################################### For1Pt1 <- data.frame(nDays=Data3NFL$nDays, Duration=difftime(time1=Data3NFL$DateLst, time2=Data3NFL$Date1st, units=c("days")), Freq=0, nCont=Data3NFL$nCont, ContPED=Data3NFL$nCont/Data3NFL$nDays, EntPCont=Data3NFL$nEntries/Data3NFL$nCont, FeesPCont=Data3NFL$TotFees/Data3NFL$nCont, AvgBuyIn=Data3NFL$AvgBuyIn, LineupPE=Data3NFL$nLineups/Data3NFL$nEntries, TotFees=Data3NFL$TotFees, FeesPDay=Data3NFL$TotFees/Data3NFL$nDays, NetLoss=Data3NFL$TotFees-Data3NFL$TotWinnings, PctLoss=0, PctProfit=100*Data3NFL$nUserUp/Data3NFL$nCont, UserID=Data3NFL$UserID, stringsAsFactors=FALSE); For1Pt1$Duration = round(as.numeric(For1Pt1$Duration),digits=0); For1Pt1$Duration = For1Pt1$Duration + 1; For1Pt1$Freq = 100 * For1Pt1$nDays / For1Pt1$Duration; For1Pt1$PctLoss = 100 * For1Pt1$NetLoss / For1Pt1$TotFees; Data4Not <- read.csv(file=paste0(Folder,"TacklingData4Not.csv"), header=TRUE,stringsAsFactors=FALSE); For1Pt2 <- data.frame(nDays=Data4Not$nDays, Duration=difftime(time1=Data4Not$DateLst, time2=Data4Not$Date1st, units=c("days")), Freq=0, nCont=Data4Not$nCont, ContPED=Data4Not$nCont/Data4Not$nDays, EntPCont=Data4Not$nEntries/Data4Not$nCont, FeesPCont=Data4Not$TotFees/Data4Not$nCont, AvgBuyIn=Data4Not$AvgBuyIn, LineupPE=Data4Not$nLineups/Data4Not$nEntries, TotFees=Data4Not$TotFees, FeesPDay=Data4Not$TotFees/Data4Not$nDays, NetLoss=Data4Not$TotFees-Data4Not$TotWinnings, PctLoss=0, PctProfit=100*Data4Not$nUserUp/Data4Not$nCont, UserID=Data4Not$UserID, stringsAsFactors=FALSE); For1Pt2$Duration = round(as.numeric(For1Pt2$Duration),digits=0); For1Pt2$Duration = For1Pt2$Duration + 1; For1Pt2$Freq = 100 * For1Pt2$nDays / For1Pt2$Duration; For1Pt2$PctLoss = 100 * For1Pt2$NetLoss / For1Pt2$TotFees; Data5All <- read.csv(file=paste0(Folder,"TacklingData5All.csv"), header=TRUE,stringsAsFactors=FALSE); For1Pt3 <- data.frame(nDays=Data5All$nDays, Duration=difftime(time1=Data5All$DateLst, time2=Data5All$Date1st, units=c("days")), Freq=0, nCont=Data5All$nCont, ContPED=Data5All$nCont/Data5All$nDays, EntPCont=Data5All$nEntries/Data5All$nCont, FeesPCont=Data5All$TotFees/Data5All$nCont, AvgBuyIn=Data5All$AvgBuyIn, LineupPE=Data5All$nLineups/Data5All$nEntries, TotFees=Data5All$TotFees, FeesPDay=Data5All$TotFees/Data5All$nDays, NetLoss=Data5All$TotFees-Data5All$TotWinnings, PctLoss=0, PctProfit=100*Data5All$nUserUp/Data5All$nCont, UserID=Data5All$UserID, stringsAsFactors=FALSE); For1Pt3$Duration = round(as.numeric(For1Pt3$Duration),digits=0); For1Pt3$Duration = For1Pt3$Duration + 1; For1Pt3$Freq = 100 * For1Pt3$nDays / For1Pt3$Duration; For1Pt3$PctLoss = 100 * For1Pt3$NetLoss / For1Pt3$TotFees; EpiTab1 <- data.frame(VarLab=c("Number of Active Days", "Duration -- Numbers of Days", "Percentage of Entry Days", "Number of Contests", "Contests Per Entry Day", "Entries Per Contest", "Entry Fees Per Contest", "Average Entry Fee Size", "Distinct Lineups Per Entry", "Total Entry Fees", "Entry Fees Per Day", "Net Loss", "Percent Lost", "Percent of Contests Won"), NFLMeans=rep(0,14), NFLSDs=rep(0,14), NFLP50s=rep(0,14), OthMeans=rep(0,14), OthSDs=rep(0,14), OthP50s=rep(0,14), AllMeans=rep(0,14), AllSDs=rep(0,14), AllP50s=rep(0,14), stringsAsFactors=FALSE); i1 <- 1; while (i1 <= 14) { EpiTab1$NFLMeans[i1] = mean(For1Pt1[,i1]); EpiTab1$NFLSDs[i1] = sd(For1Pt1[,i1]); EpiTab1$NFLP50s[i1] = median(For1Pt1[,i1]); EpiTab1$OthMeans[i1] = mean(For1Pt2[,i1],na.rm=TRUE); EpiTab1$OthSDs[i1] = sd(For1Pt2[,i1],na.rm=TRUE); EpiTab1$OthP50s[i1] = median(For1Pt2[,i1],na.rm=TRUE); EpiTab1$AllMeans[i1] = mean(For1Pt3[,i1]); EpiTab1$AllSDs[i1] = sd(For1Pt3[,i1]); EpiTab1$AllP50s[i1] = median(For1Pt3[,i1]); i1 = i1 + 1; } Scrap1 <- EpiTab1; Scrap1[,2:10] = round(Scrap1[,2:10],digits=1); Scrap1[,c(1,2,3,4)] # VarLab NFLMeans NFLSDs NFLP50s #1 Number of Active Days 11.4 9.7 8.0 #2 Duration -- Numbers of Days 109.3 47.1 133.0 #3 Percentage of Entry Days 16.2 21.0 10.6 #4 Number of Contests 35.4 82.8 16.0 #5 Contests Per Entry Day 2.7 3.3 2.0 #6 Entries Per Contest 1.2 0.6 1.0 #7 Entry Fees Per Contest 8.8 22.5 4.5 #8 Average Entry Fee Size 7.6 20.6 4.1 #9 Distinct Lineups Per Entry 1.0 0.1 1.0 #10 Total Entry Fees 369.5 3668.7 71.5 #11 Entry Fees Per Day 22.9 118.3 9.7 #12 Net Loss -25.1 9894.4 28.0 #13 Percent Lost 46.3 142.3 55.0 #14 Percent of Contests Won 18.9 16.6 17.6 Scrap1[,c(1,5,6,7)] # VarLab OthMeans OthSDs OthP50s #1 Number of Active Days 15.3 23.0 5.0 #2 Duration -- Numbers of Days 91.2 54.1 109.0 #3 Percentage of Entry Days 28.5 33.4 12.2 #4 Number of Contests 41.9 139.1 7.0 #5 Contests Per Entry Day 1.9 2.1 1.3 #6 Entries Per Contest 1.1 0.4 1.0 #7 Entry Fees Per Contest 5.3 15.3 2.1 #8 Average Entry Fee Size 4.8 13.3 2.0 #9 Distinct Lineups Per Entry 1.0 0.1 1.0 #10 Total Entry Fees 297.4 2057.5 19.0 #11 Entry Fees Per Day 10.7 33.3 3.3 #12 Net Loss 46.5 764.6 8.2 #13 Percent Lost 50.4 136.7 70.0 #14 Percent of Contests Won 15.2 19.5 10.7 Scrap1[,c(1,8,9,10)] # VarLab AllMeans AllSDs AllP50s #1 Number of Active Days 18.0 21.6 10.0 #2 Duration -- Numbers of Days 114.4 45.0 134.0 #3 Percentage of Entry Days 19.8 22.6 12.3 #4 Number of Contests 56.7 155.3 20.0 #5 Contests Per Entry Day 2.6 3.2 2.0 #6 Entries Per Contest 1.2 0.5 1.0 #7 Entry Fees Per Contest 8.4 21.9 4.3 #8 Average Entry Fee Size 7.3 20.1 4.0 #9 Distinct Lineups Per Entry 1.0 0.1 1.0 #10 Total Entry Fees 520.0 4154.0 87.0 #11 Entry Fees Per Day 21.4 115.6 9.2 #12 Net Loss -1.6 9895.9 30.7 #13 Percent Lost 46.5 124.3 53.0 #14 Percent of Contests Won 18.6 15.3 17.6 EpiTab2 = round(cor(For1Pt3[,1:14],method="spearman"),digits=2); EpiTab2[,1:7] # nDays Duration Freq nCont ContPED EntPCont FeesPCont #nDays 1.00 0.40 0.56 0.92 0.24 0.42 -0.01 #Duration 0.40 1.00 -0.23 0.36 0.05 0.13 -0.01 #Freq 0.56 -0.23 1.00 0.53 0.21 0.28 -0.03 #nCont 0.92 0.36 0.53 1.00 0.56 0.50 -0.06 #ContPED 0.24 0.05 0.21 0.56 1.00 0.39 -0.12 #EntPCont 0.42 0.13 0.28 0.50 0.39 1.00 0.08 #FeesPCont -0.01 -0.01 -0.03 -0.06 -0.12 0.08 1.00 #AvgBuyIn -0.06 -0.02 -0.06 -0.10 -0.15 -0.05 0.98 #LineupPE -0.26 -0.08 -0.19 -0.36 -0.37 -0.55 0.02 #TotFees 0.77 0.30 0.41 0.80 0.40 0.47 0.51 #FeesPDay 0.08 0.01 0.07 0.22 0.38 0.26 0.84 #NetLoss 0.47 0.17 0.25 0.50 0.27 0.34 0.42 #PctLoss -0.53 -0.23 -0.26 -0.53 -0.21 -0.22 -0.08 #PctProfit 0.39 0.19 0.14 0.38 0.13 0.10 0.18 EpiTab2[,8:14] # AvgBuyIn LineupPE TotFees FeesPDay NetLoss PctLoss PctProfit #nDays -0.06 -0.26 0.77 0.08 0.47 -0.53 0.39 #Duration -0.02 -0.08 0.30 0.01 0.17 -0.23 0.19 #Freq -0.06 -0.19 0.41 0.07 0.25 -0.26 0.14 #nCont -0.10 -0.36 0.80 0.22 0.50 -0.53 0.38 #ContPED -0.15 -0.37 0.40 0.38 0.27 -0.21 0.13 #EntPCont -0.05 -0.55 0.47 0.26 0.34 -0.22 0.10 #FeesPCont 0.98 0.02 0.51 0.84 0.42 -0.08 0.18 #AvgBuyIn 1.00 0.08 0.46 0.80 0.39 -0.06 0.17 #LineupPE 0.08 1.00 -0.29 -0.17 -0.20 0.13 -0.05 #TotFees 0.46 -0.29 1.00 0.67 0.69 -0.51 0.45 #FeesPDay 0.80 -0.17 0.67 1.00 0.52 -0.17 0.23 #NetLoss 0.39 -0.20 0.69 0.52 1.00 0.09 0.01 #PctLoss -0.06 0.13 -0.51 -0.17 0.09 1.00 -0.82 #PctProfit 0.17 -0.05 0.45 0.23 0.01 -0.82 1.00 ###################################################################### # Get the data on participation by sport league and by contest type. # ###################################################################### # Construct Table 3 with the leagues. # ############################################################ Data6Play <- read.csv(file=paste0(Folder,"TacklingData6Play.csv"), header=TRUE,stringsAsFactors=FALSE); Scrap1 <- Data6Play[,c(1:4)]; Scrap1$Group = 1 + 1*(Scrap1$DidNBA) + 2*(Scrap1$DidOth); EpiTab3 <- data.frame(Sport=c("NFL only","NFL & NBA only", "NFL & Other only","NFL & NBA & Other", "Total"), Count=0,Percentage=0, stringsAsFactors=FALSE); EpiTab3$Count[1:4] = xtabs(rep(1,nD2C)~Scrap1$Group); EpiTab3$Count[5] = nD2C; EpiTab3$Percentage[1:5] = 100 * EpiTab3$Count / nD2C; Scrap1 <- EpiTab3; Scrap1$Percentage <- round(Scrap1$Percentage,digits=1); Scrap1 # Sport Count Percentage # NFL only 5131 49.4 # NFL & NBA only 825 7.9 # NFL & Other only 2189 21.1 # NFL & NBA & Other 2240 21.6 # Total 10385 100.0 ############################################################ # Construct Table 4 with the payout structure types. # # * We were unable to recreate the exact numbers of users # # in the "Number with more than half of their contests # # within contest type" column. # # -- The differences were so small that the resulting # # percentage (rounded to the nearest tenth of a # # percentage point) did not change. # # -- The column should have said "at least half" rather # # than "more than half." # # Class Manuscript This Run # # 50/50 (except Head-to-Head) 727 724 # # Head-to-Head 395 397 # # Multiplier 238 234 # # League (except 50/50) 301 296 # # Move your way up 156 154 # # Other contests 6761 6757 # ############################################################ Scrap1 <- Data6Play[,c(1,5:10)]; Scrap1$CntT <- apply(Scrap1[,2:7],FUN=sum,1); Scrap1$CntM = apply(Scrap1[,2:6],1,max); Scrap1$Pct1 = Scrap1$Cnt1 / Scrap1$CntT; Scrap1$Pct2 = Scrap1$Cnt2 / Scrap1$CntT; Scrap1$Pct3 = Scrap1$Cnt3 / Scrap1$CntT; Scrap1$Pct4 = Scrap1$Cnt4 / Scrap1$CntT; Scrap1$Pct5 = Scrap1$Cnt5 / Scrap1$CntT; Scrap1$Pct6 = Scrap1$Cnt6 / Scrap1$CntT; Scrap1$Pref = 7; Scrap2 = 1.1 * (Scrap1$Cnt1==Scrap1$CntM); Scrap2 = Scrap2 + 2.1 * (Scrap1$Cnt2==Scrap1$CntM); Scrap2 = Scrap2 + 3.1 * (Scrap1$Cnt3==Scrap1$CntM); Scrap2 = Scrap2 + 4.1 * (Scrap1$Cnt4==Scrap1$CntM); Scrap2 = Scrap2 + 5.1 * (Scrap1$Cnt5==Scrap1$CntM); Scrap2 = Scrap2 + 6.1 * (Scrap1$Cnt6==Scrap1$CntM); Scrap1$Pref = Scrap1$Pref - 6*(Scrap2==1.1); Scrap1$Pref = Scrap1$Pref - 5*(Scrap2==2.1); Scrap1$Pref = Scrap1$Pref - 4*(Scrap2==3.1); Scrap1$Pref = Scrap1$Pref - 3*(Scrap2==4.1); Scrap1$Pref = Scrap1$Pref - 2*(Scrap2==5.1); Scrap1$Pref = Scrap1$Pref - 1*(Scrap2==6.1); EpiTab4 <- data.frame(Class=c("50/50 (except Head-to-Head)", "Head-to-Head", "Multiplier", "League (except 50/50)", "Move your way up", "Other contests"), AtLeastOnceCnt=rep(0,6), AtLeastOncePct=rep(0,6), AtLeastHalfCnt=rep(0,6), AtLeastHalfPct=rep(0,6), stringsAsFactors=FALSE); EpiTab4$AtLeastOnceCnt[1] = sum(Scrap1$Cnt1>0); EpiTab4$AtLeastOnceCnt[2] = sum(Scrap1$Cnt2>0); EpiTab4$AtLeastOnceCnt[3] = sum(Scrap1$Cnt3>0); EpiTab4$AtLeastOnceCnt[4] = sum(Scrap1$Cnt4>0); EpiTab4$AtLeastOnceCnt[5] = sum(Scrap1$Cnt5>0); EpiTab4$AtLeastOnceCnt[6] = sum(Scrap1$Cnt6>0); EpiTab4$AtLeastOncePct = 100 * EpiTab4$AtLeastOnceCnt / nD2C; EpiTab4$AtLeastHalfCnt[1] = sum(Scrap1$Pct1>=0.5); EpiTab4$AtLeastHalfCnt[2] = sum(Scrap1$Pct2>=0.5); EpiTab4$AtLeastHalfCnt[3] = sum(Scrap1$Pct3>=0.5); EpiTab4$AtLeastHalfCnt[4] = sum(Scrap1$Pct4>=0.5); EpiTab4$AtLeastHalfCnt[5] = sum(Scrap1$Pct5>=0.5); EpiTab4$AtLeastHalfCnt[6] = sum(Scrap1$Pct6>=0.5); EpiTab4$AtLeastHalfPct = 100 * EpiTab4$AtLeastHalfCnt / nD2C; Scrap3 <- EpiTab4; Scrap3[,2:5] = round(Scrap3[,2:5],digits=1); Scrap3 # AtLeast AtLeast AtLeast AtLeast # Class OnceCnt OncePct HalfCnt HalfPct #1 50/50 (except Head-to-Head) 6034 58.1 724 7.0 #2 Head-to-Head 3676 35.4 397 3.8 #3 Multiplier 5111 49.2 234 2.3 #4 League (except 50/50) 5367 51.7 296 2.9 #5 Move your way up 5861 56.4 154 1.5 #6 Other contests 9962 95.9 6757 65.1 ###################################################################### # Use Data5All to partition off and work with the groups of most # # involved players. # # * Make the Venn diagram with the information from Table 3. # ###################################################################### # Order the players in the analytic sample by TotFees, and # # then get the sums for a centile histogram plot. # ############################################################ Data5All <- Data5All[order(Data5All$TotFees),]; Scrap1 <- data.frame(Centile=ceiling(100*(1:nD2C)/nD2C), TotFees=Data5All$TotFees); Scrap2 <- xtabs(TotFees~Centile,data=Scrap1); plot(x=1:100,y=as.numeric(Scrap2), main=paste0("Fig. 1. Total entry fees by centile groups, ", "2014 NFL Season"), xlab="Percentile",ylab="Total paid in entry fees", pch="*",col="red"); ############################################################ # Create the data set for the last few tables. # # * Merge For1Pt3 and Data2Cut$RiskScore. # # * Create columns T01nC, T01NL, and T01EF to note who is # # in the top 1% by nCont, NetLoss, and TotFees, # # respectively. # ############################################################ nTop01 <- ceiling(nD2C/100); nBot99 <- nD2C - nTop01; For5Pt0 <- merge(For1Pt3,Data2Cut[,c("UserID","RiskScore")], all.x=TRUE,all.y=TRUE,by=c("UserID")); For5Pt0 <- For5Pt0[order(-For5Pt0$nCont),]; For5Pt0$T01nC <- rep(c(1,0),c(nTop01,nBot99)); For5Pt0 <- For5Pt0[order(-For5Pt0$NetLoss),]; For5Pt0$T01NL <- rep(c(1,0),c(nTop01,nBot99)); For5Pt0 <- For5Pt0[order(-For5Pt0$TotFees),]; For5Pt0$T01EF <- rep(c(1,0),c(nTop01,nBot99)); For5Pt0$NotT1 <- 1*(For5Pt0$T01EF+For5Pt0$T01NL+For5Pt0$T01nC==0); ################################################## # Create a Venn diagram that shows the overlap # # between the three Top 1% group and the group # 3 of remaining players. # ################################################## For5Pt6 <- xtabs(rep(1,nD2C)~T01EF+T01NL+T01nC,data=For5Pt0); nNotT1 <- For5Pt6[1,1,1]; For5Pt6 # T01nC = 0 T01nC = 1 # T01NL T01NL #T01EF 0 1 T01EF 0 1 # 0 10167 44 0 64 6 # 1 29 41 1 21 13 ######################################## # Start with some preparation. # ######################################## Space1 <- 5; Space2 <- 0.1; Space3 <- 1 - Space2; For5Pt1 <- data.frame(x=rep(0,6),y=rep(0,6)); For5Pt1$y[1] = 0; For5Pt1$x[2] = 0; For5Pt1$y[6] = sqrt(2*nTop01-For5Pt6[1,2,2]-For5Pt6[2,2,2]); For5Pt1$y[6] = For5Pt1$y[6] * sqrt(2); For5Pt1$x[4] = For5Pt1$y[6] / 2; For5Pt1$y[3] = (For5Pt6[1,1,2]+For5Pt6[2,1,2]) / For5Pt1$x[4]; For5Pt1$y[4] = For5Pt1$y[6] - For5Pt1$y[3]; For5Pt1$x[3] = For5Pt6[2,2,2] / (For5Pt1$y[4]-For5Pt1$y[3]); For5Pt1$y[2] = For5Pt1$y[3] - For5Pt6[2,1,2]/For5Pt1$x[3]; For5Pt1$y[5] = For5Pt1$y[4] + For5Pt6[2,2,1]/For5Pt1$x[3]; For5Pt1$x[1] = -For5Pt6[2,1,1] / (For5Pt1$y[5]-For5Pt1$y[2]); For5Pt2 <- For5Pt1; For5Pt2$x = For5Pt2$x - Space1 - 0.50*(For5Pt1$x[4]-For5Pt1$x[1]); For5Pt3 <- For5Pt1; For5Pt3$x = For5Pt3$x + Space1 + 0.50*(For5Pt1$x[4]-For5Pt1$x[1]); For5Pt3$y = For5Pt3$y + Space1 + 0.25*(For5Pt1$y[6]-For5Pt1$y[1]); For5Pt4 <- For5Pt1; For5Pt4$x = For5Pt4$x + Space1 + 0.50*(For5Pt1$x[4]-For5Pt1$x[1]); For5Pt4$y = For5Pt4$y - Space1 - 0.25*(For5Pt1$y[6]-For5Pt1$y[1]); For5Pt5 <- data.frame(x0=rep(0,3),x1=rep(0,3), y0=rep(0,3),y1=rep(0,3), xt=rep(0,3),yt=rep(0,3),pos=c(4,4,4)); For5Pt5$x0[1] = For5Pt1$x[1]; For5Pt5$x1[1] = For5Pt2$x[3]; For5Pt5$y0[1] = 0.5*(For5Pt1$y[1]+For5Pt2$y[6]); For5Pt5$y1[1] = 0.5*(For5Pt1$y[1]+For5Pt2$y[6]); For5Pt5$x0[2] = 0.5*(For5Pt1$x[1]+For5Pt1$x[4]); For5Pt5$x1[2] = For5Pt3$x[2]; For5Pt5$y0[2] = For5Pt1$y[6]; For5Pt5$y1[2] = 0.5*(For5Pt3$y[4]+For5Pt3$y[5]); For5Pt5$x0[3] = 0.5*(For5Pt1$x[1]+For5Pt1$x[4]); For5Pt5$x1[3] = For5Pt4$x[2]; For5Pt5$y0[3] = For5Pt1$y[1]; For5Pt5$y1[3] = For5Pt4$y[2]; For5Pt5$xt[1] = For5Pt2$x[1]; For5Pt5$xt[2] = For5Pt3$x[2]; For5Pt5$xt[3] = For5Pt4$x[2]; For5Pt5$yt[1] = For5Pt2$y[5] + 0.50*(For5Pt2$y[6]-For5Pt2$y[5]); For5Pt5$yt[2] = For5Pt3$y[2] + 0.40*(For5Pt3$y[3]-For5Pt3$y[2]); For5Pt5$yt[3] = For5Pt4$y[4] + 0.25*(For5Pt4$y[5]-For5Pt4$y[4]); LabText <- c("Top 1% by\nTotal Entry Fees,\nn = 104", "Top 1% by\nNet Loss,\nn = 104", "Top 1% by\nNumber of Contests,\nn = 104"); WallL <- min(For5Pt1$x,For5Pt2$x,For5Pt3$x,For5Pt4$x) - 0.00*Space1; WallR <- max(For5Pt1$x,For5Pt2$x,For5Pt3$x,For5Pt4$x) + 0.00*Space1; WallB <- min(For5Pt1$y,For5Pt2$y,For5Pt3$y,For5Pt4$y) - 0.00*Space1; WallT <- max(For5Pt1$y,For5Pt2$y,For5Pt3$y,For5Pt4$y) + 0.00*Space1; ######################################## # Prepare the canvas. # ######################################## par(mar=c(0,0,0,0)); plot(x=c(For5Pt1$x[2],For5Pt1$x[3]), y=c(For5Pt1$y[2],For5Pt1$y[2]),type="l", xlab=" ",xaxt="n",xlim=c(WallL,WallR), ylab=" ",yaxt="n",ylim=c(WallB,WallT) ); ######################################## # Draw the main Venn diagram. # ######################################## polygon(x=c(For5Pt1$x[1],For5Pt1$x[1],For5Pt1$x[3],For5Pt1$x[3]), y=c(For5Pt1$y[2],For5Pt1$y[5],For5Pt1$y[5],For5Pt1$y[2]), ); polygon(x=c(For5Pt1$x[2],For5Pt1$x[2],For5Pt1$x[4],For5Pt1$x[4]), y=c(For5Pt1$y[3],For5Pt1$y[6],For5Pt1$y[6],For5Pt1$y[3]), ); polygon(x=c(For5Pt1$x[2],For5Pt1$x[2],For5Pt1$x[4],For5Pt1$x[4]), y=c(For5Pt1$y[1],For5Pt1$y[4],For5Pt1$y[4],For5Pt1$y[1]), ); text(lab=paste(For5Pt6[1,1,2]," (", round(100*For5Pt6[1,1,2]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[2]+For5Pt1$x[4]), y=0.5*(For5Pt1$y[1]+For5Pt1$y[2])); text(lab=paste(For5Pt6[1,2,1]," (", round(100*For5Pt6[1,2,1]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[2]+For5Pt1$x[4]), y=0.5*(For5Pt1$y[5]+For5Pt1$y[6])); text(lab=paste(For5Pt6[1,2,2]," (", round(100*For5Pt6[1,2,2]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[3]+For5Pt1$x[4]), y=0.5*(For5Pt1$y[3]+For5Pt1$y[4])); text(lab=paste(For5Pt6[2,1,1],"\n(", round(100*For5Pt6[2,1,1]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[1]+For5Pt1$x[2]), y=0.5*(For5Pt1$y[5]+For5Pt1$y[2])); text(lab=paste(For5Pt6[2,1,2]," (", round(100*For5Pt6[2,1,2]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]), y=0.5*(For5Pt1$y[2]+For5Pt1$y[3])); text(lab=paste(For5Pt6[2,2,1]," (", round(100*For5Pt6[2,2,1]/(nD2C-For5Pt6[1,1,1]), digits=1), "%)",sep=""), x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]), y=0.5*(For5Pt1$y[4]+For5Pt1$y[5])); text(lab=paste(For5Pt6[2,2,2]," (", round(100*For5Pt6[2,2,2]/(nD2C-For5Pt6[1,1,1]), digits=1), ".0%)",sep=""), x=0.5*(For5Pt1$x[2]+For5Pt1$x[3]), y=0.5*(For5Pt1$y[3]+For5Pt1$y[4])); ######################################## # Add the T01EF part. # ######################################## polygon(x=c(For5Pt2$x[1],For5Pt2$x[1],For5Pt2$x[3],For5Pt2$x[3]), y=c(For5Pt2$y[2],For5Pt2$y[5],For5Pt2$y[5],For5Pt2$y[2]), ); polygon(x=c(For5Pt2$x[2],For5Pt2$x[2],For5Pt2$x[3],For5Pt2$x[3]), y=c(For5Pt2$y[3],For5Pt2$y[5],For5Pt2$y[5],For5Pt2$y[3]), ); polygon(x=c(For5Pt2$x[2],For5Pt2$x[2],For5Pt2$x[3],For5Pt2$x[3]), y=c(For5Pt2$y[2],For5Pt2$y[4],For5Pt2$y[4],For5Pt2$y[2]), ); text(lab=paste(For5Pt6[2,1,1],"\n(", round(100*For5Pt6[2,1,1]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt2$x[1]+For5Pt2$x[2]), y=0.5*(For5Pt2$y[5]+For5Pt2$y[2])); text(lab=paste(For5Pt6[2,1,2]," (", round(100*For5Pt6[2,1,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), y=0.5*(For5Pt2$y[2]+For5Pt2$y[3])); text(lab=paste(For5Pt6[2,2,1]," (", round(100*For5Pt6[2,2,1]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), y=0.5*(For5Pt2$y[4]+For5Pt2$y[5])); text(lab=paste(For5Pt6[2,2,2]," (", round(100*For5Pt6[2,2,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), y=0.5*(For5Pt2$y[3]+For5Pt2$y[4])); #text(lab=For5Pt6[2,1,1],x=0.5*(For5Pt2$x[1]+For5Pt2$x[2]), # y=0.5*(For5Pt2$y[5]+For5Pt2$y[2])); #text(lab=For5Pt6[2,1,2],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), # y=0.5*(For5Pt2$y[2]+For5Pt2$y[3])); #text(lab=For5Pt6[2,2,1],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), # y=0.5*(For5Pt2$y[4]+For5Pt2$y[5])); #text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt2$x[2]+For5Pt2$x[3]), # y=0.5*(For5Pt2$y[3]+For5Pt2$y[4])); ######################################## # Add the T01NL part. # ######################################## polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[3],For5Pt3$x[3]), y=c(For5Pt3$y[3],For5Pt3$y[5],For5Pt3$y[5],For5Pt3$y[3]), ); polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[4],For5Pt3$x[4]), y=c(For5Pt3$y[3],For5Pt3$y[6],For5Pt3$y[6],For5Pt3$y[3]), ); polygon(x=c(For5Pt3$x[2],For5Pt3$x[2],For5Pt3$x[4],For5Pt3$x[4]), y=c(For5Pt3$y[3],For5Pt3$y[4],For5Pt3$y[4],For5Pt3$y[3]), ); text(lab=paste(For5Pt6[1,2,1]," (", round(100*For5Pt6[1,2,1]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt3$x[2]+For5Pt3$x[4]), y=0.5*(For5Pt3$y[5]+For5Pt3$y[6])); text(lab=paste(For5Pt6[1,2,2]," (", round(100*For5Pt6[1,2,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt3$x[3]+For5Pt3$x[4]), y=0.5*(For5Pt3$y[3]+For5Pt3$y[4])); text(lab=paste(For5Pt6[2,2,1]," (", round(100*For5Pt6[2,2,1]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]), y=0.5*(For5Pt3$y[4]+For5Pt3$y[5])); text(lab=paste(For5Pt6[2,2,2]," (", round(100*For5Pt6[2,2,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]), y=0.5*(For5Pt3$y[3]+For5Pt3$y[4])); #text(lab=For5Pt6[1,2,1],x=0.5*(For5Pt3$x[2]+For5Pt3$x[4]), # y=0.5*(For5Pt3$y[5]+For5Pt3$y[6])); #text(lab=For5Pt6[1,2,2],x=0.5*(For5Pt3$x[3]+For5Pt3$x[4]), # y=0.5*(For5Pt3$y[3]+For5Pt3$y[4])); #text(lab=For5Pt6[2,2,1],x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]), # y=0.5*(For5Pt3$y[4]+For5Pt3$y[5])); #text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt3$x[2]+For5Pt3$x[3]), # y=0.5*(For5Pt3$y[3]+For5Pt3$y[4])); ######################################## # Add the T01nC part. # ######################################## polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[3],For5Pt4$x[3]), y=c(For5Pt4$y[2],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[2]), ); polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[4],For5Pt4$x[4]), y=c(For5Pt4$y[3],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[3]), ); polygon(x=c(For5Pt4$x[2],For5Pt4$x[2],For5Pt4$x[4],For5Pt4$x[4]), y=c(For5Pt4$y[1],For5Pt4$y[4],For5Pt4$y[4],For5Pt4$y[1]), ); text(lab=paste(For5Pt6[1,1,2]," (", round(100*For5Pt6[1,1,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt4$x[2]+For5Pt4$x[4]), y=0.5*(For5Pt4$y[1]+For5Pt4$y[2])); text(lab=paste(For5Pt6[1,2,2]," (", round(100*For5Pt6[1,2,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt4$x[3]+For5Pt4$x[4]), y=0.5*(For5Pt4$y[3]+For5Pt4$y[4])); text(lab=paste(For5Pt6[2,1,2]," (", round(100*For5Pt6[2,1,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]), y=0.5*(For5Pt4$y[2]+For5Pt4$y[3])); text(lab=paste(For5Pt6[2,2,2]," (", round(100*For5Pt6[2,2,2]/nTop01,digits=1), "%)",sep=""), x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]), y=0.5*(For5Pt4$y[3]+For5Pt4$y[4])); #text(lab=For5Pt6[1,1,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[4]), # y=0.5*(For5Pt4$y[1]+For5Pt4$y[2])); #text(lab=For5Pt6[1,2,2],x=0.5*(For5Pt4$x[3]+For5Pt4$x[4]), # y=0.5*(For5Pt4$y[3]+For5Pt4$y[4])); #text(lab=For5Pt6[2,1,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]), # y=0.5*(For5Pt4$y[2]+For5Pt4$y[3])); #text(lab=For5Pt6[2,2,2],x=0.5*(For5Pt4$x[2]+For5Pt4$x[3]), # y=0.5*(For5Pt4$y[3]+For5Pt4$y[4])); ######################################## # Add arrows and labels. # ######################################## i1 <- 1; while (i1 <= 3) { text(lab=LabText[i1],pos=For5Pt5$pos[i1], x=For5Pt5$xt[i1],y=For5Pt5$yt[i1]); arrows(x0=For5Pt5$x0[i1]+(For5Pt5$x1[i1]-For5Pt5$x0[i1])*Space2, x1=For5Pt5$x0[i1]+(For5Pt5$x1[i1]-For5Pt5$x0[i1])*Space3, y0=For5Pt5$y0[i1]+(For5Pt5$y1[i1]-For5Pt5$y0[i1])*Space2, y1=For5Pt5$y0[i1]+(For5Pt5$y1[i1]-For5Pt5$y0[i1])*Space3, length=0.05); i1 = i1 + 1; } text(x=For5Pt1$x[1],y=For5Pt1$y[6]+0.40*(For5Pt1$y[6]-For5Pt1$y[5]), lab="All three Top 1% groups,\nn = 218",pos=4); par(mar=c(5.1,4.1,4.1,2.1)); ###################################################################### # Table 6 looks at the same variables as Table 1, only for each of # # the 1% groups and for those majority that didn't fall into any of # # the 1% groups. # # * That's means, standard deviations, and medians for all of the # # measures of interest listed in the Methods section. # ###################################################################### For5Pt0 <- For5Pt0[,c("nDays","Duration","Freq","nCont","ContPED", "EntPCont","FeesPCont","AvgBuyIn","LineupPE", "TotFees","FeesPDay","NetLoss","PctLoss", "PctProfit","RiskScore", "UserID","T01EF","T01NL","T01nC","NotT1")]; For6Pt1 <- subset(For5Pt0,For5Pt0$T01EF==1); For6Pt2 <- subset(For5Pt0,For5Pt0$T01NL==1); For6Pt3 <- subset(For5Pt0,For5Pt0$T01nC==1); For6Pt4 <- subset(For5Pt0,For5Pt0$NotT1==1); ############################################################ # Construct Table 6. # # * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT. # # * Some of the numbers in the "Top 1% on number of # # contests" section are not exactly equal to what is in # # the manuscript. # # Manuscript This Run # # Mean SD Med Mean SD Med # # EntryFees 11207.8 34771.8 3617.9 11205.9 34772.3 3617.9 # # E.F.P.D. 208.1 1003.8 45.0 208.0 1003.8 45.0 # # Net Loss 554.6 4998.5 271.1 553.8 4998.6 264.1 # # Pct. Lost 10.7 53.5 15.9 10.6 53.5 15.9 # # Pct. Won 29.3 15.0 26.0 29.3 14.9 26.0 # # * Two of the numbers in the "Remainder of the player # # pool" section don't exactly match what is in the # # manuscript. # # Manuscript This Run # # Mean SD Med Mean SD Med # # Number of Active Days 16.9 19.8 10.0 16.9 19.7 10.0 # # Total Entry Fees 278.7 560.1 83.3 278.7 560.1 83.2 # ############################################################ EpiTab6 <- data.frame(VarLab=c("Number of Active Days", "Duration -- Numbers of Days", "Percentage of Entry Days", "Number of Contests", "Contests Per Entry Day", "Entries Per Contest", "Entry Fees Per Contest", "Average Entry Fee Size", "Distinct Lineups Per Entry", "Total Entry Fees", "Entry Fees Per Day", "Net Loss", "Percent Lost", "Percent of Contests Won", "Player Risk Score"), T01EFMeans=0,T01EFSDs=0,T01EFP50s=0, T01NLMeans=0,T01NLSDs=0,T01NLP50s=0, T01nCMeans=0,T01nCSDs=0,T01nCP50s=0, NotT1Means=0,NotT1SDs=0,NotT1P50s=0, stringsAsFactors=FALSE); i1 <- nrow(EpiTab6); while (i1 > 0) { EpiTab6$T01EFMeans[i1] = mean(For6Pt1[,i1]); EpiTab6$T01EFSDs[i1] = sd(For6Pt1[,i1]); EpiTab6$T01EFP50s[i1] = median(For6Pt1[,i1]); EpiTab6$T01NLMeans[i1] = mean(For6Pt2[,i1]); EpiTab6$T01NLSDs[i1] = sd(For6Pt2[,i1]); EpiTab6$T01NLP50s[i1] = median(For6Pt2[,i1]); EpiTab6$T01nCMeans[i1] = mean(For6Pt3[,i1]); EpiTab6$T01nCSDs[i1] = sd(For6Pt3[,i1]); EpiTab6$T01nCP50s[i1] = median(For6Pt3[,i1]); EpiTab6$NotT1Means[i1] = mean(For6Pt4[,i1]); EpiTab6$NotT1SDs[i1] = sd(For6Pt4[,i1]); EpiTab6$NotT1P50s[i1] = median(For6Pt4[,i1]); i1 = i1 - 1; } Scrap1 <- EpiTab6[,c(1,2:4)]; Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1 # VarLab T01EFMeans T01EFSDs T01EFP50s # Number of Active Days 70.3 36.1 67.5 # Duration -- Numbers of Days 139.7 11.7 141.0 # Percentage of Entry Days 50.1 24.5 49.2 # Number of Contests 651.7 967.1 320.5 # Contests Per Entry Day 10.3 16.3 4.6 # Entries Per Contest 2.5 2.4 1.6 # Entry Fees Per Contest 81.0 150.8 49.3 # Average Entry Fee Size 59.8 146.8 22.2 # Distinct Lineups Per Entry 0.9 0.2 1.0 # Total Entry Fees 21281.6 35457.8 11692.9 # Entry Fees Per Day 434.9 1043.3 207.5 # Net Loss -7494.0 97876.0 1791.9 # Percent Lost -72.0 840.7 19.2 # Percent of Contests Won 28.5 12.2 26.3 # Player Risk Score 10.8 7.6 9.7 Scrap1 <- EpiTab6[,c(1,5:7)]; Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1 # VarLab T01NLMeans T01NLSDs T01NLP50s # Number of Active Days 58.4 34.6 52.0 # Duration -- Numbers of Days 137.3 14.3 141.0 # Percentage of Entry Days 42.3 24.0 37.4 # Number of Contests 380.4 718.5 191.0 # Contests Per Entry Day 5.8 7.7 3.6 # Entries Per Contest 2.2 1.7 1.5 # Entry Fees Per Contest 82.6 161.5 42.9 # Average Entry Fee Size 62.0 158.1 24.9 # Distinct Lineups Per Entry 0.9 0.1 1.0 # Total Entry Fees 13036.5 16457.2 6375.0 # Entry Fees Per Day 284.5 411.9 140.8 # Net Loss 4475.9 4748.1 2667.5 # Percent Lost 43.5 17.2 42.1 # Percent of Contests Won 22.9 9.4 20.4 # Player Risk Score 14.8 10.7 13.5 Scrap1 <- EpiTab6[,c(1,8:10)]; Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1 # VarLab T01nCMeans T01nCSDs T01nCP50s # Number of Active Days 85.8 33.7 87.0 # Duration -- Numbers of Days 138.1 14.4 141.0 # Percentage of Entry Days 62.2 23.1 63.6 # Number of Contests 1091.9 927.7 729.5 # Contests Per Entry Day 15.6 16.2 9.9 # Entries Per Contest 1.6 1.0 1.3 # Entry Fees Per Contest 8.3 13.1 3.8 # Average Entry Fee Size 6.2 11.8 2.7 # Distinct Lineups Per Entry 0.9 0.1 1.0 # Total Entry Fees 11205.9 34772.3 3617.9 # Entry Fees Per Day 208.0 1003.8 45.0 # Net Loss 553.8 4998.6 264.1 # Percent Lost 10.6 53.5 15.9 # Percent of Contests Won 29.3 14.9 26.0 # Player Risk Score 5.2 2.9 4.9 Scrap1 <- EpiTab6[,c(1,11:13)]; Scrap1[,c(2:4)] = round(Scrap1[,c(2:4)],digits=1); Scrap1 #The median for Entry Fees Per Contest is slightly off. # VarLab NotT1Means NotT1SDs NotT1P50s # Number of Active Days 16.9 19.7 10.0 # Duration -- Numbers of Days 113.9 45.3 134.0 # Percentage of Entry Days 19.1 22.1 11.9 # Number of Contests 44.5 66.4 19.0 # Contests Per Entry Day 2.5 2.4 2.0 # Entries Per Contest 1.2 0.4 1.0 # Entry Fees Per Contest 7.4 12.1 4.2 # Average Entry Fee Size 6.6 10.7 3.9 # Distinct Lineups Per Entry 1.0 0.1 1.0 # Total Entry Fees 278.7 560.1 83.2 # Entry Fees Per Day 16.5 27.1 8.9 # Net Loss 64.4 1523.6 30.0 # Percent Lost 47.9 91.9 53.8 # Percent of Contests Won 18.5 15.3 17.5 # Player Risk Score 12.5 12.1 9.7 ############################################################ # Perform the Mann-Whitney U-tests comparing each of the # # 1% groups to the NotT1 group. # # * We used an alpha of 0.001. # # * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT. # # -- The median of the risk scores for the T01EF was not # # significantly different from the median of the risk # # scores for the NotT1 group (Both are listed as 9.7.). # # -- The median of the risk score for the T01NL group is # # BARELY significantly different from that of the NotT1 # # group (p = 0.0008675694). # ############################################################ Scrap1 <- data.frame(VarLab=c("Number of Active Days", "Duration -- Numbers of Days", "Percentage of Entry Days", "Number of Contests", "Contests Per Entry Day", "Entries Per Contest", "Entry Fees Per Contest", "Average Entry Fee Size", "Distinct Lineups Per Entry", "Total Entry Fees", "Entry Fees Per Day", "Net Loss", "Percent Lost", "Percent of Contests Won", "Player Risk Score"), pEF=0,pNL=0,pnC=0, SigEF="Accept",SigNL="Accept",SignC="Accept", stringsAsFactors=FALSE); i1 <- nrow(Scrap1); while (i1 > 0) { Scrap2 <- data.frame(Measure=c(For6Pt1[,i1],For6Pt4[,i1]), Group=rep(c("Top01","NotT1"), c(nTop01,nNotT1)), stringsAsFactors=FALSE); Scrap3 <- wilcox.test(Measure~Group,data=Scrap2); Scrap1[i1,2] = Scrap3$p.value; Scrap2 <- data.frame(Measure=c(For6Pt2[,i1],For6Pt4[,i1]), Group=rep(c("Top01","NotT1"), c(nTop01,nNotT1)), stringsAsFactors=FALSE); Scrap3 <- wilcox.test(Measure~Group,data=Scrap2); Scrap1[i1,3] = Scrap3$p.value; Scrap2 <- data.frame(Measure=c(For6Pt3[,i1],For6Pt4[,i1]), Group=rep(c("Top01","NotT1"), c(nTop01,nNotT1)), stringsAsFactors=FALSE); Scrap3 <- wilcox.test(Measure~Group,data=Scrap2); Scrap1[i1,4] = Scrap3$p.value; i1 = i1 - 1; } Scrap1$SigEF[Scrap1$pEF<0.001] = "Reject"; Scrap1$SigNL[Scrap1$pNL<0.001] = "Reject"; Scrap1$SignC[Scrap1$pnC<0.001] = "Reject"; Scrap4 <- Scrap1; Scrap4[,2:4] = round(Scrap4[,2:4],digits=3); Scrap4 # VarLab pEF pNL pnC SigEF SigNL SignC # Number of Active Days 0.000 0.000 0.000 Reject Reject Reject # Duration -- Numbers of Days 0.000 0.000 0.000 Reject Reject Reject # Percentage of Entry Days 0.000 0.000 0.000 Reject Reject Reject # Number of Contests 0.000 0.000 0.000 Reject Reject Reject # Contests Per Entry Day 0.000 0.000 0.000 Reject Reject Reject # Entries Per Contest 0.000 0.000 0.000 Reject Reject Reject # Entry Fees Per Contest 0.000 0.000 0.303 Reject Reject Accept # Average Entry Fee Size 0.000 0.000 0.001 Reject Reject Accept # Distinct Lineups Per Entry 0.000 0.000 0.000 Reject Reject Reject # Total Entry Fees 0.000 0.000 0.000 Reject Reject Reject # Entry Fees Per Day 0.000 0.000 0.000 Reject Reject Reject # Net Loss 0.000 0.000 0.000 Reject Reject Reject # Percent Lost 0.000 0.000 0.000 Reject Reject Reject # Percent of Contests Won 0.000 0.000 0.000 Reject Reject Reject # Player Risk Score 0.579 0.001 0.000 Accept Reject Reject ###################################################################### # Table 7 looks at the same variables as Table 3, only for each of # # the 1% groups and for those majority that didn't fall into any of # # the 1% groups. # # * Here, we're looking at participation in NFL contests, NBA # # contests, and other sports' contests, all within the 2014 NFL # # Season. # ###################################################################### For7Pt0 <- merge(Data6Play[,c("UserID","DidNFL","DidNBA","DidOth")], For5Pt0[,c("UserID","T01EF","T01NL","T01nC","NotT1")], all.x=TRUE,all.y=TRUE,by=c("UserID")); For7Pt0$Group = 1 + 1*(For7Pt0$DidNBA) + 2*(For7Pt0$DidOth); For7Pt0 <- For7Pt0[,c("Group","T01EF","T01NL","T01nC","NotT1", "UserID","DidNFL","DidNBA","DidOth")]; For7Pt1 <- subset(For7Pt0,For7Pt0$T01EF==1); For7Pt2 <- subset(For7Pt0,For7Pt0$T01NL==1); For7Pt3 <- subset(For7Pt0,For7Pt0$T01nC==1); For7Pt4 <- subset(For7Pt0,For7Pt0$NotT1==1); ############################################################ # Construct Table 7. # ############################################################ EpiTab7 <- data.frame(Sport=c("NFL only","NFL & NBA only", "NFL & Other only","NFL & NBA & Other", "Total"), T01EFCnt=rep(0,5),T01EFPct=rep(0,5), T01NLCnt=rep(0,5),T01NLPct=rep(0,5), T01nCCnt=rep(0,5),T01nCPct=rep(0,5), NotT1Cnt=rep(0,5),NotT1Pct=rep(0,5), stringsAsFactors=FALSE); EpiTab7$T01EFCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt1); EpiTab7$T01EFCnt[5] = nTop01; EpiTab7$T01EFPct[1:5] = 100 * EpiTab7$T01EFCnt / nTop01; EpiTab7$T01NLCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt2); EpiTab7$T01NLCnt[5] = nTop01; EpiTab7$T01NLPct[1:5] = 100 * EpiTab7$T01NLCnt / nTop01; EpiTab7$T01nCCnt[1:4] = xtabs(rep(1,nTop01)~Group,data=For7Pt3); EpiTab7$T01nCCnt[5] = nTop01; EpiTab7$T01nCPct[1:5] = 100 * EpiTab7$T01nCCnt / nTop01; EpiTab7$NotT1Cnt[1:4] = xtabs(rep(1,nNotT1)~Group,data=For7Pt4); EpiTab7$NotT1Cnt[5] = nNotT1; EpiTab7$NotT1Pct[1:5] = 100 * EpiTab7$NotT1Cnt / nNotT1; Scrap1 <- EpiTab7; Scrap1[,2:9] = round(Scrap1[,2:9],digits=1); Scrap1 # T01EF T01NL T01nC NotT1 # Sport Cnt Pct Cnt Pct Cnt Pct Cnt Pct # NFL only 9 8.7 14 13.5 6 5.8 5108 50.2 # NFL & NBA only 12 11.5 16 15.4 3 2.9 801 7.9 # NFL & Other only 9 8.7 14 13.5 7 6.7 2168 21.3 # NFL & NBA & Other 74 71.2 60 57.7 88 84.6 2090 20.6 # Total 104 100.0 104 100.0 104 100.0 10167 100.0 ############################################################ # Perform the Fisher's exact tests to see where the # # percentages are significantly different. # # * For the "NFL & Other only" category, the proportions # # of the T01EF group and the NotT1 group are barely # # significantly different. # # * We used an alpha of 0.001. # ############################################################ Scrap1 <- data.frame(Sport=c("NFL only","NFL & NBA only", "NFL & Other only","NFL & NBA & Other"), pEF=0,pNL=0,pnC=0, SigEF="Accept",SigNL="Accept",SignC="Accept", stringsAsFactors=FALSE); i1 <- 4; while (i1 > 0) { Scrap2 <- matrix(c(EpiTab7$T01EFCnt[i1], nTop01-EpiTab7$T01EFCnt[i1], EpiTab7$NotT1Cnt[i1], nNotT1-EpiTab7$NotT1Cnt[i1]),2,2); Scrap3 <- fisher.test(Scrap2); Scrap1$pEF[i1] = Scrap3$p.value; Scrap2 <- matrix(c(EpiTab7$T01NLCnt[i1], nTop01-EpiTab7$T01NLCnt[i1], EpiTab7$NotT1Cnt[i1], nNotT1-EpiTab7$NotT1Cnt[i1]),2,2); Scrap3 <- fisher.test(Scrap2); Scrap1$pNL[i1] = Scrap3$p.value; Scrap2 <- matrix(c(EpiTab7$T01nCCnt[i1], nTop01-EpiTab7$T01nCCnt[i1], EpiTab7$NotT1Cnt[i1], nNotT1-EpiTab7$NotT1Cnt[i1]),2,2); Scrap3 <- fisher.test(Scrap2); Scrap1$pnC[i1] = Scrap3$p.value; i1 = i1 - 1; } Scrap1$SigEF[Scrap1$pEF<0.001] = "Reject"; Scrap1$SigNL[Scrap1$pNL<0.001] = "Reject"; Scrap1$SignC[Scrap1$pnC<0.001] = "Reject"; Scrap4 <- Scrap1; Scrap4[,2:4] = round(Scrap4[,2:4],digits=3); Scrap4 # Sport pEF pNL pnC SigEF SigNL SignC # NFL only 0.000 0.000 0.000 Reject Reject Reject # NFL & NBA only 0.197 0.010 0.064 Accept Accept Accept # NFL & Other only 0.001 0.054 0.000 Reject Accept Reject # NFL & NBA & Other 0.000 0.000 0.000 Reject Reject Reject ###################################################################### # Table 8 looks at the same variables as Table 4, only for each of # # the 1% groups and for those majority that didn't fall into any of # # the 1% groups. # # * Here, we're looking at participation in the different categories # # of contests, all within the 2014 NFL Season. # ###################################################################### Scrap1 <- Data6Play[,c(1,5:10)]; Scrap1$CntT <- apply(Scrap1[,2:7],FUN=sum,1); Scrap1$CntM = apply(Scrap1[,2:6],1,max); Scrap1$Pct1 = Scrap1$Cnt1 / Scrap1$CntT; Scrap1$Pct2 = Scrap1$Cnt2 / Scrap1$CntT; Scrap1$Pct3 = Scrap1$Cnt3 / Scrap1$CntT; Scrap1$Pct4 = Scrap1$Cnt4 / Scrap1$CntT; Scrap1$Pct5 = Scrap1$Cnt5 / Scrap1$CntT; Scrap1$Pct6 = Scrap1$Cnt6 / Scrap1$CntT; Scrap1$Pref = 7; Scrap2 = 1.1 * (Scrap1$Cnt1==Scrap1$CntM); Scrap2 = Scrap2 + 2.1 * (Scrap1$Cnt2==Scrap1$CntM); Scrap2 = Scrap2 + 3.1 * (Scrap1$Cnt3==Scrap1$CntM); Scrap2 = Scrap2 + 4.1 * (Scrap1$Cnt4==Scrap1$CntM); Scrap2 = Scrap2 + 5.1 * (Scrap1$Cnt5==Scrap1$CntM); Scrap2 = Scrap2 + 6.1 * (Scrap1$Cnt6==Scrap1$CntM); Scrap1$Pref = Scrap1$Pref - 6*(Scrap2==1.1); Scrap1$Pref = Scrap1$Pref - 5*(Scrap2==2.1); Scrap1$Pref = Scrap1$Pref - 4*(Scrap2==3.1); Scrap1$Pref = Scrap1$Pref - 3*(Scrap2==4.1); Scrap1$Pref = Scrap1$Pref - 2*(Scrap2==5.1); Scrap1$Pref = Scrap1$Pref - 1*(Scrap2==6.1); For8Pt0 <- merge(Scrap1[,c("Cnt1","Cnt2","Cnt3","Cnt4","Cnt5","Cnt6", "Pct1","Pct2","Pct3","Pct4","Pct5","Pct6", "UserID")], For5Pt0[,c("UserID","T01EF","T01NL","T01nC","NotT1")], all.x=TRUE,all.y=TRUE,by=c("UserID")); For8Pt0 <- For8Pt0[,c("Cnt1","Cnt2","Cnt3","Cnt4","Cnt5","Cnt6", "Pct1","Pct2","Pct3","Pct4","Pct5","Pct6", "T01EF","T01NL","T01nC","NotT1","UserID")]; For8Pt1 <- subset(For8Pt0,For8Pt0$T01EF==1); For8Pt2 <- subset(For8Pt0,For8Pt0$T01NL==1); For8Pt3 <- subset(For8Pt0,For8Pt0$T01nC==1); For8Pt4 <- subset(For8Pt0,For8Pt0$NotT1==1); ############################################################ # Construct Table 8. # # * THERE ARE DISCREPANCIES WITH THE MANUSCRIPT. # # * In this run, we found one fewer person from the "1% by # # number of contests" group who played in at least one # # 50/50 contest. # # Manuscript This Run # # AtLeastOne AtLeastHalf AtLeastOne AtLeastHalf # # Count Pct Count Pct Count Pct Count Pct # # 50/50 101 97.1 15 14.4 102 98.1 15 14.4 # # * Many of the counts for the "Remainder of the player # # pool" group do not match the counts in the manuscript, # # but they are not different enough for the listed # # percentages to change. # # Manuscript This Run # # AtLeastOne AtLeastHalf AtLeastOne AtLeastHalf # # Count Pct Count Pct Count Pct Count Pct # # 50/50 5848 57.5 702 6.9 5849 57.5 699 6.9 # # Heads Up 3528 34.7 386 3.8 3528 34.7 385 3.8 # # Mults 4948 48.7 234 2.3 4948 48.7 234 2.3 # # League 5191 51.1 295 2.9 5191 51.1 295 2.9 # # Move up 5659 55.7 153 1.5 5659 55.7 151 1.5 # # Others 9744 95.8 6639 65.3 9744 95.8 6644 65.3 # ############################################################ EpiTab8 <- data.frame(Class=c("50/50 (except Head-to-Head)", "Head-to-Head","Multiplier", "League (except 50/50)", "Move your way up","Other contests"), ALOCT01EF=0,ALOPT01EF=0,ALHCT01EF=0,ALHPT01EF=0, ALOCT01NL=0,ALOPT01NL=0,ALHCT01NL=0,ALHPT01NL=0, ALOCT01nC=0,ALOPT01nC=0,ALHCT01nC=0,ALHPT01nC=0, ALOCNotT1=0,ALOPNotT1=0,ALHCNotT1=0,ALHPNotT1=0, stringsAsFactors=FALSE); # Counts and percentages for the top 1% by Total Entry Fees EpiTab8$ALOCT01EF[1] = sum(For8Pt1$Cnt1>0); EpiTab8$ALOCT01EF[2] = sum(For8Pt1$Cnt2>0); EpiTab8$ALOCT01EF[3] = sum(For8Pt1$Cnt3>0); EpiTab8$ALOCT01EF[4] = sum(For8Pt1$Cnt4>0); EpiTab8$ALOCT01EF[5] = sum(For8Pt1$Cnt5>0); EpiTab8$ALOCT01EF[6] = sum(For8Pt1$Cnt6>0); EpiTab8$ALOPT01EF = 100 * EpiTab8$ALOCT01EF / nTop01; EpiTab8$ALHCT01EF[1] = sum(For8Pt1$Pct1>=0.5); EpiTab8$ALHCT01EF[2] = sum(For8Pt1$Pct2>=0.5); EpiTab8$ALHCT01EF[3] = sum(For8Pt1$Pct3>=0.5); EpiTab8$ALHCT01EF[4] = sum(For8Pt1$Pct4>=0.5); EpiTab8$ALHCT01EF[5] = sum(For8Pt1$Pct5>=0.5); EpiTab8$ALHCT01EF[6] = sum(For8Pt1$Pct6>=0.5); EpiTab8$ALHPT01EF = 100 * EpiTab8$ALHCT01EF / nTop01; # Counts and percentages for the top 1% by Net Loss EpiTab8$ALOCT01NL[1] = sum(For8Pt2$Cnt1>0); EpiTab8$ALOCT01NL[2] = sum(For8Pt2$Cnt2>0); EpiTab8$ALOCT01NL[3] = sum(For8Pt2$Cnt3>0); EpiTab8$ALOCT01NL[4] = sum(For8Pt2$Cnt4>0); EpiTab8$ALOCT01NL[5] = sum(For8Pt2$Cnt5>0); EpiTab8$ALOCT01NL[6] = sum(For8Pt2$Cnt6>0); EpiTab8$ALOPT01NL = 100 * EpiTab8$ALOCT01NL / nTop01; EpiTab8$ALHCT01NL[1] = sum(For8Pt2$Pct1>=0.5); EpiTab8$ALHCT01NL[2] = sum(For8Pt2$Pct2>=0.5); EpiTab8$ALHCT01NL[3] = sum(For8Pt2$Pct3>=0.5); EpiTab8$ALHCT01NL[4] = sum(For8Pt2$Pct4>=0.5); EpiTab8$ALHCT01NL[5] = sum(For8Pt2$Pct5>=0.5); EpiTab8$ALHCT01NL[6] = sum(For8Pt2$Pct6>=0.5); EpiTab8$ALHPT01NL = 100 * EpiTab8$ALHCT01NL / nTop01; # Counts and percentages for the top 1% by Number of Contests EpiTab8$ALOCT01nC[1] = sum(For8Pt3$Cnt1>0); EpiTab8$ALOCT01nC[2] = sum(For8Pt3$Cnt2>0); EpiTab8$ALOCT01nC[3] = sum(For8Pt3$Cnt3>0); EpiTab8$ALOCT01nC[4] = sum(For8Pt3$Cnt4>0); EpiTab8$ALOCT01nC[5] = sum(For8Pt3$Cnt5>0); EpiTab8$ALOCT01nC[6] = sum(For8Pt3$Cnt6>0); EpiTab8$ALOPT01nC = 100 * EpiTab8$ALOCT01nC / nTop01; EpiTab8$ALHCT01nC[1] = sum(For8Pt3$Pct1>=0.5); EpiTab8$ALHCT01nC[2] = sum(For8Pt3$Pct2>=0.5); EpiTab8$ALHCT01nC[3] = sum(For8Pt3$Pct3>=0.5); EpiTab8$ALHCT01nC[4] = sum(For8Pt3$Pct4>=0.5); EpiTab8$ALHCT01nC[5] = sum(For8Pt3$Pct5>=0.5); EpiTab8$ALHCT01nC[6] = sum(For8Pt3$Pct6>=0.5); EpiTab8$ALHPT01nC = 100 * EpiTab8$ALHCT01nC / nTop01; # Counts and percentages for those not in the top 1% groups EpiTab8$ALOCNotT1[1] = sum(For8Pt4$Cnt1>0); EpiTab8$ALOCNotT1[2] = sum(For8Pt4$Cnt2>0); EpiTab8$ALOCNotT1[3] = sum(For8Pt4$Cnt3>0); EpiTab8$ALOCNotT1[4] = sum(For8Pt4$Cnt4>0); EpiTab8$ALOCNotT1[5] = sum(For8Pt4$Cnt5>0); EpiTab8$ALOCNotT1[6] = sum(For8Pt4$Cnt6>0); EpiTab8$ALOPNotT1 = 100 * EpiTab8$ALOCNotT1 / nNotT1; EpiTab8$ALHCNotT1[1] = sum(For8Pt4$Pct1>=0.5); EpiTab8$ALHCNotT1[2] = sum(For8Pt4$Pct2>=0.5); EpiTab8$ALHCNotT1[3] = sum(For8Pt4$Pct3>=0.5); EpiTab8$ALHCNotT1[4] = sum(For8Pt4$Pct4>=0.5); EpiTab8$ALHCNotT1[5] = sum(For8Pt4$Pct5>=0.5); EpiTab8$ALHCNotT1[6] = sum(For8Pt4$Pct6>=0.5); EpiTab8$ALHPNotT1 = 100 * EpiTab8$ALHCNotT1 / nNotT1; Scrap2 <- EpiTab8; Scrap2[,c(2:17)] = round(Scrap2[,c(2:17)],digits=1); Scrap2[,c(1,2:5)] # T01EF # Class ALOC ALOP ALHC ALHP # 50/50 (except Head-to-Head) 92 88.5 10 9.6 # Head-to-Head 69 66.3 8 7.7 # Multiplier 76 73.1 0 0.0 # League (except 50/50) 79 76.0 0 0.0 # Move your way up 99 95.2 0 0.0 # Other contests 104 100.0 56 53.8 Scrap2[,c(1,6:9)] # T01NL # Class ALOC ALOP ALHC ALHP # 50/50 (except Head-to-Head) 79 76.0 10 9.6 # Head-to-Head 64 61.5 3 2.9 # Multiplier 68 65.4 0 0.0 # League (except 50/50) 75 72.1 0 0.0 # Move your way up 94 90.4 2 1.9 # Other contests 104 100.0 64 61.5 Scrap2[,c(1,10:13)] # T01nC # Class ALOC ALOP ALHC ALHP # 50/50 (except Head-to-Head) 101 97.1 15 14.4 # Head-to-Head 86 82.7 9 8.7 # Multiplier 97 93.3 0 0.0 # League (except 50/50) 100 96.2 1 1.0 # Move your way up 99 95.2 1 1.0 # Other contests 104 100.0 35 33.7 Scrap2[,c(1,14:17)] # NotT1 # Class ALOC ALOP ALHC ALHP # 50/50 (except Head-to-Head) 5849 57.5 699 6.9 # Head-to-Head 3528 34.7 385 3.8 # Multiplier 4948 48.7 234 2.3 # League (except 50/50) 5191 51.1 295 2.9 # Move your way up 5659 55.7 151 1.5 # Other contests 9744 95.8 6644 65.3 ############################################################ # Perform the Fisher's exact tests. # ############################################################ FisherTab8 <- data.frame(Sport=EpiTab8$Class[1:6], pValueALOEF=0,pValueALHEF=0, pValueALONL=0,pValueALHNL=0, pValueALOnC=0,pValueALHnC=0, SigALOEF="Accept",SigALHEF="Accept", SigALONL="Accept",SigALHNL="Accept", SigALOnC="Accept",SigALHnC="Accept", stringsAsFactors=FALSE); i1 <- 6; while (i1 > 0) { Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01EF, subset=(For7Pt0$T01EF+For7Pt0$NotT1==1)); FisherTab8$pValueALOEF[i1] = fisher.test(Scrap3)$p.value; Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01NL, subset=(For7Pt0$T01NL+For7Pt0$NotT1==1)); FisherTab8$pValueALONL[i1] = fisher.test(Scrap3)$p.value; Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1]>0)+For8Pt0$T01nC, subset=(For7Pt0$T01nC+For7Pt0$NotT1==1)); FisherTab8$pValueALOnC[i1] = fisher.test(Scrap3)$p.value; Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01EF, subset=(For7Pt0$T01EF+For7Pt0$NotT1==1)) FisherTab8$pValueALHEF[i1] = fisher.test(Scrap3)$p.value; Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01NL, subset=(For7Pt0$T01NL+For7Pt0$NotT1==1)); FisherTab8$pValueALHNL[i1] = fisher.test(Scrap3)$p.value; Scrap3 <- xtabs(rep(1,nD2C)~(For8Pt0[i1+6]>=0.5)+For8Pt0$T01nC, subset=(For7Pt0$T01nC+For7Pt0$NotT1==1)); FisherTab8$pValueALHnC[i1] = fisher.test(Scrap3)$p.value; i1 = i1 - 1; } FisherTab8$SigALOEF[FisherTab8$pValueALOEF<0.001] = "Reject"; FisherTab8$SigALHEF[FisherTab8$pValueALHEF<0.001] = "Reject"; FisherTab8$SigALONL[FisherTab8$pValueALONL<0.001] = "Reject"; FisherTab8$SigALHNL[FisherTab8$pValueALHNL<0.001] = "Reject"; FisherTab8$SigALOnC[FisherTab8$pValueALOnC<0.001] = "Reject"; FisherTab8$SigALHnC[FisherTab8$pValueALHnC<0.001] = "Reject"; Scrap4 <- FisherTab8; Scrap4[,2:7] = round(Scrap4[,2:7],digits=3); Scrap4[,c(1,2:7)] # pValue # Sport ALOEF ALHEF ALONL ALHNL ALOnC ALHnC # 50/50 (except Head-to-Head) 0.000 0.245 0.000 0.245 0.000 0.006 # Head-to-Head 0.000 0.063 0.000 1.000 0.000 0.018 # Multiplier 0.000 0.179 0.001 0.179 0.000 0.179 # League (except 50/50) 0.000 0.077 0.000 0.077 0.000 0.375 # Move your way up 0.000 0.410 0.000 0.669 0.000 1.000 # Other contests 0.023 0.017 0.023 0.410 0.023 0.000 Scrap4[,c(1,8:13)] # Result # Sport ALOEF ALHEF ALONL ALHNL ALOnC ALHnC # 50/50 (except H2H) Reject Accept Reject Accept Reject Accept # Head-to-Head Reject Accept Reject Accept Reject Accept # Multiplier Reject Accept Reject Accept Reject Accept # League (except 50/50) Reject Accept Reject Accept Reject Accept # Move your way up Reject Accept Reject Accept Reject Accept # Other contests Accept Accept Accept Accept Accept Reject