Bootstrap
is a derivation of Monte Carlo technique introduced by Efron in
1979. It uses the
resampling
with replacement method (unlike the resampling with no
replacement method that we used in the Lotto Number Generator
example). It is a convenient tool to extract estimates (such as
standard deviation and confident interval) from a non-parametric data
set (a data set with no underling distribution is assumed) or estimates
that do not have a closed form (cannot be expressed in an
equation). Another use of Bootstrap is to populate sample data
when the original sample size is small (but notice that Bootstrap works
the best with large sample size as all other statistical methods do).
In this example, we want to obtain the standard deviation of median
from 15 GPA scores using Bootstrap. The bootstrap process is as
followed:
1. Obtain 20 GPA scores from
the original 15 using resampling with placement
method.
2. Compute the median from
these 20 GPA scores and store the median value.
3. Repeat process step one and
two 500 times.
4. Compute the mean and
standard deviation from these 500 medians.
The bootstrap shows that the mean median is 3.0792 and the standard
deviation is 0.0732. The chart in Figure 1 displays the
distribution of the bootstrapped median.
'***********************************************************************
'*
Median
Computation
*
'***********************************************************************
Function median(Arr() As Single)
Call Sort(Arr)
If UBound(Arr) Mod 2 = 1 Then
median = Arr(Int(UBound(Arr)
/ 2) + 1)
Else
median = (Arr(UBound(Arr) /
2) + Arr(Int(UBound(Arr) / 2) + 1)) / 2
End If
End Function
'***********************************************************************
'*
Mean
*
'***********************************************************************
Function Mean(Arr() As Single)
Dim Sum As Single
Dim i As Integer
Sum = 0
For i = 1 To
UBound(Arr)
Sum = Sum + Arr(i)
Next i
Mean = Sum /
UBound(Arr)
End Function
'************************************************************************
'*
Standard
Deviation
*
'************************************************************************
Function StdDev(Arr() As Single)
Dim i As Integer
Dim avg As
Single, SumSq As Single
avg = Mean(Arr)
For i = 1 To
UBound(Arr)
SumSq = SumSq + (Arr(i) - avg) ^ 2
Next i
StdDev =
Sqr(SumSq / (UBound(Arr) - 1))
End Function
'*************************************************************************
'*
Computation
*
'*************************************************************************
Sub compute()
Dim Arr(15) As
Single
Dim Average As
Single
Dim Std_Dev As
Single
For i = 1 To 15
Arr(i) = Sheets("Sheet1").Cells(i + 4, 2)
Next i
Average =
Mean(Arr)
Std_Dev =
StdDev(Arr)
Sheets("Sheet1").Cells(12, 1) = Average
Sheets("Sheet1").Cells(13, 1) = Std_Dev
End Sub
'***********************************************************************
'*
Bootstrap Resampling
Process
*
'***********************************************************************
Sub Resample()
Const n =
15
'Original sample size
Dim i As Integer
Dim intBootstrapN As Integer 'Bootstrap sample size
Dim intIteration As Long
'Number
of bootstrap iteration
intBootstrapN = Range("G6").Value
intIteration = Range("G7").Value
ReDim Arr(n) As
Single
'Array for original values
ReDim Hold(intBootstrapN) As
Single 'Bootstrapped array
ReDim Hold2(intIteration) As
Single 'Array for
bootstrapped medians
Randomize
'Read GPA
values into array
For i = 1 To n
Arr(i) = Sheet1.Cells(i + 4,
2)
Next i
For j = 1 To intIteration
'Read bootstrapped GPA values into array
For i = 1 To intBootstrapN
Hold(i) = Arr(Int(Rnd *
n) + 1)
Next i
'Store
computed medians into array
Hold2(j) = median(Hold)
Cells(3, 7) =
j
Next j
Average = Mean(Hold2)
Std_Dev = StdDev(Hold2)
Cells(9, 7) = Average
Cells(10, 7) = Std_Dev
Call Sort(Hold2)
Call Hist(intIteration, 40, Hold2(1),
Hold2(intIteration), Hold2)
End Sub
'***********************************************************************************
'*
Single Sorting
Process
*
'***********************************************************************************
Sub Sort(Arr() As Single)
Dim Temp As Single
Dim i As Long
Dim j As Long
For j = 2 To UBound(Arr)
Temp = Arr(j)
For i = j - 1 To 1 Step -1
If
(Arr(i) <= Temp) Then GoTo 10
Arr(i + 1) = Arr(i)
Next i
i = 0
10 Arr(i + 1) = Temp
Next j
End Sub
'**********************************************************************************
'*
Construct Historgram
Distribution
*
'**********************************************************************************
Sub Hist(n As Long, M As Long, Start As Single, Right As Single, Arr()
As Single)
Dim i As Long, j As Long, Find As Long
Dim Length As Single
ReDim breaks(M) As Single
ReDim freq(M) As Single
For i = 1 To M
freq(i) = 0
Next i
Length = (Right - Start) / M
For i = 1 To M
breaks(i) = Start + Length *
i
Next i
For i = 1 To n
If (Arr(i) <= breaks(1))
Then freq(1) = freq(1) + 1
If (Arr(i) >= breaks(M -
1)) Then freq(M) = freq(M) + 1
For j = 2 To M - 1
If
(Arr(i) > breaks(j - 1) And Arr(i) <= breaks(j)) Then freq(j) =
freq(j) + 1
Next j
Next i
For i = 1 To M
Cells(i + 3, 9) = breaks(i)
Cells(i + 3, 10) = freq(i)
Next i
End Sub