Bootstrap - A Non-Parametric Approach


 

     
 

Bootstrap is a derivation of Monte Carlo technique introduced by Efron in 1979.  It uses the resampling with replacement method (unlike the resampling with no replacement method that we used in the Lotto Number Generator example).  It is a convenient tool to extract estimates (such as standard deviation and confident interval) from a non-parametric data set (a data set with no underling distribution is assumed) or estimates that do not have a closed form (cannot be expressed in an equation).  Another use of Bootstrap is to populate sample data when the original sample size is small (but notice that Bootstrap works the best with large sample size as all other statistical methods do).

In this example, we want to obtain the standard deviation of median from 15 GPA scores using Bootstrap.  The bootstrap process is as followed:

1. Obtain 20 GPA scores from the original 15 using resampling with placement
    method.
2. Compute the median from these 20 GPA scores and store the median value.
3. Repeat process step one and two 500 times.
4. Compute the mean and standard deviation from these 500 medians.

The bootstrap shows that the mean median is 3.0792 and the standard deviation is 0.0732.  The chart in Figure 1 displays the distribution of the bootstrapped median.

Figure 1







'***********************************************************************
'*                                               Median Computation                              *
'***********************************************************************
Function median(Arr() As Single)
    Call Sort(Arr)
   
    If UBound(Arr) Mod 2 = 1 Then
        median = Arr(Int(UBound(Arr) / 2) + 1)
    Else
        median = (Arr(UBound(Arr) / 2) + Arr(Int(UBound(Arr) / 2) + 1)) / 2
    End If
End Function

'***********************************************************************
'*                                                          Mean                                               *
'***********************************************************************
Function Mean(Arr() As Single)
          Dim Sum As Single
          Dim i As Integer
      
          Sum = 0
          For i = 1 To UBound(Arr)
                    Sum = Sum + Arr(i)
          Next i
          Mean = Sum / UBound(Arr)
End Function

'************************************************************************
'*                                               Standard Deviation                                   *
'************************************************************************
Function StdDev(Arr() As Single)
          Dim i As Integer
          Dim avg As Single, SumSq As Single
          avg = Mean(Arr)
          For i = 1 To UBound(Arr)
                    SumSq = SumSq + (Arr(i) - avg) ^ 2
          Next i
          StdDev = Sqr(SumSq / (UBound(Arr) - 1))
End Function

'*************************************************************************
'*                                                 Computation                                              *
'*************************************************************************
Sub compute()
          Dim Arr(15) As Single
          Dim Average As Single
          Dim Std_Dev As Single
          For i = 1 To 15
                    Arr(i) = Sheets("Sheet1").Cells(i + 4, 2)
          Next i
          Average = Mean(Arr)
          Std_Dev = StdDev(Arr)
          Sheets("Sheet1").Cells(12, 1) = Average
          Sheets("Sheet1").Cells(13, 1) = Std_Dev
End Sub

'***********************************************************************
'*                               Bootstrap Resampling Process                          *
'***********************************************************************
Sub Resample()
    Const n = 15                                   'Original sample size
    Dim i As Integer
    Dim intBootstrapN As Integer     'Bootstrap sample size
    Dim intIteration As Long              'Number of bootstrap iteration
   
    intBootstrapN = Range("G6").Value
    intIteration = Range("G7").Value
   
    ReDim Arr(n) As Single                                   'Array for original values
    ReDim Hold(intBootstrapN) As Single        'Bootstrapped array
    ReDim Hold2(intIteration) As Single            'Array for bootstrapped medians
      
    Randomize
   
    'Read GPA values into array
    For i = 1 To n
        Arr(i) = Sheet1.Cells(i + 4, 2)
    Next i

    For j = 1 To intIteration  
        'Read bootstrapped GPA values into array
        For i = 1 To intBootstrapN
            Hold(i) = Arr(Int(Rnd * n) + 1)
        Next i      
        'Store computed medians into array
        Hold2(j) = median(Hold)
        Cells(3, 7) = j             
    Next j
   
    Average = Mean(Hold2)
    Std_Dev = StdDev(Hold2)
    Cells(9, 7) = Average
    Cells(10, 7) = Std_Dev
   
    Call Sort(Hold2)
    Call Hist(intIteration, 40, Hold2(1), Hold2(intIteration), Hold2)
  
End Sub

'***********************************************************************************
'*                                                       Single Sorting Process                                      *
'***********************************************************************************
Sub Sort(Arr() As Single)
    Dim Temp As Single
    Dim i As Long
    Dim j As Long
    For j = 2 To UBound(Arr)
        Temp = Arr(j)
        For i = j - 1 To 1 Step -1
            If (Arr(i) <= Temp) Then GoTo 10
            Arr(i + 1) = Arr(i)
        Next i
        i = 0
10      Arr(i + 1) = Temp
    Next j
End Sub

'**********************************************************************************
'*                                                Construct Historgram Distribution                        *
'**********************************************************************************
Sub Hist(n As Long, M As Long, Start As Single, Right As Single, Arr() As Single)
    Dim i As Long, j As Long, Find As Long
    Dim Length As Single
    ReDim breaks(M) As Single
    ReDim freq(M) As Single
   
    For i = 1 To M
        freq(i) = 0
    Next i

    Length = (Right - Start) / M
   
    For i = 1 To M
        breaks(i) = Start + Length * i
    Next i
   
    For i = 1 To n
        If (Arr(i) <= breaks(1)) Then freq(1) = freq(1) + 1
        If (Arr(i) >= breaks(M - 1)) Then freq(M) = freq(M) + 1
        For j = 2 To M - 1
            If (Arr(i) > breaks(j - 1) And Arr(i) <= breaks(j)) Then freq(j) = freq(j) + 1
        Next j
    Next i
   
    For i = 1 To M
        Cells(i + 3, 9) = breaks(i)
        Cells(i + 3, 10) = freq(i)
    Next i
End Sub

VBA Codes