
    ݽTf&                         d Z ddlZddlmc mZ ddlZddlm	Z	 ddl
mZmZmZmZmZ ddlmZmZ d Zd Zd Zd	 Zd
 Zd Zd ZdS )z5
Created on Fri May 10 10:15:23 2024

@author: atdou
    N)Regression_Outlier_Removal)Linear_RegressionDecision_Tree_RegressionGradient_Boost_RegressionQuadratic_RegressionPolynomial_Regression)create_regress_dataCM_ROR_predictionsc            	         t                      } d }d\  }}|                     dd||d           |                     |ddd	           |                     d
d           | j        }| j        }t          g dd          }d|i}t          |d
ddddd          }|                    ||           d|j	        i}	t          | j        |	          d         }
t          j        |dgd||z
  gg          }|
|k    }|j        } |            }|st          j        d|fd|
|f          dt#          j                    v st          j        |
          rt          j        |
          nddt#          j                    v st          j        |          rt          j        |          nddz  }d|t          j        |          t          j        |          dz  }t+          t          j        |                    dx}x}}dS )z
    Description
    -----------
    testing that fit method correctly classifies all points using the final iteration (same as average in this case) 
    of a polynomial regressor.  First have to create data using create_data class.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S 	N
   r                      xs    dC:\Users\atdou\OneDrive\Desktop\Files\Coding\Python\Programs\XtraMLTools\tests\test_Preprocessing.py<lambda>z"test_ROR_fit_ave.<locals>.<lambda>   =    "adQY,1Q4'!AaD!G)3a!f<     i     r   r   x_minx_maxNn_outdim      funcdevmin_ymax_yIQRmetricfactorx_0x_1x_2poly_columnsdegreePRGz?r         ?r.   r/   n_maxpercentwindow	threshold	train_aver   ==z%(py0)s == %(py2)sCM	CM_actualpy0py2>assert %(py8)s
{%(py8)s = %(py6)s
{%(py6)s = %(py4)s.all
}()
}py4py6py8N)r	   set_Xset_yclassify_outliersXyr   r   fittrain_data_aver
   datanumpyarrayall
@pytest_ar_call_reprcompare@py_builtinslocals_should_repr_global_name	_safereprAssertionError_format_explanation)data_expfr"   r#   rP   rQ   r7   
model_dictROR	data_dictrC   rD   @py_assert1@py_assert5@py_assert7@py_format3@py_format9s                    r   test_ROR_fit_averj      sD    #$$H<<AHAuNN1AN>>>NNq"N555eA666
A
A	,A,A,A!	L	L	LBJ
$Zar[_hiux
y
y
yCGGAaLLLc01I	HM9	5	5k	BBeQZAeG566I""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""r   c            	      $   t                      } d\  }}d }|                     dd||d           |                     |ddd	           |                     d
d           | j        }| j        }t          g dd          }d|i}t          |d
ddddd          }|                    ||           |	                    ||           d|j
        i}	t          | j        |	          d         }
t          j        |dgd||z
  gg          }|
|k    }|j        } |            }|st!          j        d|fd|
|f          dt%          j                    v st!          j        |
          rt!          j        |
          nddt%          j                    v st!          j        |          rt!          j        |          nddz  }d|t!          j        |          t!          j        |          dz  }t-          t!          j        |                    dx}x}}dS )z
    Description
    -----------
    testing that the transform method correctly classifies all points it just fit, using a polynomial regressor.
    First have to create data using create_data class.
    r   c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z(test_ROR_transform_ave.<locals>.<lambda>1   r   r   r   r   r   r   r%   r&   r'   r,   r-   r0   r4   r7   r8   r   r9   r:   test_aver   r@   rB   rC   rD   rE   rH   rI   N)r	   rM   rN   rO   rP   rQ   r   r   rR   	transformtest_data_aver
   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   )r`   r"   r#   ra   rP   rQ   r7   rb   rc   rd   rC   rD   re   rf   rg   rh   ri   s                    r   test_ROR_transform_averp   (   sX    #$$HHAu<<ANN1AN>>>NNq"N555eA666
A
A	,A,A,A!	L	L	LBJ
$Zar[_hiux
y
y
yCGGAaLLLMM!AS./I	HM9	5	5j	ABeQZAeG566I""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""r   c            	         t                      } d }d\  }}|                     dd||d           |                     |ddd	           |                     d
d           | j        }| j        }t                      }t          dg          }t          g dd          }t          d          }	t          d          }
t          d          }t          d          }t          dd          }||||	|
|||d}t          |d
ddddd          }|                    ||           |                    ||           |j        D ]4}|j        |         d         d         |j        |         d         d         k                                    |z  }|j        }||k    }|st'          j        d|fd ||f          d!t+          j                    v st'          j        |          rt'          j        |          nd!d"t+          j                    v st'          j        |          rt'          j        |          nd"t'          j        |          d#z  }d$d%|iz  }t3          t'          j        |                    d&x}}6d&S )'a  
    Description
    -----------
    testing that when do ROR.fit(X,y) and get final prediction of outliers, this agrees with the ROR.transform(X,y), to within
    the percent_necessary threshold - because can't guarantee they'll be the same after every fit.  Checking this is true for every 
    model in self.models.  If it isn't, then doesn't mean anything is necessarily wrong with program, but wouldn't be good for 
    results.    First have to create data using create_data class.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z2test_ROR_fit_and_transform_dicts.<locals>.<lambda>L   r   r   r   r   r   r   r   r%   r&   r'   r,   r-   r1   quad_columnsr0   r4   r   )min_samples_leafr   r   2   {Gz?ru   	ccp_alpha)LRQRr7   DTR_0DTR_1DTR_2DTR_3GBRr8   r   r9   r:   Outlying_Predictionr   )>)z8%(py0)s > %(py4)s
{%(py4)s = %(py2)s.percent_necessary
}percent_matchrc   )rF   rG   rJ   zassert %(py6)srK   N)r	   rM   rN   rO   rP   rQ   r   r   r   r   r   r   rR   rn   models
train_data	test_datasumpercent_necessaryrX   rY   rZ   r[   r\   r]   r^   r_   )r`   ra   r"   r#   rP   rQ   rz   r{   r7   r|   r}   r~   r   r   rb   rc   keyr   @py_assert3re   @py_format5@py_format7s                         r    test_ROR_fit_and_transform_dictsr   B   s    #$$H<<AHAuNN1AN>>>NNq"N555eA666
A
A			B	E7	3	3	3B	,A,A,A!	L	L	LB$a888E$a888E$b999E$b999E
#Q$
G
G
GC"BY^inwz{{J
$Zar[_hiux
y
y
yCGGAaLLLMM!Az 5 5,R01FG3=Y\K]^_K`avKww||~~  @A  A444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445 5r   c            	      Z   t                      } d }|                     ddddd           |                     |ddd	
           |                     dd           | j        }| j        }t                      }t          dg          }t          g dd          }t          dd          }t          dd          }t          dd          }	t          dd          }
t          dd          }||||||	|
|d}t          |dddddd          }|                    ||dd           dS )z
    Just testing that cross_val_hist executes properly, outputting histograms for each of the folds and models, can't say if they're correct.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z)test_ROR_cross_val_hist.<locals>.<lambda>i   r   r   r   r        r   r   (   r'   r,   r-   r1   rs   r0   r4   -C6?rx   rw   皙?r   rz   r{   r7   r}   r~   r   DTR_4r   r%   r8   r   r9   r:   r   )n_foldsn_binsN)r	   rM   rN   rO   rP   rQ   r   r   r   r   r   r   cross_val_histr`   ra   rP   rQ   rz   r{   r7   r}   r~   r   r   r   Reg_Dictrc   s                 r   test_ROR_cross_val_histr   d   sb    #$$H<<ANN1CQN???NNq2N666eA666
A
A			B	E7	3	3	3B	,A,A,A!	L	L	LB$a6JJJE$a4HHHE$a3GGGE$a1EEEE
#Q$
G
G
GC"eeV[fksvwwH
$XeARY]fgsw
x
x
xCq!Qr22222r   c            	      T   t                      } d }|                     ddddd           |                     |ddd	
           |                     dd           | j        }| j        }t                      }t          dg          }t          g dd          }t          dd          }t          dd          }t          dd          }	t          dd          }
t          dd          }||||||	|
|d}t          |dddddd          }|                    ||           dS )z
    Just testing that cross_val_scores executes properly, outputting dataframe of training and testing scores.  Can't say if they're correct.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z+test_ROR_cross_val_scores.<locals>.<lambda>   r   r   r   r   r   r   r   r   r   r   r'   r,   r-   r1   rs   r0   r4   r   rx   rw   r   r   r   r%   r8   r   r9   r:   N)r	   rM   rN   rO   rP   rQ   r   r   r   r   r   r   cross_val_scoresr   s                 r   test_ROR_cross_val_scoresr   |   s[    #$$H<<ANN1CQN???NNq2N666eA666
A
A			B	E7	3	3	3B	,A,A,A!	L	L	LB$a6JJJE$a4HHHE$a3GGGE$a1EEEE
#Q$
G
G
GC"eeV[fksvwwH
$XeARY]fgsw
x
x
xCAr   c            	         t                      } d }|                     ddddd           |                     |ddd	
           |                     dd           | j        }| j        }t                      }t          dg          }t          g dd          }t          dd          }t          dd          }t          dd          }	t          dd          }
t          dd          }||||||	|
|d}t          |dddddd          }|                    ||           |j        d         d         |j        d         d         d}|                    |d !           d"S )#zP
    just testing that the res_hist method works.  Can't say if it's right.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z#test_ROR_res_hist.<locals>.<lambda>   r   r   r   r   r   r   r   r   r   r   r'   r,   r-   r1   rs   r0   r4   r   rx   rw   r   r   r   r%   r8   r   r9   r:   r   r   r7   r   r7   r   )r   N)r	   rM   rN   rO   rP   rQ   r   r   r   r   r   r   rR   r   res_histr`   ra   rP   rQ   rz   r{   r7   r}   r~   r   r   r   r   rc   rd   s                  r   test_ROR_res_histr      s    #$$H<<ANN1CQN???NNq2N666eA666
A
A			B	E7	3	3	3B	,A,A,A!	L	L	LB$a6JJJE$a4HHHE$a3GGGE$a1EEEE
#Q$
G
G
GC"eeV[fksvwwH
$XeARY]fgsw
x
x
xCGGAaLLL.1"5S^D=QRT=UVVILL2L&&&&&r   c            	         t                      } d }|                     ddddd           |                     |ddd	
           |                     dd           | j        }| j        }t                      }t          dg          }t          g dd          }t          dd          }t          dd          }t          dd          }	t          dd          }
t          dd          }||||||	|
|d}t          |dddddd          }|                    ||           |j        d         d         |j        d         d         d}|                    |           d S )!zI
    just testing that the outlier_overlap method outputs something.
    c                 l    d| d         dz  z  d| d         z  z   d| d         dz  z  z   d| d         z  z
  S r   r   r   s    r   r   z*test_ROR_outlier_overlap.<locals>.<lambda>   r   r   r   r   r   r   r   r   r   r   r'   r,   r-   r1   rs   r0   r4   r   rx   rw   r   r   r   r%   r8   r   r9   r:   r   r   r7   r   N)r	   rM   rN   rO   rP   rQ   r   r   r   r   r   r   rR   r   outlier_overlapr   s                  r   test_ROR_outlier_overlapr      s    #$$H<<ANN1CQN???NNq2N666eA666
A
A			B	E7	3	3	3B	,A,A,A!	L	L	LB$a6JJJE$a4HHHE$a3GGGE$a1EEEE
#Q$
G
G
GC"eeV[fksvwwH
$XeARY]fgsw
x
x
xCGGAaLLL.1"5S^D=QRT=UVVI	"""""r   )__doc__builtinsrZ   _pytest.assertion.rewrite	assertionrewriterX   rU   XtraMLTools.Preprocessingr   XtraMLTools.Regressionr   r   r   r   r   auxiliary_classesr	   r
   rj   rp   r   r   r   r   r   r   r   r   <module>r      s&                 @ @ @ @ @ @Q Q Q Q Q Q Q Q Q Q Q Q Q Q E E E E E E E E# # #2# # #45 5 5D3 3 30  0' ' '4# # # # #r   