
    Ug0              	       X   d Z ddlZddlmZmZ ddlZddlZddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZmZ  e
            Z e	            Z d Z!ej"        #                    deez             d             Z$d Z%d Z&d Z'd Z(d Z)ej"        #                    dddg          d             Z*d Z+d Z,d Z-d Z.d Z/ ed ed,i ddi           ej"        #                    d!d"d#g          d$                         Z0 ed ed,i dd%i           ej"        #                    d!d"d#g          d&                         Z1d' Z2ej"        #                    d(e          d)             Z3d* Z4ej"        #                    deez             d+             Z5dS )-zD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)CSC_CONTAINERSCSR_CONTAINERSc                 `   t          j        ddgddgg          }t          j        ddgddgg          }t          dgg dddgd          }t                      5  |D ]6}t	          dd	| i|                    |                              |           7	 d
d
d
           d
S # 1 swxY w Y   d
S )z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r   fitpredict)global_random_seedX_trainX_testgridparamss        b/var/www/surfInsights/venv3-11/lib/python3.11/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforestr)   "   s    hAA'((GX1v1v&''F]]]$PUWW D 
		   	 	FFF);FvFFJJ gfoooo	                 s   :B##B'*B'sparse_containerc                    t          |           }t          t          j        dd         |          \  }}t	          ddgddgd          } ||          } ||          }|D ]}t          dd	| d
|                    |          }	|	                    |          }
t          dd	| d
|                    |          }|                    |          }t          |
|           dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   r   )	r   r   diabetesdatar   r   r!   r"   r   )r#   r*   rngr$   r%   r&   X_train_sparseX_test_sparser'   sparse_classifiersparse_resultsdense_classifierdense_resultss                r(   test_iforest_sparser8   2   s4    /
0
0C&x}SbS'9LLLOGV#s4-PPQQD%%g..N$$V,,M : :+ 
*<
 
@F
 

#n

 	 +22=AA + 
*<
 
@F
 

#g,, 	 )0088>=9999: :    c                  v   t           j        } d}t          j        t          |          5  t          d                              |            ddd           n# 1 swxY w Y   t          j                    5  t          j	        dt                     t          d                              |            ddd           n# 1 swxY w Y   t          j                    5  t          j	        dt                     t          t          j        d                                        |            ddd           n# 1 swxY w Y   t          j        t                    5  t                                          |                               | ddd	df                    ddd           dS # 1 swxY w Y   dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   Nerrorautor   r   )irisr0   pytestwarnsUserWarningr   r!   warningscatch_warningssimplefilterr   int64raises
ValueErrorr"   )Xwarn_msgs     r(   test_iforest_errorrN   L   s~   	A
 EH	k	2	2	2 1 1D)))--a0001 1 1 1 1 1 1 1 1 1 1 1 1 1 1		 	"	" 3 3g{333F+++//2223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
	 	"	" 8 8g{333BHQKK00044Q7778 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 
z	"	" 3 3a  ((111abb52223 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3sI   $AA!A8>CC	C AD<<E E  AF..F25F2c            
         t           j        } t                                          |           }|j        D ]K}|j        t          t          j        t          j	        | j
        d                                       k    sJ LdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)rB   r0   r   r!   estimators_	max_depthintr   ceillog2shape)rL   clfests      r(   test_recalculate_max_depthrX   b   sy    	A





"
"C B B}BGBGAGAJ,?,?$@$@ A AAAAAAB Br9   c                     t           j        } t                                          |           }|j        | j        d         k    sJ t          d          }d}t          j        t          |          5  |                    |            d d d            n# 1 swxY w Y   |j        | j        d         k    sJ t          d                              |           }|j        d| j        d         z  k    sJ d S )Nr   i  r?   r;   r<   g?)	rB   r0   r   r!   max_samples_rU   rC   rD   rE   )rL   rV   rM   s      r(   test_max_samples_attributer[   j   s'   	A





"
"Cqwqz))))
c
*
*
*CDH	k	2	2	2  


              qwqz))))
c
*
*
*
.
.q
1
1CsQWQZ///////s   3BBBc                    t          |           }t          t          j        |          \  }}t	          d|                               |          }|                    d           |                    |          }|                    d           |                    |          }t          ||           t	          d|                               |          }|                    |          }t          ||           dS )zCheck parallel regression.r-   r   )n_jobsr   r   )r]   r   N)	r   r   r/   r0   r   r!   
set_paramsr"   r   )r#   r1   r$   r%   ensembley1y2y3s           r(    test_iforest_parallel_regressionrc   y   s    
/
0
0C&x}3GGGOGVa6HIIIMMgVVHq!!!			&	!	!Bq!!!			&	!	!Bb"%%%a6HIIIMMgVVH			&	!	!Bb"%%%%%r9   c                    t          |           }d|                    dd          z  }|                    t          j        |dz   |dz
  f                    }|dd         }|                    ddd	          }t          j        |dd         |f          }t          j        d
gdz  dgdz  z             }t          d|                              |          }|	                    |           }t          ||          dk    sJ dS )z#Test Isolation Forest performs wellg333333?iX  r   Nr>   r   )   r   )lowhighsizer   rf   d   )r   r   g\(\?)r   randnpermutationr   vstackuniformr    r   r!   decision_functionr
   )	r#   r1   rL   r$   
X_outliersr%   y_testrV   y_preds	            r(   test_iforest_performancers      s    /
0
0CciiQA	1q5!a%.1122AhG !(;;JY$%%*-..FXqcCi1#)+,,F c
<
<
<
@
@
I
IC ##F+++F ((4//////r9   contamination      ?rA   c           	         ddgddgddgddgddgddgddgddgg}t          || 	          }|                    |           |                    |           }|                    |          }t	          j        |dd                    t	          j        |d d                   k    sJ t          |d
dgz  ddgz  z              d S )Nre   r   r         	   r   rt      )r   r!   ro   r"   r   minmaxr   )rt   r#   rL   rV   decision_funcpreds         r(   test_iforest_worksr      s     bB8b"X1v1v1v1vAwOA '9
W
W
WCGGAJJJ**1---M;;q>>D6-$%%}SbS/A(B(BBBBBtQ!WqB4x/00000r9   c                      t           j        } t                                          |           }|j        |j        k    sJ d S N)rB   r0   r   r!   rZ   _max_samples)rL   rV   s     r(   test_max_samples_consistencyr      s?    	A





"
"Cs///////r9   c                     t          d          } t          t          j        d d         t          j        d d         |           \  }}}}t          d          }|                    ||           |                    |           d S )Nr   r,   r-   g?)max_features)r   r   r/   r0   targetr   r!   r"   )r1   r$   r%   y_trainrq   rV   s         r(    test_iforest_subsampled_featuresr      s    
Q

C'7crcHOCRC0s( ( ($GVWf s
+
+
+CGGGWKKr9   c                     dt          j        d          t           j        z   z  dz
  } dt          j        d          t           j        z   z  dz
  }t          t	          dg          dg           t          t	          dg          dg           t          t	          d	g          d
g           t          t	          dg          | g           t          t	          dg          |g           t          t	          t          j        g d                    dd
| |g           t	          t          j        d                    }t          |t          j        |                     d S )N       @g      @g?g     0@g}?r   g        r   r   r        )r   r   r   r   )	r   logeuler_gammar   r	   r    aranger   sort)
result_one
result_twoavg_path_lengths      r(    test_iforest_average_path_lengthr      s@    sbn45GJu67:MMJ(!--u555(!--u555(!--u555(!--
|<<<(#//*>>>RXnnn5566	c:z*  
 +29Q<<88O(@(@AAAAAr9   c                  ,   ddgddgddgg} t          d                              |           }t                                          |           }t          |                    ddgg          |                    ddgg          |j        z              t          |                    ddgg          |                    ddgg          |j        z              t          |                    ddgg          |                    ddgg                     d S )Nr   r   皙?)rt   r   )r   r!   r   score_samplesro   offset_)r$   clf1clf2s      r(   test_score_samplesr      s,   1v1v1v&G---11'::D  ))DS#J<((c
|,,t|;   S#J<((c
|,,t|;   S#J<(($*<*<sCj\*J*J    r9   c                  f   t          d          } |                     dd          }t          dd| d          }|                    |           |j        d         }|                    d           |                    |           t          |j                  dk    sJ |j        d         |u sJ dS )	z/Test iterative addition of iTrees to an iForestr      r   r.   T)r   r   r   
warm_start)r   N)r   rk   r   r!   rP   r^   len)r1   rL   rV   tree_1s       r(   test_iforest_warm_startr      s     Q

C		"aA Rcd  C GGAJJJ_QFNNN###GGAJJJs2%%%%?1''''''r9   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )side_effectzcontamination, n_predict_calls)ru   r   )rA   r   c                 @    t          ||           | j        |k    sJ d S r   r   
call_countmocked_get_chunkrt   n_predict_callsr#   s       r(   test_iforest_chunks_works1r     /     }&8999&/999999r9   r.   c                 @    t          ||           | j        |k    sJ d S r   r   r   s       r(   test_iforest_chunks_works2r     r   r9   c                     t          j        d          } t                      }|                    |            t           j                            d          }t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    | dz             dk              sJ t          |                    | dz
            dk              sJ t          j	        |                    dd          dd          } t                      }|                    |            t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    t          j        d                    dk              sJ |                    dd          } t                      }|                    |            t          |                    |           dk              sJ t          |                    |                    dd                    dk              sJ t          |                    t          j        d                    dk              sJ dS )z=Test whether iforest predicts inliers when using uniform data)rj   r.   r   r   rj   r.   N)
r   onesr   r!   randomRandomStateallr"   rk   repeat)rL   iforestr1   s      r(   test_iforest_with_uniform_datar     sl    		AGKKNNN
)


"
"Cwq!!Q&'''''wsyyb1122a788888wq1u%%*+++++wq1u%%*+++++ 		#))Ar""C++AGKKNNNwq!!Q&'''''wsyyb1122a788888wrwy1122a788888 			!RAGKKNNNwq!!Q&'''''wsyyb1122a788888wrwy1122a78888888r9   csc_containerc                     t          ddd          \  }} | |          }t          ddd                              |           d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rj   r   	n_samples
n_featuresr   r.      r   )r   r   r]   N)r   r   r!   )r   rL   _s      r(   *test_iforest_with_n_jobs_does_not_segfaultr   =  sT     CaPPPDAqaAQ???CCAFFFFFr9   c                     t          j        d          } t          j                            d          }|                     |                    d          dg          }t          dd          }t          j	                    5  t          j
        dt                     |                    |           d	d	d	           d	S # 1 swxY w Y   d	S )
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    pandasr   ry   a)r0   columnsg?r|   r@   N)rC   importorskipr   r   r   	DataFramerk   r   rF   rG   rH   rE   r!   )pdr1   rL   models       r(   #test_iforest_preserve_feature_namesr   H  s     
	X	&	&B
)


"
"C
#))A,,66A$???E		 	"	"  g{333		!                 s   0C  CCc                 b   t          ddd          \  }} | |          }|                                 d}t          d|d                              |          }|                    |          }|dk                                     |j        d         z  t          j        |          k    sJ dS )	zCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r,   ry   r   r   r   r   )r   rt   r   N)	r   sort_indicesr   r!   ro   sumrU   rC   approx)r*   rL   r   rt   r   
X_decisions         r(   -test_iforest_sparse_input_float_contaminationr   Z  s     JJJDAqANNMm!  	c!ff  **1--JN!!AGAJ.&-2N2NNNNNNNr9   r   )6__doc__rF   unittest.mockr   r   numpyr   rC   sklearn.datasetsr   r   r   sklearn.ensembler   sklearn.ensemble._iforestr	   sklearn.metricsr
   sklearn.model_selectionr   r   sklearn.utilsr   sklearn.utils._testingr   r   r   r   sklearn.utils.fixesr   r   rB   r/   r)   markparametrizer8   rN   rX   r[   rc   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   r(   <module>r      s     % % % % % % % %      J J J J J J J J J J , , , , , , : : : : : : ) ) ) ) ) ) C C C C C C C C , , , , , ,            ? > > > > > > > y{{=??    +^n-LMM: : NM:23 3 3,B B B0 0 0& & &(0 0 00 4.991 1 :910 0 0  B B B(  "( ( (. 0++*++   9I{;STT: : UT	 
: 0,,+,,   9I{;STT: : UT	 
:9 9 9D .99G G :9G  $ +^n-LMMO O NMO O Or9   