@@ -37,12 +37,23 @@ def _check_callbacks(callbacks):
37
37
def _handle_nice_params (embedding : np .ndarray , optim_params : dict ) -> None :
38
38
"""Convert the user friendly params into something the optimizer can
39
39
understand."""
40
+ n_samples = embedding .shape [0 ]
40
41
# Handle callbacks
41
42
optim_params ["callbacks" ] = _check_callbacks (optim_params .get ("callbacks" ))
42
43
optim_params ["use_callbacks" ] = optim_params ["callbacks" ] is not None
43
44
44
45
# Handle negative gradient method
45
46
negative_gradient_method = optim_params .pop ("negative_gradient_method" )
47
+ # Handle `auto` negative gradient method
48
+ if isinstance (negative_gradient_method , str ) and negative_gradient_method == "auto" :
49
+ if n_samples < 10_000 :
50
+ negative_gradient_method = "bh"
51
+ else :
52
+ negative_gradient_method = "fft"
53
+ log .info (
54
+ f"Automatically determined negative gradient method `{ negative_gradient_method } `"
55
+ )
56
+
46
57
if callable (negative_gradient_method ):
47
58
negative_gradient_method = negative_gradient_method
48
59
elif negative_gradient_method in {"bh" , "BH" , "barnes-hut" }:
@@ -78,7 +89,7 @@ def _handle_nice_params(embedding: np.ndarray, optim_params: dict) -> None:
78
89
79
90
# Determine learning rate if requested
80
91
if optim_params .get ("learning_rate" , "auto" ) == "auto" :
81
- optim_params ["learning_rate" ] = max (200 , embedding . shape [ 0 ] / 12 )
92
+ optim_params ["learning_rate" ] = max (200 , n_samples / 12 )
82
93
83
94
84
95
def __check_init_num_samples (num_samples , required_num_samples ):
@@ -169,7 +180,8 @@ class PartialTSNEEmbedding(np.ndarray):
169
180
using one of the following aliases: ``bh``, ``BH`` or ``barnes-hut``.
170
181
For larger data sets, the FFT accelerated interpolation method is more
171
182
appropriate and can be set using one of the following aliases: ``fft``,
172
- ``FFT`` or ``ìnterpolation``.
183
+ ``FFT`` or ``ìnterpolation``. Alternatively, you can use ``auto`` to
184
+ approximately select the faster method.
173
185
174
186
theta: float
175
187
This is the trade-off parameter between speed and accuracy of the tree
@@ -290,6 +302,8 @@ def optimize(
290
302
``barnes-hut``. For larger data sets, the FFT accelerated
291
303
interpolation method is more appropriate and can be set using one of
292
304
the following aliases: ``fft``, ``FFT`` or ``ìnterpolation``.
305
+ Alternatively, you can use ``auto`` to approximately select the
306
+ faster method.
293
307
294
308
theta: float
295
309
This is the trade-off parameter between speed and accuracy of the
@@ -431,7 +445,8 @@ class TSNEEmbedding(np.ndarray):
431
445
using one of the following aliases: ``bh``, ``BH`` or ``barnes-hut``.
432
446
For larger data sets, the FFT accelerated interpolation method is more
433
447
appropriate and can be set using one of the following aliases: ``fft``,
434
- ``FFT`` or ``ìnterpolation``.
448
+ ``FFT`` or ``ìnterpolation``. Alternatively, you can use ``auto`` to
449
+ approximately select the faster method.
435
450
436
451
theta: float
437
452
This is the trade-off parameter between speed and accuracy of the tree
@@ -490,7 +505,7 @@ def __new__(
490
505
n_interpolation_points = 3 ,
491
506
min_num_intervals = 50 ,
492
507
ints_in_interval = 1 ,
493
- negative_gradient_method = "fft " ,
508
+ negative_gradient_method = "auto " ,
494
509
random_state = None ,
495
510
optimizer = None ,
496
511
** gradient_descent_params ,
@@ -571,6 +586,8 @@ def optimize(
571
586
``barnes-hut``. For larger data sets, the FFT accelerated
572
587
interpolation method is more appropriate and can be set using one of
573
588
the following aliases: ``fft``, ``FFT`` or ``ìnterpolation``.
589
+ Alternatively, you can use ``auto`` to approximately select the
590
+ faster method.
574
591
575
592
theta: float
576
593
This is the trade-off parameter between speed and accuracy of the
@@ -1000,7 +1017,8 @@ class TSNE(BaseEstimator):
1000
1017
This is the trade-off parameter between speed and accuracy of the tree
1001
1018
approximation method. Typical values range from 0.2 to 0.8. The value 0
1002
1019
indicates that no approximation is to be made and produces exact results
1003
- also producing longer runtime.
1020
+ also producing longer runtime. Alternatively, you can use ``auto`` to
1021
+ approximately select the faster method.
1004
1022
1005
1023
n_interpolation_points: int
1006
1024
Only used when ``negative_gradient_method="fft"`` or its other aliases.
@@ -1071,7 +1089,8 @@ class TSNE(BaseEstimator):
1071
1089
using one of the following aliases: ``bh``, ``BH`` or ``barnes-hut``.
1072
1090
For larger data sets, the FFT accelerated interpolation method is more
1073
1091
appropriate and can be set using one of the following aliases: ``fft``,
1074
- ``FFT`` or ``ìnterpolation``.
1092
+ ``FFT`` or ``ìnterpolation``. Alternatively, you can use ``auto`` to
1093
+ approximately select the faster method.
1075
1094
1076
1095
callbacks: Union[Callable, List[Callable]]
1077
1096
Callbacks, which will be run every ``callbacks_every_iters`` iterations.
@@ -1113,7 +1132,7 @@ def __init__(
1113
1132
max_step_norm = 5 ,
1114
1133
n_jobs = 1 ,
1115
1134
neighbors = "auto" ,
1116
- negative_gradient_method = "fft " ,
1135
+ negative_gradient_method = "auto " ,
1117
1136
callbacks = None ,
1118
1137
callbacks_every_iters = 50 ,
1119
1138
random_state = None ,
@@ -1154,18 +1173,6 @@ def __init__(
1154
1173
self .random_state = random_state
1155
1174
self .verbose = verbose
1156
1175
1157
- @property
1158
- def neighbors_method (self ):
1159
- import warnings
1160
-
1161
- warnings .warn (
1162
- f"The `neighbors_method` attribute has been deprecated and will be "
1163
- f"removed in future versions. Please use the new `neighbors` "
1164
- f"attribute" ,
1165
- category = FutureWarning ,
1166
- )
1167
- return self .neighbors
1168
-
1169
1176
def fit (self , X = None , affinities = None , initialization = None ):
1170
1177
"""Fit a t-SNE embedding for a given data set.
1171
1178
@@ -1324,7 +1331,7 @@ def prepare_initial(self, X=None, affinities=None, initialization=None):
1324
1331
initialization = "spectral"
1325
1332
1326
1333
# Same spiel for precomputed distance matrices
1327
- if self .metric == "precomputed" and initialization == "pca" :
1334
+ if self .metric == "precomputed" and isinstance ( initialization , str ) and initialization == "pca" :
1328
1335
log .warning (
1329
1336
"Attempting to use `pca` initalization, but using precomputed "
1330
1337
"distance matrix! Using `spectral` initilization instead, which "
@@ -1361,7 +1368,7 @@ def prepare_initial(self, X=None, affinities=None, initialization=None):
1361
1368
)
1362
1369
elif initialization == "random" :
1363
1370
embedding = initialization_scheme .random (
1364
- X ,
1371
+ n_samples ,
1365
1372
self .n_components ,
1366
1373
random_state = self .random_state ,
1367
1374
verbose = self .verbose ,
0 commit comments