import pdb
import numpy as np
import time
from scipy.spatial.distance import pdist, squareform, cdist
from sklearn.neighbors import NearestNeighbors, KNeighborsTransformer
from scipy.sparse import csr_matrix
from . import local_views_
from . import intermed_views_
from . import global_views_
from . import visualize_
from .util_ import print_log, sparse_matrix, nearest_neighbors
import multiprocess as mp
from pandas import DataFrame
import json
def double_manifold_k_nn(data, ddX, k_nn, metric, n_proc=1):
"""Doubles the manifold represented by X and computes
k-nearest neighbors of each point on the double.
Parameters
----------
data : {array} of shape (n_samples, n_features) or {array} of shape (n_samples, n_samples)
A 2d array where each row represents a data point or
distance of a point to all other points.
ddX : array shape (n_samples,)
A 1d np.array of shape (n,) where ddX[i] = 0 iff the
i-th point X[i,:] represents a point on the boundary
of the manifold. Note that n_boundary = np.sum(ddX==0).
k_nn : int
Number of nearest neighbors to compute on the double.
n_proc : int
Number of processors to use.
Returns
-------
neigh_dist : array shape (2*n_samples-n_boundary, k_nn)
distance of k-nearest neighbors from each
point on the double.
neigh_ind : array shape (2*n_samples-n_boundary, k_nn)
indices of k-nearest neighbors from each point
on the double.
"""
n = data.shape[0] # no. of points on the original manifold
dX = ddX==0 # a boolean array s.t. dX[i] == 1 iff i-th pt is on boundary
n_dX = np.sum(dX) # no. of points on the boundary
print('No. of points on the boundary =', n_dX)
n_interior = n-n_dX # no. of points in the interior
# For points in the interior, a duplicate point
# with index offset-ed by n is created.
# The i-th point in the interior of the original
# manifold is duplicated. The index of the duplicate
# is n + i.
ind_mask = np.arange(n)
ind_mask[~dX] = n+np.arange(n_interior)
k_nn_ = 2*k_nn
neigh_dist, neigh_ind = nearest_neighbors(data, k_nn_, metric)
# Now we construct a sparse distance matrix corresp
# to the doubled manifold. The block structure is
# A11 A12 A13
# A21 A22 A23
# A31 A32 A33
# 1: interior of original manifold
# 2: boundary
# 3: duplicated interior
# So, A11 = distance matrix between pairs of
# points in the interior of original manifold.
# Overall, A11, A12, A21, A22, A32, A33
# are known or trivially computable. A13 = A31.T
# is to be computed.
# A22, A23
# A32 A33
neigh_ind_ = ind_mask[neigh_ind]
neigh_dist_ = neigh_dist
dX_of_neigh_ind = dX[neigh_ind]
pts_near_dX = np.any(dX_of_neigh_ind, 1)
int_pts_near_dX = pts_near_dX & (~dX)
int_pts_near_dX_inds = np.where(int_pts_near_dX)[0]
# Compute sparse representation of A13
row_inds13 = []
col_inds13 = []
vals13 = []
def target_proc(p_num, chunk_sz, q_):
start_ind = p_num*chunk_sz
if p_num == (n_proc-1):
end_ind = int_pts_near_dX_inds.shape[0]
else:
end_ind = (p_num+1)*chunk_sz
row_inds13_ = []
col_inds13_ = []
vals13_ = []
for i in range(start_ind, end_ind):
# index of an interior point which has
# a boundary point as nbr
k = int_pts_near_dX_inds[i]
# nbrs of k which are on the boundary
# and the distance of k to these points.
nbrs_on_dX_mask = dX_of_neigh_ind[k,:]
nbrs_on_dX = neigh_ind[k,nbrs_on_dX_mask]
dist_of_nbrs_on_dX = neigh_dist[k,nbrs_on_dX_mask][:,None]
# Let S_k be the the nbrs of k which are
# on the boundary. The nbrs of S_k in the
# duplicated manifold are
nbrs_of_S_k = neigh_ind_[nbrs_on_dX,:]
# Distance of nbrs of S_k to the corresponding
# point in S_k.
dist_of_nbrs_of_S_k = neigh_dist_[nbrs_on_dX,:]
# Distance b/w k and the nbrs of S_k
dist_of_k_from_nbrs_of_S_k = dist_of_nbrs_of_S_k + dist_of_nbrs_on_dX
# nbrs of S_k on the duplicated mainfold
# which are in the interior of it and
# distanace of k to these points.
int_nbrs_of_S_k_mask = ~dX_of_neigh_ind[nbrs_on_dX,:]
int_nbrs_of_S_k = nbrs_of_S_k[int_nbrs_of_S_k_mask]
dist_of_k_from_int_nbrs_of_S_k = dist_of_k_from_nbrs_of_S_k[int_nbrs_of_S_k_mask]
# Groupby interior point ids and min-aggregate their distances
df = DataFrame(dict(ind=int_nbrs_of_S_k,
dist=dist_of_k_from_int_nbrs_of_S_k))
gp = df.groupby('ind')['dist'].min().to_frame().reset_index()
col_inds_ = gp['ind'].to_list()
vals_ = gp['dist'].to_list()
row_inds_ = [k]*len(col_inds_)
row_inds13_ += row_inds_
col_inds13_ += col_inds_
vals13_ += vals_
q_.put((row_inds13_, col_inds13_, vals13_))
q_ = mp.Queue()
chunk_sz = int(int_pts_near_dX_inds.shape[0]/n_proc)
proc = []
for p_num in range(n_proc):
proc.append(mp.Process(target=target_proc,
args=(p_num,chunk_sz,q_),
daemon=True))
proc[-1].start()
for p_num in range(n_proc):
row_inds_, col_inds_, vals_ = q_.get()
row_inds13 += row_inds_
col_inds13 += col_inds_
vals13 += vals_
q_.close()
for p_num in range(n_proc):
proc[p_num].join()
row_inds22_23_32_33 = np.repeat(ind_mask, neigh_ind.shape[1]).tolist()
col_inds22_23_32_33 = ind_mask[neigh_ind].flatten().tolist()
vals_22_23_32_33 = neigh_dist.flatten().tolist()
row_inds11_12 = np.repeat(np.arange(n)[~dX], neigh_ind.shape[1]).tolist()
col_inds11_12 = neigh_ind[~dX,:].flatten().tolist()
vals11_12 = neigh_dist[~dX,:].flatten().tolist()
cs_graph = csr_matrix((vals11_12+vals13+vals_22_23_32_33,
(row_inds11_12+row_inds13+row_inds22_23_32_33,
col_inds11_12+col_inds13+col_inds22_23_32_33)),
shape=(n_interior+n, n_interior+n))
cs_graph = cs_graph.maximum(cs_graph.transpose())
neigh_dist, neigh_ind = nearest_neighbors(cs_graph, k_nn=k_nn, metric='precomputed')
return neigh_dist, neigh_ind
[docs]def get_default_local_opts(algo='LPCA', metric0='euclidean', k=28, k_nn=49, k_tune=7,
metric='euclidean', update_metric=True, radius=0.5, U_method='k_nn',
gl_type='unnorm', tuning='self', N=100, scale_by='gamma',
Atilde_method='LDLE_1', alpha=1, max_iter=300, reg=0.,
p=0.99, tau=50, delta=0.9, lkpca_kernel='linear', to_postprocess= True,
pp_n_thresh=32, lambda1_init=8, lambda1_decay=0.75, lambda1_min=1e-3,
power=5, max_sparsity=0.9, doubly_stochastic_max_iter=0):
"""Sets and returns a dictionary of default_local_opts, (experimental) options are work in progress.
Parameters
----------
algo : str
The algorithm to use for the construction of
local views. Options are 'LDLE', 'LPCA' and 'LKPCA'.
Experimental options are 'L1PCA', 'SparsePCA', 'LISOMAP'
and 'RPCA-GODEC'.
metric0 : str
The metric to be used for finding nearest neighbors.
(experimental) If the data is a distance matrix then use 'precomputed'.
k : int
The size of the local view per point.
k_nn : int
This is used for nearest neighbor graph construction.
The neighborhood size is kept bigger than the size of the
local views. This is because, during clustering where we
construct intermediate views, we need access to distances
between points in a bigger neighborhood around each point.
The value of k_nn must be >= k. Then the size of neighborhood
is set to max(k_nn, eta_max * k) where eta_max is in intermed_opts.
This value is used to construct the nearest neighbor graph
using metric0.
k_tune : int
Distance to k_tune-th nearest neighbor is
used as the local scaling factor. This must
be less than k_nn. This is used to construct
self-tuning kernel in LDLE.
metric : str
The local metric on the data. (experimental) If the data is a distance
matrix then use 'precomputed'.
update_metric : bool
If True, recomputes neighbor distances using metric.
If False, the neighbor distances computed using metric0
are retained. Note that neighbor indices are not changed.
radius : float
Radius of the balls to be used to compute nearest neighbors.
(experimental)
U_method : str
Method to use to construct local views.
Options are 'k_nn' and 'radius'.
gl_type : str
The type of graph Laplacian to construct in LDLE.
Options are 'unnorm' for unnormalized,
'symnorm' for symmetric normalized,
'diffusion' for density-corrected normalized.
tuning: str
The tuning method for the construction of kernel in LDLE.
Options are: 'self', 'solo', 'median' or None.
N : int
Number of smallest non-trivial eigenvectors of the
graph Laplacian to be used for the construction of
local views in LDLE.
scale_by : str
To scale the eigenvectors in LDLE. Options
are 'none', 'gamma' or a positive real value. Default is
'gamma'. Using numerical value with gl_type as 'diffusion'
uses diffusion maps to construct local views where
the numeric value is used as the power of the eigenvalues.
Atilde_method : str
Method to use for conmputing Atilde in LDLE.
Options are 'FEM' for finite element method,
and 'FeymanKac' for Feyman-Kac formula based.
p : float
A hyperparameter used in computing Atilde. The value
must be in (0,1).
tau : int
A hyperparameter used in computing parameterizations
of local views in LDLE. The value must be in (0,100).
delta : float
A hyperparameter used in computing parameterizations
of local views in LDLE. The value must be in (0,1).
alpha : float
Step size in Riemannian gradient descent when using
Smooth-LPCA (experimental).
max_iter: int
Max number of Riemannian gradient descent steps in
Smooth-LPCA (experimental).
reg : float
Desired regularization (Smoothness) in Smooth-LPCA
(experimental).
lkpca_kernel: str
The kernel for local KPCA.
to_postprocess : bool
If True the local parameterizations are postprocessed
to fix anamolous parameterizations leading to high
distortion of the local views.
pp_n_thresh : int
Threshold to use multiple processors or a single processor
while postprocessing the local parameterizations. A small
value such as 32 leads to faster postprocessing.
lambda1_init : float
Initialization of lambda1 for RPCA-GODEC (experimental).
lambda1_decay : float
Decay of lambda1 for RPCA-GODEC (experimental).
lambda1_min : float
Minimum allowed valued of lambda1 for RPCA-GODEC (experimental).
power : int
Number of power iterations for initialization in RPCA-GODEC
(experimental).
max_sparsity : float
maximum fraction of the desired sparsity (experimental).
doubly_stochastic_max_iter: int
max number of sinkhorn iterations for converting
self tuned kernel in LDLE to doubly stochastic.
"""
return {'k_nn': k_nn, 'k_tune': k_tune, 'k': k, 'metric0': metric0,
'metric': metric, 'update_metric': update_metric, 'radius': radius,
'U_method': U_method, 'gl_type': gl_type, 'tuning': tuning, 'N': N, 'scale_by': scale_by,
'Atilde_method': Atilde_method, 'p': p, 'tau': tau, 'delta': delta,
'alpha': alpha, 'max_iter': max_iter, 'reg': reg, 'lkpca_kernel': lkpca_kernel,
'to_postprocess': to_postprocess, 'algo': algo,
'pp_n_thresh': pp_n_thresh, 'lambda1_init': lambda1_init, 'lambda1_decay': lambda1_decay,
'lambda1_min': lambda1_min, 'power': power, 'max_sparsity': max_sparsity,
'doubly_stochastic_max_iter': doubly_stochastic_max_iter}
[docs]def get_default_intermed_opts(algo='best', n_times=4, eta_min=5, eta_max=25, len_S_thresh=256):
"""Sets and returns a dictionary of default_intermed_opts, (experimental) options are work in progress.
Parameters
----------
algo : str
Algo to use. Options are 'mnm' for match and merge,
'best' for the optimal algorithm (much slower
but creates intermediate views with lower distortion).
'mnm' is (experimental).
n_times : int
(experimental) Hypereparameter for 'mnm' algo. Number of times to match
and merge. If n is the #local views then the #intermediate
views will be approximately n/2^{n_times}.
eta_min : int
Hyperparameter for 'best' algo. Minimum allowed size of
the clusters underlying the intermediate views.
The values must be >= 1.
eta_max : int
Hyperparameter for 'best' algo. Maximum allowed size of
the clusters underlying the intermediate views.
The value must be > eta_min.
len_S_thresh : int
Threshold on the number of points for which
the costs are to be updated, to invoke
multiple processors. Used with 'best' algo only.
"""
return {'algo': algo, 'n_times': n_times, 'eta_min': eta_min,
'eta_max': eta_max, 'len_S_thresh': len_S_thresh}
[docs]def get_default_global_opts(align_transform='rigid', to_tear=True, nu=3, max_iter=20, color_tear=True,
vis_before_init=False, compute_error=False,
init_algo_name='procrustes', align_w_parent_only=True,
refine_algo_name='rgd',
max_internal_iter=100, alpha=0.3, eps=1e-8,
add_dim=False, beta={'align':None, 'repel': 1},
repel_by=0., repel_decay=1., n_repel=0,
far_off_points_type='reuse_fixed', patience=5, err_tol=1e-6,
tear_color_method='spectral', tear_color_eig_inds=[1,0,2],
metric='euclidean', color_cutoff_frac=0.001,
color_largest_tear_comp_only=False,
n_forced_clusters=1):
"""Sets and returns a dictionary of default_global_opts, (experimental) options are work in progress.
Parameters
----------
align_transform : str
The algorithm to use for the alignment of intermediate
views. Options are 'rigid' and 'affine'. (experimental) If 'affine' is
chosen then none of the following hypermateters are used.
to_tear : bool
If True then tear-enabled alignment of views is performed.
nu : int
The ratio of the size of local views in the embedding against those
in the data.
max_iter : int
Number of iterations to refine the global embedding for.
color_tear : bool
If True, colors the points across the tear with a
spectral coloring scheme.
vis_before_init : bool
If True, plots the global embedding before
alignment begins. This is same as just plotting
all the intermediate views without alignment.
compute_error : bool
If True the alignment error is computed at each
iteration of the refinement, otherwise only at
the last iteration.
init_algo_name : str
The algorithm used to compute initial global embedding
by aligning the intermediate views. Options are 'procrustes'
for spanning-tree-based-procrustes alignment,
'spectral' for spectral alignment (ignores to_tear),
'sdp' for semi-definite programming based alignment (ignores to_tear).
align_w_parent_only : bool
If True, then aligns child views the parent views only
in the spanning-tree-based-procrustes alignment.
refine_algo_name : str
The algorithm used to refine the initial global embedding
by refining the alignment between intermediate views.
Options are 'gpa' for Generalized Procustes Analysis
(GPA) based alignment, 'rgd' for Riemannian gradient descent
based alignment, 'spectral' for spectral alignment,
'gpm' for generalized power method based alignment,
'sdp' for semi-definite programming based alignment. Note that
sdp based alignment is very slow. Recommended options are 'rgd'
with an appropriate step size (alpha) and 'gpm'.
max_internal_iter : int
The number of internal iterations used by
the refinement algorithm, for example, RGD updates.
This is ignored by 'spectral' refinement.
alpha : float
The step size used in the Riemannian gradient descent
when the refinement algorithm is 'rgd'.
eps : float
The tolerance used by sdp solver when the init or refinement
algorithm is 'sdp'.
add_dim : bool
(experimental) add an extra dimension to intermediate views.
beta : dict
(experimental) Hyperparameters used for computing the alignment weights and
the repulsion weights. Form is {'align': float, 'repel': float}.
Default is {'align': None, 'repel': None} i.e. unweighted.
repel_by : float
If positive, the points which are far off are repelled
away from each other by a force proportional to this parameter.
Ignored when refinement algorithm is 'gpa'.
repel_decay : float
Multiply repel_decay with current value of repel_by after every iteration.
n_repel : int
The number of far off points repelled from each other.
far_off_points_type : 'fixed' or 'random'
Whether to use the same points for repulsion or
randomize over refinement iterations. If 'reuse' is
in the string, for example 'fixed_reuse', then the points
to be repelled are the same across iterations.
patience : int
The number of iteration to wait for error below tolerance
to persist before stopping the refinement.
err_tol : float
The tolerance level for the alignment error.
tear_color_method : str
Method to color the tear. Options are 'spectral' or 'heuristic'.
The latter keeps the coloring of the tear same accross
the iterations. Recommended option is 'spectral'.
tear_color_eig_inds : int
Eigenvectors to be used to color the tear. The value must either
be a non-negative integer or it must be a list of three non-negative
integers [R,G,B] representing the indices of eigenvectors to be used
as RGB channels for coloring the tear. Higher values result in
more diversity of colors. The diversity saturates after a certain value.
color_cutoff_frac : float
If the number of points in a tear component is less than
(color_cutoff_frac * number of data points), then all the
points in the component will be colored with the same color.
color_largest_tear_comp_only : bool
If True then the largest tear components is colored only.
metric : str
metric assumed on the global embedding. Currently only euclidean is supported.
n_forced_clusters : str
(experimental) Minimum no. of clusters to force in the embeddings.
"""
return {'to_tear': to_tear, 'nu': nu, 'max_iter': max_iter,
'color_tear': color_tear,
'vis_before_init': vis_before_init,
'compute_error': compute_error,
'align_transform': align_transform,
'init_algo_name': init_algo_name,
'align_w_parent_only': align_w_parent_only,
'refine_algo_name': refine_algo_name,
'max_internal_iter': max_internal_iter,
'alpha': alpha, 'eps': eps, 'add_dim': add_dim,
'beta': beta, 'repel_by': repel_by,
'repel_decay': repel_decay, 'n_repel': n_repel,
'far_off_points_type': far_off_points_type,
'patience': patience, 'err_tol': err_tol,
'tear_color_method': tear_color_method,
'tear_color_eig_inds': tear_color_eig_inds,
'metric': metric, 'color_cutoff_frac': color_cutoff_frac,
'color_largest_tear_comp_only': color_largest_tear_comp_only,
'n_forced_clusters': n_forced_clusters
}
[docs]def get_default_vis_opts(save_dir='', cmap_interior='summer', cmap_boundary='jet', c=None):
"""Sets and returns a dictionary of default_vis_opts.
Parameters
----------
save_dir : str
The directory to save the plots in.
cmap_interior : str
The colormap to use for the interior of the manifold.
cmap_boundary : str
The colormap to use for the boundary of the manifold.
c : array shape (n_samples)
The labels for each point to be used to color them.
"""
return {'save_dir': save_dir,
'cmap_interior': cmap_interior,
'cmap_boundary': cmap_boundary,
'c': c}
[docs]class BUML:
"""Bottom-up manifold learning.
Parameters
----------
d : int
Intrinsic dimension of the manifold.
local_opts : dict
Options for local views construction. The key-value pairs
provided override the ones in default_local_opts.
intermed_opts : dict
Options for intermediate views construction. The key-value pairs
provided override the ones in default_intermed_opts.
global_opts : dict
Options for global views construction. The key-value pairs
provided override the ones in default_global_opts.
vis_opts : dict
Options for visualization. The key-value pairs
provided override the ones in default_vis_opts.
n_proc : int
The number of processors to use. Defaults to approximately
3/4th of the available processors.
verbose : bool
print logs if True.
debug : bool
saves intermediary objects/data for debugging.
"""
def __init__(self,
d = 2,
local_opts = {},
intermed_opts = {},
global_opts = {},
vis_opts = {},
n_proc = min(32,max(1,int(mp.cpu_count()*0.75))),
exit_at = None,
verbose = False,
debug = False):
default_local_opts = get_default_local_opts()
default_intermed_opts = get_default_intermed_opts()
default_global_opts = get_default_global_opts()
default_vis_opts = get_default_vis_opts()
self.d = d
local_opts['n_proc'] = n_proc
local_opts['verbose'] = verbose
local_opts['debug'] = debug
for i in local_opts:
default_local_opts[i] = local_opts[i]
self.local_opts = default_local_opts
#############################################
intermed_opts['n_proc'] = n_proc
intermed_opts['local_algo'] = self.local_opts['algo']
intermed_opts['verbose'] = verbose
intermed_opts['debug'] = debug
for i in intermed_opts:
default_intermed_opts[i] = intermed_opts[i]
self.intermed_opts = default_intermed_opts
# Update k_nn
self.local_opts['k_nn'] = max(self.local_opts['k_nn'], self.local_opts['k'])
self.local_opts['k_nn0'] = max(self.local_opts['k_nn'],
self.intermed_opts['eta_max']*self.local_opts['k'])
print("local_opts['k_nn0'] =", self.local_opts['k_nn0'], "is created.")
#############################################
global_opts['k'] = self.local_opts['k']
global_opts['n_proc'] = n_proc
global_opts['verbose'] = verbose
global_opts['debug'] = debug
for i in global_opts:
default_global_opts[i] = global_opts[i]
if default_global_opts['refine_algo_name'] != 'rgd':
if 'max_internal_iter' not in global_opts:
default_global_opts['max_internal_iter'] = 10
print("Making global_opts['max_internal_iter'] =",
default_global_opts['max_internal_iter'])
print('Supply the argument to use a different value', flush=True)
self.global_opts = default_global_opts
#############################################
for i in vis_opts:
default_vis_opts[i] = vis_opts[i]
self.vis_opts = default_vis_opts
self.vis = visualize_.Visualize(self.vis_opts['save_dir'])
#############################################
self.exit_at = exit_at
self.verbose = verbose
self.debug = debug
#############################################
# Other useful inits
self.global_start_time = time.time()
self.local_start_time = time.time()
print('Options provided:')
print('local_opts:')
print(json.dumps(self.local_opts, sort_keys=True, indent=4))
print('intermed_opts:')
print(json.dumps( self.intermed_opts, sort_keys=True, indent=4))
print('global_opts:')
print(json.dumps( self.global_opts, sort_keys=True, indent=4))
# The variables created during the fit
self.X = None
self.d_e = None
self.scale = None
self.ddX = None
self.neigh_dist = None
self.neigh_ind = None
self.LocalViews = None
self.IntermedViews = None
def log(self, s='', log_time=False):
if self.verbose:
self.local_start_time = print_log(s, log_time,
self.local_start_time,
self.global_start_time)
[docs] def fit(self, X = None, d_e = None, ddX = None):
"""Run the algorithm. Either X or d_e must be supplied.
Parameters
---------
X : array shape (n_samples, n_features)
A 2d array containing data representing a manifold.
d_e : array shape (n_samples, n_samples)
A square numpy matrix representing the geodesic distance
between each pair of points on the manifold.
ddX : array shape (n_samples)
Optional. A 1d array representing the distance of the
points from the boundary OR a 1d array such that
ddX[i] = 0 if the point i i.e. X[i,:] is on the boundary.
Returns
-------
y : array shape (n_samples, d)
The embedding of the data in lower dimension.
"""
assert X is not None or d_e is not None, "Either X or d_e should be provided."
if d_e is None:
data = X
else:
data = d_e.copy()
if ddX is None or self.local_opts['algo'] != 'LDLE':
neigh_dist, neigh_ind = nearest_neighbors(data,
k_nn=self.local_opts['k_nn0'],
metric=self.local_opts['metric0'])
else:
self.log('Doubling manifold.')
neigh_dist, neigh_ind = double_manifold_k_nn(data, ddX,
self.local_opts['k_nn0'],
self.local_opts['metric0'],
self.local_opts['n_proc'])
self.log('Done.', log_time=True)
# self.neigh_ind = neigh_ind
# self.neigh_dist = neigh_dist
# return
# Construct a sparse d_e matrix based on neigh_ind and neigh_dist
# self.scale = np.min(neigh_dist[neigh_dist > 0])
if (self.local_opts['metric0'] != self.local_opts['metric']) and self.local_opts['update_metric']:
for k in range(neigh_ind.shape[0]):
neigh_dist[k,:] = cdist(data[k,:][None,:], data[neigh_ind[k,:],:],
metric=self.local_opts['metric'])
# cdist can return a very small value of self-distance
# enforce self-distance to be zero.
neigh_dist[k,0] = 0
d_e = sparse_matrix(neigh_ind, neigh_dist)
d_e = d_e.maximum(d_e.transpose())
# to compute far off points
d_e_small = sparse_matrix(neigh_ind[:,:self.local_opts['k']],
neigh_dist[:,:self.local_opts['k']])
d_e_small = d_e_small.maximum(d_e_small.transpose())
neigh_ind = neigh_ind[:,:self.local_opts['k_nn']]
neigh_dist = neigh_dist[:,:self.local_opts['k_nn']]
# Construct low dimensional local views
LocalViews = local_views_.LocalViews(self.exit_at, self.verbose, self.debug)
LocalViews.fit(self.d, data, d_e, neigh_dist, neigh_ind, ddX, self.local_opts)
# Halving distance matrix
if ddX is not None:
n = ddX.shape[0]
d_e = d_e[:n,:n]
if self.debug:
self.d_e = d_e
self.d_e_small = d_e_small
self.neigh_ind = neigh_ind
self.neigh_dist = neigh_dist
self.LocalViews = LocalViews
if self.exit_at == 'local_views':
return
# Construct intermediate views
IntermedViews = intermed_views_.IntermedViews(self.exit_at, self.verbose, self.debug)
IntermedViews.fit(self.d, d_e, LocalViews.U,
LocalViews.local_param_post,
self.intermed_opts)
self.IntermedViews = IntermedViews
if self.exit_at == 'intermed_views':
return
# Construct Global view
GlobalViews = global_views_.GlobalViews(self.exit_at, self.verbose, self.debug)
GlobalViews.fit(self.d, d_e_small, IntermedViews.Utilde, IntermedViews.C, IntermedViews.c,
IntermedViews.n_C, IntermedViews.intermed_param,
self.global_opts, self.vis, self.vis_opts)
self.GlobalViews = GlobalViews
return GlobalViews.y_final