-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgeometric_median.py
96 lines (67 loc) · 2.5 KB
/
geometric_median.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
from scipy.optimize import minimize
from scipy.spatial.distance import cdist
def geometric_median(points, method='auto', options={}):
"""
Calculates the geometric median of an array of points.
method specifies which algorithm to use:
* 'auto' -- uses a heuristic to pick an algorithm
* 'minimize' -- scipy.optimize the sum of distances
* 'weiszfeld' -- Weiszfeld's algorithm
"""
points = np.asarray(points)
if len(points.shape) == 1:
# geometric_median((0, 0)) has too much potential for error.
# Did the user intend a single 2D point or two scalars?
# Use np.median if you meant the latter.
raise ValueError("Expected 2D array")
if method == 'auto':
if points.shape[1] > 2:
# weiszfeld tends to converge faster in higher dimensions
method = 'weiszfeld'
else:
method = 'minimize'
return _methods[method](points, options)
def minimize_method(points, options={}):
"""
Geometric median as a convex optimization problem.
"""
# objective function
def aggregate_distance(x):
return cdist([x], points).sum()
# initial guess: centroid
centroid = points.mean(axis=0)
optimize_result = minimize(aggregate_distance, centroid, method='COBYLA')
return optimize_result.x
def weiszfeld_method(points, options={}):
"""
Weiszfeld's algorithm as described on Wikipedia.
"""
default_options = {'maxiter': 1000, 'tol': 1e-7}
default_options.update(options)
options = default_options
def distance_func(x):
return cdist([x], points)
# initial guess: centroid
guess = points.mean(axis=0)
iters = 0
while iters < options['maxiter']:
distances = distance_func(guess).T
# catch divide by zero
# TODO: Wikipedia cites how to deal with distance 0
distances = np.where(distances == 0, 1, distances)
guess_next = (points/distances).sum(axis=0) / (1./distances).sum(axis=0)
guess_movement = np.sqrt(((guess - guess_next)**2).sum())
guess = guess_next
if guess_movement <= options['tol']:
break
iters += 1
return guess
_methods = {
'minimize': minimize_method,
'weiszfeld': weiszfeld_method,
}
if __name__ == "__main__":
a = np.random.randn(10, 60)
meadian=geometric_median(points=a)
print(meadian.shape)