Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

# 

# Licensed to the Apache Software Foundation (ASF) under one or more 

# contributor license agreements. See the NOTICE file distributed with 

# this work for additional information regarding copyright ownership. 

# The ASF licenses this file to You under the Apache License, Version 2.0 

# (the "License"); you may not use this file except in compliance with 

# the License. You may obtain a copy of the License at 

# 

# http://www.apache.org/licenses/LICENSE-2.0 

# 

# Unless required by applicable law or agreed to in writing, software 

# distributed under the License is distributed on an "AS IS" BASIS, 

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

# See the License for the specific language governing permissions and 

# limitations under the License. 

# 

 

import numpy as np 

 

from pyspark.mllib.common import callMLlibFunc 

from pyspark.rdd import RDD 

 

 

class KernelDensity(object):
    """
    Estimate probability density at required points given an RDD of samples
    from the population.

    The sample RDD and the bandwidth are stored on the instance and passed
    to the ``estimateKernelDensity`` MLlib function when :meth:`estimate`
    is called.

    Examples
    --------
    >>> kd = KernelDensity()
    >>> sample = sc.parallelize([0.0, 1.0])
    >>> kd.setSample(sample)
    >>> kd.estimate([0.0, 1.0])
    array([ 0.12938758, 0.12938758])
    """

    def __init__(self):
        # Default bandwidth; the sample has to be supplied via setSample().
        self._bandwidth = 1.0
        self._sample = None

    def setBandwidth(self, bandwidth):
        """Set bandwidth of each sample. Defaults to 1.0"""
        self._bandwidth = bandwidth

    def setSample(self, sample):
        """
        Set sample points from the population. Should be a RDD

        Raises
        ------
        TypeError
            If `sample` is not an RDD.
        """
        if not isinstance(sample, RDD):
            raise TypeError("samples should be a RDD, received %s" % type(sample))
        self._sample = sample

    def estimate(self, points):
        """
        Estimate the probability density at points

        Parameters
        ----------
        points : iterable
            Points at which to estimate the density. The iterable is
            materialized into a list before being passed on.

        Returns
        -------
        numpy.ndarray
            The result of the ``estimateKernelDensity`` call converted
            with ``numpy.asarray``.

        Raises
        ------
        ValueError
            If no sample has been set with :meth:`setSample`.
        """
        # Fail early with a clear message instead of forwarding None to the
        # backend call when setSample() was never invoked.
        if self._sample is None:
            raise ValueError(
                "sample must be set with setSample() before calling estimate()")
        points = list(points)
        densities = callMLlibFunc(
            "estimateKernelDensity", self._sample, self._bandwidth, points)
        return np.asarray(densities)