Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
self._random = random.Random(self._seed ^ split)
# mixing because the initial seeds are close to each other for _ in range(10): self._random.randint(0, 1)
return self._random.random()
# Using Knuth's algorithm described in # http://en.wikipedia.org/wiki/Poisson_distribution if mean < 20.0: # one exp and k+1 random calls l = math.exp(-mean) p = self._random.random() k = 0 while p > l: k += 1 p *= self._random.random() else: # switch to the log domain, k+1 expovariate (random + log) calls p = self._random.expovariate(mean) k = 0 while p < 1.0: k += 1 p += self._random.expovariate(mean) return k
raise NotImplementedError
self.initRandomGenerator(split) if self._withReplacement: for obj in iterator: # For large datasets, the expected number of occurrences of each element in # a sample with replacement is Poisson(frac). We use that to get a count for # each element. count = self.getPoissonSample(self._fraction) for _ in range(0, count): yield obj else: for obj in iterator: if self.getUniformSample() < self._fraction: yield obj
self.initRandomGenerator(split) for obj in iterator: if self._lowerBound <= self.getUniformSample() < self._upperBound: yield obj
self.initRandomGenerator(split) if self._withReplacement: for key, val in iterator: # For large datasets, the expected number of occurrences of each element in # a sample with replacement is Poisson(frac). We use that to get a count for # each element. count = self.getPoissonSample(self._fractions[key]) for _ in range(0, count): yield key, val else: for key, val in iterator: if self.getUniformSample() < self._fractions[key]: yield key, val |