Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
HasPredictionCol, Params
""" Used by default implementation of Estimator.fitMultiple to produce models in a thread safe iterator. This class handles the simple case of fitMultiple where each param map should be fit independently.
Parameters ---------- fitSingleModel : function Callable[[int], Transformer] which fits an estimator to a dataset. `fitSingleModel` may be called up to `numModels` times, with a unique index each time. Each call to `fitSingleModel` with an index should return the Model associated with that index. numModel : int Number of models this iterator should produce.
Notes ----- See :py:meth:`Estimator.fitMultiple` for more info. """ """
"""
"""For python2 compatibility.""" return self.__next__()
""" Abstract class for estimators that fit models to data.
.. versionadded:: 1.3.0 """ pass
def _fit(self, dataset):
    """
    Fits a model to the input dataset. This is called by the default
    implementation of fit.

    Abstract hook: concrete estimators must override this; the base
    implementation only raises :py:exc:`NotImplementedError`.

    Parameters
    ----------
    dataset : :py:class:`pyspark.sql.DataFrame`
        input dataset

    Returns
    -------
    :class:`Transformer`
        fitted model
    """
    raise NotImplementedError()
""" Fits a model to the input dataset for each param map in `paramMaps`.
.. versionadded:: 2.3.0
Parameters ---------- dataset : :py:class:`pyspark.sql.DataFrame` input dataset. paramMaps : :py:class:`collections.abc.Sequence` A Sequence of param maps.
Returns ------- :py:class:`_FitMultipleIterator` A thread safe iterable which contains one model for each param map. Each call to `next(modelIterator)` will return `(index, model)` where model was fit using `paramMaps[index]`. `index` values may not be sequential. """
""" Fits a model to the input dataset with optional parameters.
.. versionadded:: 1.3.0
Parameters ---------- dataset : :py:class:`pyspark.sql.DataFrame` input dataset. params : dict or list or tuple, optional an optional param map that overrides embedded params. If a list/tuple of param maps is given, this calls fit on each param map and returns a list of models.
Returns ------- :py:class:`Transformer` or a list of :py:class:`Transformer` fitted model(s) """ models = [None] * len(params) for index, model in self.fitMultiple(dataset, params): models[index] = model return models else: else: "but got %s." % type(params))
""" Abstract class for transformers that transform one dataset into another.
.. versionadded:: 1.3.0 """ pass
def _transform(self, dataset):
    """
    Transforms the input dataset.

    Abstract hook: concrete transformers must override this; the base
    implementation only raises :py:exc:`NotImplementedError`.

    Parameters
    ----------
    dataset : :py:class:`pyspark.sql.DataFrame`
        input dataset.

    Returns
    -------
    :py:class:`pyspark.sql.DataFrame`
        transformed dataset
    """
    raise NotImplementedError()
""" Transforms the input dataset with optional parameters.
.. versionadded:: 1.3.0
Parameters ---------- dataset : :py:class:`pyspark.sql.DataFrame` input dataset params : dict, optional an optional param map that overrides embedded params.
Returns ------- :py:class:`pyspark.sql.DataFrame` transformed dataset """ else: else:
""" Abstract class for models that are fitted by estimators.
.. versionadded:: 1.4.0 """
""" Abstract class for transformers that take one input column, apply transformation, and output the result as a new column.
.. versionadded:: 2.3.0 """
""" Sets the value of :py:attr:`inputCol`. """
""" Sets the value of :py:attr:`outputCol`. """
def createTransformFunc(self):
    """
    Creates the transform function using the given param map. The input param
    map already takes account of the embedded param map. So the param values
    should be determined solely by the input param map.

    Abstract: subclasses must override; the base implementation only raises
    :py:exc:`NotImplementedError`.
    """
    raise NotImplementedError()
def outputDataType(self):
    """
    Returns the data type of the output column.

    Abstract: subclasses must override; the base implementation only raises
    :py:exc:`NotImplementedError`.
    """
    raise NotImplementedError()
def validateInputType(self, inputType):
    """
    Validates the input type. Throw an exception if it is invalid.

    Abstract: subclasses must override; the base implementation only raises
    :py:exc:`NotImplementedError`.

    Parameters
    ----------
    inputType
        data type of the input column to validate
    """
    raise NotImplementedError()
raise ValueError("Output column %s already exists." % self.getOutputCol()) self.outputDataType(), nullable=False))
transformUDF(dataset[self.getInputCol()]))
""" Params for :py:class:`Predictor` and :py:class:`PredictorModel`.
.. versionadded:: 3.0.0 """
""" Estimator for prediction tasks (regression and classification). """
def setLabelCol(self, value):
    """
    Sets the value of :py:attr:`labelCol`.

    Parameters
    ----------
    value : str
        name of the label column

    Returns
    -------
    the result of ``self._set(labelCol=value)`` — presumably ``self``, per
    the usual fluent-setter convention; ``_set`` is defined elsewhere, so
    confirm against :py:class:`Params`.
    """
    return self._set(labelCol=value)
def setFeaturesCol(self, value): """ Sets the value of :py:attr:`featuresCol`. """
def setPredictionCol(self, value):
    """
    Sets the value of :py:attr:`predictionCol`.

    Parameters
    ----------
    value : str
        name of the prediction column

    Returns
    -------
    the result of ``self._set(predictionCol=value)`` — presumably ``self``,
    per the usual fluent-setter convention; ``_set`` is defined elsewhere,
    so confirm against :py:class:`Params`.
    """
    return self._set(predictionCol=value)
""" Model for prediction tasks (regression and classification). """
def setFeaturesCol(self, value): """ Sets the value of :py:attr:`featuresCol`. """
def setPredictionCol(self, value): """ Sets the value of :py:attr:`predictionCol`. """
def numFeatures(self):
    """
    Returns the number of features the model was trained on. If unknown,
    returns -1.

    Abstract: subclasses must override; the base implementation only raises
    :py:exc:`NotImplementedError`.
    """
    raise NotImplementedError()
def predict(self, value):
    """
    Predict label for the given features.

    Abstract: subclasses must override; the base implementation only raises
    :py:exc:`NotImplementedError`.

    Parameters
    ----------
    value
        feature vector for a single instance
    """
    raise NotImplementedError()