Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

# 

# Licensed to the Apache Software Foundation (ASF) under one or more 

# contributor license agreements. See the NOTICE file distributed with 

# this work for additional information regarding copyright ownership. 

# The ASF licenses this file to You under the Apache License, Version 2.0 

# (the "License"); you may not use this file except in compliance with 

# the License. You may obtain a copy of the License at 

# 

# http://www.apache.org/licenses/LICENSE-2.0 

# 

# Unless required by applicable law or agreed to in writing, software 

# distributed under the License is distributed on an "AS IS" BASIS, 

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

# See the License for the specific language governing permissions and 

# limitations under the License. 

# 

 

""" 

.. attribute:: ImageSchema 

 

An attribute of this module that contains the instance of :class:`_ImageSchema`. 

 

.. autoclass:: _ImageSchema 

:members: 

""" 

 

import sys 

 

import numpy as np 

from distutils.version import LooseVersion 

 

from pyspark import SparkContext 

from pyspark.sql.types import Row, _create_row, _parse_datatype_json_string 

from pyspark.sql import SparkSession 

 

__all__ = ["ImageSchema"] 

 

 

class _ImageSchema(object):
    """
    Internal class for `pyspark.ml.image.ImageSchema` attribute. Meant to be private and
    not to be instantiated. Use `pyspark.ml.image.ImageSchema` attribute to access the
    APIs of this class.

    Every property is fetched from the JVM-side ``ImageSchema`` object on first
    access and cached on the instance afterwards.
    """

    # Number of channels -> name of the matching 8-bit unsigned OpenCV type,
    # used by :meth:`toImage` to pick the image mode.
    _channelsToOcvTypeName = {1: "CV_8UC1", 3: "CV_8UC3", 4: "CV_8UC4"}

    def __init__(self):
        # Lazily populated caches; ``None`` means "not fetched from the JVM yet".
        self._imageSchema = None
        self._ocvTypes = None
        self._columnSchema = None
        self._imageFields = None
        self._undefinedImageType = None

    @staticmethod
    def _jvmImageSchema():
        """Return the JVM ``org.apache.spark.ml.image.ImageSchema`` object of the active context."""
        ctx = SparkContext._active_spark_context
        return ctx._jvm.org.apache.spark.ml.image.ImageSchema

    @property
    def imageSchema(self):
        """
        Returns the image schema.

        Returns
        -------
        :class:`StructType`
            with a single column of images named "image" (nullable)
            and having the same type returned by :meth:`columnSchema`.

        .. versionadded:: 2.3.0
        """

        if self._imageSchema is None:
            jschema = self._jvmImageSchema().imageSchema()
            self._imageSchema = _parse_datatype_json_string(jschema.json())
        return self._imageSchema

    @property
    def ocvTypes(self):
        """
        Returns the OpenCV type mapping supported.

        Returns
        -------
        dict
            a dictionary containing the OpenCV type mapping supported.

        .. versionadded:: 2.3.0
        """

        if self._ocvTypes is None:
            self._ocvTypes = dict(self._jvmImageSchema().javaOcvTypes())
        return self._ocvTypes

    @property
    def columnSchema(self):
        """
        Returns the schema for the image column.

        Returns
        -------
        :class:`StructType`
            a schema for image column,
            ``struct<origin:string, height:int, width:int, nChannels:int, mode:int, data:binary>``.

        .. versionadded:: 2.4.0
        """

        if self._columnSchema is None:
            jschema = self._jvmImageSchema().columnSchema()
            self._columnSchema = _parse_datatype_json_string(jschema.json())
        return self._columnSchema

    @property
    def imageFields(self):
        """
        Returns field names of image columns.

        Returns
        -------
        list
            a list of field names.

        .. versionadded:: 2.3.0
        """

        if self._imageFields is None:
            self._imageFields = list(self._jvmImageSchema().imageFields())
        return self._imageFields

    @property
    def undefinedImageType(self):
        """
        Returns the name of undefined image type for the invalid image.

        .. versionadded:: 2.3.0
        """

        if self._undefinedImageType is None:
            self._undefinedImageType = self._jvmImageSchema().undefinedImageType()
        return self._undefinedImageType

    def toNDArray(self, image):
        """
        Converts an image to an array with metadata.

        Parameters
        ----------
        image : :class:`Row`
            A row that contains the image to be converted. It should
            have the attributes specified in `ImageSchema.imageSchema`.

        Returns
        -------
        :class:`numpy.ndarray`
            that is an image, shaped ``(height, width, nChannels)`` with
            ``uint8`` elements backed by ``image.data``.

        Raises
        ------
        TypeError
            if `image` is not a :class:`Row`.
        ValueError
            if `image` lacks any of the fields listed in :meth:`imageFields`.

        .. versionadded:: 2.3.0
        """

        if not isinstance(image, Row):
            raise TypeError(
                "image argument should be pyspark.sql.types.Row; however, "
                "it got [%s]." % type(image))

        if any(not hasattr(image, f) for f in self.imageFields):
            raise ValueError(
                "image argument should have attributes specified in "
                "ImageSchema.imageSchema [%s]." % ", ".join(self.imageFields))

        height = image.height
        width = image.width
        nChannels = image.nChannels
        # One byte per channel value, rows stored back to back.
        return np.ndarray(
            shape=(height, width, nChannels),
            dtype=np.uint8,
            buffer=image.data,
            strides=(width * nChannels, nChannels, 1))

    def toImage(self, array, origin=""):
        """
        Converts an array with metadata to a two-dimensional image.

        Parameters
        ----------
        array : :class:`numpy.ndarray`
            The array to convert to image. It must have exactly three
            dimensions, ``(height, width, nChannels)``, with 1, 3 or 4 channels.
            Values are cast to ``uint8``.
        origin : str
            Path to the image, optional.

        Returns
        -------
        :class:`Row`
            that is a two dimensional image.

        Raises
        ------
        TypeError
            if `array` is not a :class:`numpy.ndarray`.
        ValueError
            if `array` is not three-dimensional or has an unsupported
            number of channels.

        .. versionadded:: 2.3.0
        """

        if not isinstance(array, np.ndarray):
            raise TypeError(
                "array argument should be numpy.ndarray; however, it got [%s]." % type(array))

        if array.ndim != 3:
            raise ValueError("Invalid array shape")

        height, width, nChannels = array.shape
        # Reject unsupported channel counts before touching the JVM-backed type map.
        ocvTypeName = self._channelsToOcvTypeName.get(nChannels)
        if ocvTypeName is None:
            raise ValueError("Invalid number of channels")
        mode = self.ocvTypes[ocvTypeName]

        # Running `bytearray(numpy.array([1]))` fails in specific Python versions
        # with a specific Numpy version, for example in Python 3.6.0 and NumPy 1.13.3.
        # Here, it avoids it by converting it to bytes whenever `tobytes` exists
        # (NumPy 1.9+); the capability is detected directly rather than by
        # comparing version strings.
        flat = array.astype(dtype=np.uint8).ravel()
        if hasattr(flat, "tobytes"):
            data = bytearray(flat.tobytes())
        else:
            # Numpy prior to 1.9 don't have `tobytes` method.
            data = bytearray(flat)

        # Creating new Row with _create_row(), because Row(name = value, ... )
        # orders fields by name, which conflicts with expected schema order
        # when the new DataFrame is created by UDF
        return _create_row(self.imageFields,
                           [origin, height, width, nChannels, mode, data])

 

 

# The single shared instance. It has to be created before ``__init__`` is
# replaced below, because afterwards every construction attempt raises.
ImageSchema = _ImageSchema()


# Monkey patch to disallow instantiation of this class.
def _disallow_instance(*_args, **_kwargs):
    """Replacement ``__init__`` that rejects any further instantiation.

    Accepts arbitrary arguments so that ``_ImageSchema(...)`` always fails with
    the RuntimeError below rather than a signature-mismatch TypeError.
    """
    raise RuntimeError("Creating instance of _ImageSchema class is disallowed.")


_ImageSchema.__init__ = _disallow_instance

 

 

def _test():
    """Run this module's doctests against a local two-thread Spark session.

    The session is exposed to the doctests as the global ``spark`` and is
    stopped even if the doctest run itself raises. Exits the process with
    status -1 when any doctest fails.
    """
    import doctest
    import pyspark.ml.image

    globs = pyspark.ml.image.__dict__.copy()
    spark = SparkSession.builder\
        .master("local[2]")\
        .appName("ml.image tests")\
        .getOrCreate()
    globs['spark'] = spark

    try:
        (failure_count, test_count) = doctest.testmod(
            pyspark.ml.image, globs=globs,
            optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
    finally:
        # Always release the session, including when testmod blows up.
        spark.stop()
    if failure_count:
        sys.exit(-1)


if __name__ == "__main__":
    _test()