#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import tempfile
import unittest

from pyspark.install import install_spark, DEFAULT_HADOOP, DEFAULT_HIVE, \
    UNSUPPORTED_COMBINATIONS, checked_versions, checked_package_name
 

class SparkInstallationTestCase(unittest.TestCase): 
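    """Unit tests for the Spark installation helpers in pyspark.install."""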

 

    def test_install_spark(self):
        # Test only one case. Testing this is expensive because it needs to download
        # the Spark distribution.
        spark_version, hadoop_version, hive_version = checked_versions("3.0.1", "3.2", "2.3")

        with tempfile.TemporaryDirectory() as tmp_dir:
            install_spark(
                dest=tmp_dir,
                spark_version=spark_version,
                hadoop_version=hadoop_version,
                hive_version=hive_version)
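            # After installation, the target directory should contain the jars
            # directory, the bin/spark-submit launcher script and the RELEASE file.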

 

            self.assertTrue(os.path.isdir("%s/jars" % tmp_dir))
            self.assertTrue(os.path.exists("%s/bin/spark-submit" % tmp_dir))
            self.assertTrue(os.path.exists("%s/RELEASE" % tmp_dir))

 

    def test_package_name(self):
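        # With the default Hive 2.3 build, the package name is
        # "<spark version>-bin-<hadoop version>", e.g. spark-3.0.0-bin-hadoop3.2.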

        self.assertEqual(
            "spark-3.0.0-bin-hadoop3.2",
            checked_package_name("spark-3.0.0", "hadoop3.2", "hive2.3"))

 

    def test_checked_versions(self):
        test_version = "3.0.1"  # Just pick one version to test.

 

        # Positive test cases
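        # checked_versions accepts both plain versions ("3.0.0", "2.7", "2.3") and
        # prefixed ones ("spark-3.0.0", "hadoop2.7", "hive2.3"), returning the
        # normalized (spark, hadoop, hive) tuple.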

        self.assertEqual(
            ("spark-3.0.0", "hadoop2.7", "hive2.3"),
            checked_versions("spark-3.0.0", "hadoop2.7", "hive2.3"))

 

        self.assertEqual(
            ("spark-3.0.0", "hadoop2.7", "hive2.3"),
            checked_versions("3.0.0", "2.7", "2.3"))

 

        self.assertEqual(
            ("spark-2.4.1", "without-hadoop", "hive2.3"),
            checked_versions("2.4.1", "without", "2.3"))

 

        self.assertEqual(
            ("spark-3.0.1", "without-hadoop", "hive2.3"),
            checked_versions("spark-3.0.1", "without-hadoop", "hive2.3"))

 

        # Negative test cases
        for (hadoop_version, hive_version) in UNSUPPORTED_COMBINATIONS:
            with self.assertRaisesRegex(RuntimeError, 'Hive.*should.*Hadoop'):
                checked_versions(
                    spark_version=test_version,
                    hadoop_version=hadoop_version,
                    hive_version=hive_version)

 

        with self.assertRaisesRegex(RuntimeError, "Spark version should start with 'spark-'"):
            checked_versions(
                spark_version="malformed",
                hadoop_version=DEFAULT_HADOOP,
                hive_version=DEFAULT_HIVE)

 

        with self.assertRaisesRegex(RuntimeError, "Spark distribution.*malformed.*"):
            checked_versions(
                spark_version=test_version,
                hadoop_version="malformed",
                hive_version=DEFAULT_HIVE)

 

        with self.assertRaisesRegex(RuntimeError, "Spark distribution.*malformed.*"):
            checked_versions(
                spark_version=test_version,
                hadoop_version=DEFAULT_HADOOP,
                hive_version="malformed")

 

        with self.assertRaisesRegex(RuntimeError, "Spark distribution of hive1.2 is not supported"):
            checked_versions(
                spark_version=test_version,
                hadoop_version="hadoop3.2",
                hive_version="hive1.2")

 

 

if __name__ == "__main__":
    from pyspark.tests.test_install_spark import *  # noqa: F401

    try:
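        # Prefer xmlrunner so results are written as XML reports under
        # target/test-reports; fall back to the default text runner otherwise.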

        import xmlrunner  # type: ignore[import]
        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)