#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
import unittest

from pyspark import StorageLevel
from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream
from pyspark.testing.streamingutils import should_test_kinesis, kinesis_requirement_message, \
    PySparkStreamingTestCase


@unittest.skipIf(not should_test_kinesis, kinesis_requirement_message)
class KinesisStreamTests(PySparkStreamingTestCase):
 

    def test_kinesis_stream_api(self):
        # Don't start the StreamingContext because we cannot test it in Jenkins
        KinesisUtils.createStream(
            self.ssc, "myAppNam", "mySparkStream",
            "https://kinesis.us-west-2.amazonaws.com", "us-west-2",
            InitialPositionInStream.LATEST, 2, StorageLevel.MEMORY_AND_DISK_2)
        KinesisUtils.createStream(
            self.ssc, "myAppNam", "mySparkStream",
            "https://kinesis.us-west-2.amazonaws.com", "us-west-2",
            InitialPositionInStream.LATEST, 2, StorageLevel.MEMORY_AND_DISK_2,
            "awsAccessKey", "awsSecretKey")

    def test_kinesis_stream(self):
        import random
        kinesisAppName = ("KinesisStreamTests-%d" % abs(random.randint(0, 10000000)))
        # JVM-side helper used to create and later tear down a real Kinesis test stream
        kinesisTestUtils = self.ssc._jvm.org.apache.spark.streaming.kinesis.KinesisTestUtils(2)
        try:
            kinesisTestUtils.createStream()
            aWSCredentials = kinesisTestUtils.getAWSCredentials()
            stream = KinesisUtils.createStream(
                self.ssc, kinesisAppName, kinesisTestUtils.streamName(),
                kinesisTestUtils.endpointUrl(), kinesisTestUtils.regionName(),
                InitialPositionInStream.LATEST, 10, StorageLevel.MEMORY_ONLY,
                aWSCredentials.getAWSAccessKeyId(), aWSCredentials.getAWSSecretKey())

            outputBuffer = []

            def get_output(_, rdd):
                for e in rdd.collect():
                    outputBuffer.append(e)

            stream.foreachRDD(get_output)
            self.ssc.start()

            # Push the test data repeatedly and wait up to two minutes for all of it
            # to appear in the collected output.
            testData = [i for i in range(1, 11)]
            expectedOutput = set([str(i) for i in testData])
            start_time = time.time()
            while time.time() - start_time < 120:
                kinesisTestUtils.pushData(testData)
                if expectedOutput == set(outputBuffer):
                    break
                time.sleep(10)
            self.assertEqual(expectedOutput, set(outputBuffer))
        except:
            import traceback
            traceback.print_exc()
            raise
        finally:
            # Always stop the context and clean up the AWS resources created for this test
            self.ssc.stop(False)
            kinesisTestUtils.deleteStream()
            kinesisTestUtils.deleteDynamoDBTable(kinesisAppName)


if __name__ == "__main__":
    from pyspark.streaming.tests.test_kinesis import *  # noqa: F401

    try:
        import xmlrunner  # type: ignore[import]
        testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2)
    except ImportError:
        testRunner = None
    unittest.main(testRunner=testRunner, verbosity=2)