Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

# 

# Licensed to the Apache Software Foundation (ASF) under one or more 

# contributor license agreements. See the NOTICE file distributed with 

# this work for additional information regarding copyright ownership. 

# The ASF licenses this file to You under the Apache License, Version 2.0 

# (the "License"); you may not use this file except in compliance with 

# the License. You may obtain a copy of the License at 

# 

# http://www.apache.org/licenses/LICENSE-2.0 

# 

# Unless required by applicable law or agreed to in writing, software 

# distributed under the License is distributed on an "AS IS" BASIS, 

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

# See the License for the specific language governing permissions and 

# limitations under the License. 

# 

 

from collections import namedtuple 

import os 

import traceback 

 

 

# Immutable record describing a call site: the name of a function plus the
# file and line number associated with it.
CallSite = namedtuple("CallSite", ["function", "file", "linenum"])

 

 

def first_spark_call():
    """
    Return a CallSite representing the first Spark call in the current call stack.

    The stack is scanned from the outermost frame inwards for the first frame
    whose file path starts with the directory of the innermost frame's file.
    The returned CallSite carries the function name of that frame together
    with the file and line number of the frame just before it (its caller).
    When the matching frame is the outermost one there is no caller, so the
    frame's own file and line number are used instead.

    Returns None if the extracted stack is empty.
    """
    stack = traceback.extract_stack()
    if not stack:
        return None

    # Directory of the file owning the innermost (most recent) frame; any
    # frame whose path starts with it is treated as a Spark frame.
    innermost_file, _, _, _ = stack[-1]
    spark_dir = os.path.dirname(innermost_file)

    # Index of the outermost frame located under spark_dir, falling back to
    # the innermost frame when nothing matches.
    spark_idx = next(
        (idx for idx, (path, _, _, _) in enumerate(stack)
         if path.startswith(spark_dir)),
        len(stack) - 1,
    )

    if spark_idx == 0:
        # No caller frame exists above the outermost frame.
        path, lineno, func_name, _ = stack[0]
        return CallSite(function=func_name, file=path, linenum=lineno)

    _, _, spark_func, _ = stack[spark_idx]
    caller_path, caller_lineno, _, _ = stack[spark_idx - 1]
    return CallSite(function=spark_func, file=caller_path, linenum=caller_lineno)

 

 

class SCCallSiteSync(object):
    """
    Helper for setting the spark context call site.

    The call site description is computed once, at construction time, from
    first_spark_call() and formatted as "<function> at <file>:<linenum>".
    Entering the context manager passes that description to
    ``sc._jsc.setCallSite`` and leaving it resets the call site to None.
    Nested uses are tracked with a counter shared by all instances, so only
    the outermost ``with`` block sets and clears the call site.

    Example usage:
    from pyspark.context import SCCallSiteSync
    with SCCallSiteSync(<relevant SparkContext>) as css:
        <a Spark call>
    """

    # Number of SCCallSiteSync blocks currently entered; shared class-wide.
    _spark_stack_depth = 0

    def __init__(self, sc):
        """
        Record the call site description and the context to report it to.

        :param sc: object whose ``_jsc.setCallSite`` is called on enter/exit
        """
        call_site = first_spark_call()
        if call_site is not None:
            self._call_site = "%s at %s:%s" % (
                call_site.function, call_site.file, call_site.linenum)
        else:
            self._call_site = "Error! Could not extract traceback info"
        self._context = sc

    def __enter__(self):
        """
        Set the call site if this is the outermost active block.

        :return: this SCCallSiteSync instance, so that the ``as`` target of
            the ``with`` statement is bound to the manager instead of None
        """
        if SCCallSiteSync._spark_stack_depth == 0:
            self._context._jsc.setCallSite(self._call_site)
        SCCallSiteSync._spark_stack_depth += 1
        return self

    def __exit__(self, type, value, tb):
        """
        Clear the call site once the outermost active block is left.

        Nothing is returned, so an exception raised inside the ``with`` body
        is not suppressed.
        """
        SCCallSiteSync._spark_stack_depth -= 1
        if SCCallSiteSync._spark_stack_depth == 0:
            self._context._jsc.setCallSite(None)