Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. #
""" launch jvm gateway
Parameters ---------- conf : :py:class:`pyspark.SparkConf` spark configuration passed to spark-submit popen_kwargs : dict Dictionary of kwargs to pass to Popen when spawning the py4j JVM. This is a developer feature intended for use in customizing how pyspark interacts with the py4j JVM (e.g., capturing stdout/stderr).
Returns ------- ClientServer or JavaGateway """ gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"]) gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"] # Process already exists proc = None else: # Launch the Py4j gateway using Spark's run command so that we pick up the # proper classpath and settings from spark-env.sh "--conf spark.ui.enabled=false", submit_args ])
# Create a temporary directory where the gateway server should write the connection # information.
# Launch the Java gateway. # We open a pipe to stdin so that the Java gateway can die when the pipe is broken # We always set the necessary environment variables. # Don't send ctrl-c / SIGINT to the Java gateway: signal.signal(signal.SIGINT, signal.SIG_IGN) else: # preexec_fn not supported on Windows proc = Popen(command, **popen_kwargs)
# Wait for the file to appear, or for the process to exit, whichever happens first.
raise RuntimeError("Java gateway process exited before sending its port number")
finally:
# In Windows, ensure the Java child processes do not linger after Python has exited. # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when # the parent process' stdin sends an EOF). In Windows, however, this is not possible # because java.lang.Process reads directly from the parent process' stdin, contending # with any opportunity to read an EOF from the parent. Note that this is only best # effort and will not take effect if the python process is violently terminated. # In Windows, the child process here is "spark-submit.cmd", not the JVM itself # (because the UNIX "exec" command is not available). This means we cannot simply # call proc.kill(), which kills only the "spark-submit.cmd" process but not the # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx) def killChild(): Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)]) atexit.register(killChild)
# Connect to the gateway (or client server to pin the thread between JVM and Python) java_parameters=JavaParameters( port=gateway_port, auth_token=gateway_secret, auto_convert=True), python_parameters=PythonParameters( port=0, eager_load=False)) else: gateway = JavaGateway( gateway_parameters=GatewayParameters( port=gateway_port, auth_token=gateway_secret, auto_convert=True))
# Store a reference to the Popen object for use by the caller (e.g., in reading stdout/stderr)
# Import the classes used by PySpark # TODO(davies): move into sql
""" Performs the authentication protocol defined by the SocketAuthHelper class on the given file-like object 'conn'. """ conn.close() raise RuntimeError("Unexpected reply from iterator server.")
""" Connect to local host, authenticate with it, and return a (sockfile,sock) for that connection. Handles IPV4 & IPV6, does some error handling.
Parameters ---------- port : str or int or None auth_secret : str
Returns ------- tuple with (sockfile, sock) """ # Support for both IPv4 and IPv6. # On most of IPv6-ready systems, IPv6 will take precedence. except socket.error as e: emsg = str(e) errors.append("tried to connect to %s, but an error occurred: %s" % (sa, emsg)) sock.close() sock = None raise RuntimeError("could not open socket: %s" % errors)
""" Start callback server if not already started. The callback server is needed if the Java driver process needs to callback into the Python driver process to execute Python code. """
# getattr will fall back to the JVM, so we cannot test by hasattr() # gateway with real port # get the GatewayServer object in JVM by ID # update the port of CallbackClient with real port |