Coverage for pyspark/sql/catalog.py: 93%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

# Licensed to the Apache Software Foundation (ASF) under one or more

# contributor license agreements. See the NOTICE file distributed with

# this work for additional information regarding copyright ownership.

# The ASF licenses this file to You under the Apache License, Version 2.0

# (the "License"); you may not use this file except in compliance with

# the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

import sys

import warnings

from collections import namedtuple

from pyspark import since

from pyspark.sql.dataframe import DataFrame

from pyspark.sql.types import StructType

# Immutable record types built by the Catalog listing methods in this module.
# Field order is significant: it defines positional construction and unpacking.
Database = namedtuple("Database", ["name", "description", "locationUri"])

Table = namedtuple(
    "Table",
    ["name", "database", "description", "tableType", "isTemporary"],
)

Column = namedtuple(
    "Column",
    ["name", "description", "dataType", "nullable", "isPartition", "isBucket"],
)

Function = namedtuple("Function", ["name", "description", "className", "isTemporary"])

class Catalog(object):

"""User-facing catalog API, accessible through `SparkSession.catalog`.

This is a thin wrapper around its Scala implementation org.apache.spark.sql.catalog.Catalog.

"""

def __init__(self, sparkSession):

"""Create a new Catalog that wraps the underlying JVM object."""

self._sparkSession = sparkSession

self._jsparkSession = sparkSession._jsparkSession

self._jcatalog = sparkSession._jsparkSession.catalog()

@since(2.0)
def currentDatabase(self):
    """Returns the database that is currently the default for this session."""
    jvm_catalog = self._jcatalog
    return jvm_catalog.currentDatabase()

@since(2.0)
def setCurrentDatabase(self, dbName):
    """Makes ``dbName`` the current default database in this session.

    Whatever the JVM catalog's ``setCurrentDatabase`` call produces is handed
    back to the caller unchanged.
    """
    outcome = self._jcatalog.setCurrentDatabase(dbName)
    return outcome

@since(2.0)
def listDatabases(self):
    """Returns a list of :class:`Database` records, one per database
    available across all sessions.
    """
    # Walk the Java-style iterator explicitly through hasNext()/next().
    db_iter = self._jcatalog.listDatabases().toLocalIterator()
    found = []
    while db_iter.hasNext():
        jvm_db = db_iter.next()
        found.append(
            Database(
                name=jvm_db.name(),
                description=jvm_db.description(),
                locationUri=jvm_db.locationUri(),
            )
        )
    return found

def databaseExists(self, dbName):
    """Tell whether a database with the given name exists.

    .. versionadded:: 3.3.0

    Parameters
    ----------
    dbName : str
        name of the database whose existence is checked

    Returns
    -------
    bool
        Whether the database exists, as reported by the JVM catalog

    Examples
    --------
    >>> spark.catalog.databaseExists("test_new_database")
    False
    >>> df = spark.sql("CREATE DATABASE test_new_database")
    >>> spark.catalog.databaseExists("test_new_database")
    True
    >>> df = spark.sql("DROP DATABASE test_new_database")
    """
    jvm_catalog = self._jcatalog
    return jvm_catalog.databaseExists(dbName)

@since(2.0)
def listTables(self, dbName=None):
    """Returns a list of :class:`Table` records for the tables/views in the
    specified database.

    When ``dbName`` is omitted, the current database is used.
    The result includes all temporary views.
    """
    target_db = self.currentDatabase() if dbName is None else dbName
    # Walk the Java-style iterator explicitly through hasNext()/next().
    table_iter = self._jcatalog.listTables(target_db).toLocalIterator()
    found = []
    while table_iter.hasNext():
        jvm_table = table_iter.next()
        found.append(
            Table(
                name=jvm_table.name(),
                database=jvm_table.database(),
                description=jvm_table.description(),
                tableType=jvm_table.tableType(),
                isTemporary=jvm_table.isTemporary(),
            )
        )
    return found

@since(2.0)
def listFunctions(self, dbName=None):
    """Returns a list of :class:`Function` records for the functions
    registered in the specified database.

    When ``dbName`` is omitted, the current database is used.
    The result includes all temporary functions.
    """
    target_db = self.currentDatabase() if dbName is None else dbName
    # Walk the Java-style iterator explicitly through hasNext()/next().
    function_iter = self._jcatalog.listFunctions(target_db).toLocalIterator()
    found = []
    while function_iter.hasNext():
        jvm_function = function_iter.next()
        found.append(
            Function(
                name=jvm_function.name(),
                description=jvm_function.description(),
                className=jvm_function.className(),
                isTemporary=jvm_function.isTemporary(),
            )
        )
    return found

def functionExists(self, functionName, dbName=None):
    """Tell whether a function with the given name exists.

    The function may be either a temporary function or a function.

    .. versionadded:: 3.3.0

    Parameters
    ----------
    functionName : str
        name of the function whose existence is checked
    dbName : str, optional
        name of the database in which to look for the function.
        When omitted, the current database is used

    Returns
    -------
    bool
        Whether the function exists, as reported by the JVM catalog

    Examples
    --------
    >>> spark.catalog.functionExists("unexisting_function")
    False
    """
    target_db = self.currentDatabase() if dbName is None else dbName
    # The JVM method takes the database first, then the function name.
    return self._jcatalog.functionExists(target_db, functionName)

def listColumns(self, tableName, dbName=None):
    """Returns a list of :class:`Column` records for the given table/view in
    the specified database.

    When ``dbName`` is omitted, the current database is used.

    .. versionadded:: 2.0.0

    Notes
    -----
    the order of arguments here is different from that of its JVM counterpart
    because Python does not support method overloading.
    """
    target_db = self.currentDatabase() if dbName is None else dbName
    # The JVM method takes the database first, then the table name.
    column_iter = self._jcatalog.listColumns(target_db, tableName).toLocalIterator()
    found = []
    while column_iter.hasNext():
        jvm_column = column_iter.next()
        found.append(
            Column(
                name=jvm_column.name(),
                description=jvm_column.description(),
                dataType=jvm_column.dataType(),
                nullable=jvm_column.nullable(),
                isPartition=jvm_column.isPartition(),
                isBucket=jvm_column.isBucket(),
            )
        )
    return found

def tableExists(self, tableName, dbName=None):

"""Check if the table or view with the specified name exists.

This can either be a temporary view or a table/view.

.. versionadded:: 3.3.0

Parameters

----------

tableName : str

name of the table to check existence

dbName : str, optional

name of the database to check table existence in.

If no database is specified, the current database is used

Returns

-------

bool

Indicating whether the table/view exists

Examples

--------

This function can check if a table is defined or not:

>>> spark.catalog.tableExists("unexisting_table")

False

>>> df = spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")

>>> spark.catalog.tableExists("tab1")

True

>>> df = spark.sql("DROP TABLE tab1")

>>> spark.catalog.tableExists("unexisting_table")

False

It also works for views: