Sunday, February 12, 2023

How to Read Parquet Files in Databricks

# Databricks notebook source

# Load each XYZA_GL Parquet dataset from the mounted storage path and expose it
# as a session-scoped temp view so the %sql cells below can query it by name.
#
# Parquet files carry their own schema, so the CSV-only reader settings
# ("header", "inferSchema") are not passed: they have no effect on Parquet.

# Root folder that holds one sub-folder per dataset.
GL_BASE_PATH = "/mnt/ABCprdadls/2_RAW/mload/XYZA_GL"

# (sub-folder under GL_BASE_PATH, temp view name to register).
# The view names are kept exactly as the SQL cells reference them, including
# the "Budgetdata" spelling, which differs in case from its folder name.
GL_DATASETS = [
    ("GLDATA", "GLDATA"),
    ("GL_Details", "GL_Details"),
    ("GLDetails_2023", "GLDetails_2023"),
    ("tblBU_list_MF40_Desc", "tblBU_list_MF40_Desc"),
    ("BudgetData", "Budgetdata"),
    ("Tbl_PurePeriod", "Tbl_PurePeriod"),
]

for folder, view_name in GL_DATASETS:
    # The variable name is kept so that, as before, it ends up bound to the
    # last DataFrame loaded (Tbl_PurePeriod) once this cell has run.
    df_GLDATAXYZA = spark.read.parquet(f"{GL_BASE_PATH}/{folder}")
    df_GLDATAXYZA.createOrReplaceTempView(view_name)

# Make "test" the current database for subsequent unqualified table names.
spark.catalog.setCurrentDatabase("test")



# COMMAND ----------


# MAGIC %sql
# MAGIC -- Combine GLDetails_2023 and GL_Details into a single session-scoped view.
# MAGIC -- OR REPLACE allows this cell to be re-run in the same session without
# MAGIC -- failing because the view already exists.
# MAGIC -- NOTE(review): UNION removes duplicate rows and matches columns by position;
# MAGIC -- confirm both sources share the same column order and that de-duplication
# MAGIC -- is intended (otherwise UNION ALL is the cheaper, row-preserving choice).
# MAGIC CREATE OR REPLACE TEMPORARY VIEW VWtempGLDetails
# MAGIC     AS
# MAGIC select * from GLDetails_2023
# MAGIC union
# MAGIC Select *  from GL_Details  --where glfy=22 and glpn=12


# COMMAND ----------


# MAGIC %sql
# MAGIC -- Show at most 10 rows of VWtempGLDetails matching the filters below.
# MAGIC SELECT *
# MAGIC FROM VWtempGLDetails
# MAGIC WHERE glfy = 23
# MAGIC   AND glpn = 1
# MAGIC   AND gllt IN ('AA', 'AL', 'AN')
# MAGIC   AND glco = 2
# MAGIC LIMIT 10


# COMMAND ----------


# MAGIC %sql
# MAGIC -- Return every GLDATA row matching the filters below.
# MAGIC SELECT *
# MAGIC FROM GLDATA
# MAGIC WHERE fy = 23
# MAGIC   AND lt IN ('AA', 'AL', 'AN')
# MAGIC   AND co = 2


# COMMAND ----------


# MAGIC %sql
# MAGIC -- Display the full contents of the Tbl_PurePeriod view.
# MAGIC SELECT *
# MAGIC FROM Tbl_PurePeriod


No comments: