How to load data (CSV file) in databricks delta table using Excel, VBA and DSN
1. Create DSN using Simba driver on your machine
2. Get token and password from databricks
3. CSV column header Name must be same as table's columns header.
Sub UploadDataToSQLServer_Dynamic()
' Define variables
Dim sFile As String
Dim Cn As ADODB.Connection
Dim UploadQuery As String
Dim fso As Object
Dim ts As Object
Dim line As String
Dim DataArray() As String
Dim i As Long
Dim batchCounter As Long
Dim batchSize As Long
Dim batchQuery As String
Dim headerArray() As String
Dim numColumns As Integer
Dim columnList As String
Dim StartTime As Double
StartTime = Timer
' Get the CSV file to upload
sFile = Application.GetOpenFilename("CSV Files (*.csv), *.csv", , "Select CSV File to Upload")
If sFile = "False" Then Exit Sub ' User canceled the file selection
' Open the CSV file
Set fso = CreateObject("Scripting.FileSystemObject")
Set ts = fso.OpenTextFile(sFile, 1) ' 1 for reading
' Establish connection to SQL Server (replace details with your own)
dsnName = "YourDSNName"
Set Cn = New ADODB.Connection
Cn.Open "DSN=" & dsnName & ";Uid=YorrTokenFromDataBricks;Pwd=YourPassWord from Databricks ;"
' Initialize variables
batchCounter = 0
batchSize = 80
TableName = "DatabaseName.TableName"
UploadQuery = "INSERT INTO " & TableName & "("
' Read the header row to determine the number of columns
If Not ts.AtEndOfStream Then
line = ts.ReadLine
headerArray = Split(line, ",")
numColumns = UBound(headerArray) + 1
columnList = Join(headerArray, ", ")
UploadQuery = UploadQuery & columnList & ") "
End If
' Initialize the batch query
batchQuery = ""
' Read and process the CSV file
i = 0
Do Until ts.AtEndOfStream
line = ts.ReadLine
DataArray = Split(line, ",")
' Build the SELECT part dynamically based on data types (modify as needed)
If UBound(DataArray) = UBound(headerArray) Then
batchQuery = batchQuery & "SELECT "
For j = 0 To numColumns - 1
batchQuery = batchQuery & "'" & Replace(DataArray(j), "'", "''") & "'"
If j < numColumns - 1 Then
batchQuery = batchQuery & ", "
End If
Next j
batchQuery = batchQuery & " UNION ALL " & Chr(10)
batchCounter = batchCounter + 1
' Check if batch size is reached
If batchCounter >= batchSize Then
' Remove the last 'UNION ALL' and execute the batch query
If Len(batchQuery) > 0 Then
batchQuery = Left(batchQuery, Len(batchQuery) - Len(" UNION ALL " & Chr(10)))
' Debug.Print UploadQuery & Chr(10) & batchQuery
Cn.Execute UploadQuery & Chr(10) & batchQuery
End If
' Reset the batch variables
batchQuery = ""
batchCounter = 0
End If
End If
i = i + 1
Loop
' Execute any remaining batch query
If batchCounter > 0 And Len(batchQuery) > 0 Then
batchQuery = Left(batchQuery, Len(batchQuery) - Len(" UNION ALL " & Chr(10)))
Debug.Print UploadQuery & Chr(10) & batchQuery
Cn.Execute UploadQuery & Chr(10) & batchQuery
End If
' Close the text stream
ts.Close
' Close the connection
Cn.Close
' Inform user of successful upload
MsgBox Format((Timer - StartTime) / 86400, "hh:mm:ss") & " Data upload to Databricks SQL completed!", vbInformation
' Clean up
Set Cn = Nothing
Set fso = Nothing
Set ts = Nothing
End Sub
No comments:
Post a Comment