Create ADF resources

No category
Rating & reviews (0 reviews)
PowerShell

# Sign in, choose the subscription, then create the resource group and the
# data factory. $ResGrp and $DataFactory are reused by every later step.

# Connect to Azure account
Connect-AzAccount

# List all subscriptions
Get-AzSubscription

# Select Subscription
Select-AzSubscription -SubscriptionId "<SUBSCRIPTION_ID>"

# Create Resource group in East US
$resourceGroupName = "<RESOURCE_GROUP_NAME_UNIQUE>"
$ResGrp = New-AzResourceGroup -Name $resourceGroupName -Location 'East US'

# Create ADF resource in the same location as the resource group
$dataFactoryName = "<DATA_FACTORY_RESOURCE_NAME_UNIQUE>"
$DataFactory = Set-AzDataFactoryV2 `
    -ResourceGroupName $ResGrp.ResourceGroupName `
    -Location $ResGrp.Location `
    -Name $dataFactoryName

# Create a folder for the JSON definition files (RESOURCE_FOLDER)
# Create AzureStorageLinkedService.json file in RESOURCE_FOLDER folder
# Switch to that folder so the relative -DefinitionFile path resolves
Set-Location 'PATH_TO_RESOURCE_FOLDER'

# Create linked service from the JSON definition in the current folder
Set-AzDataFactoryV2LinkedService `
    -DataFactoryName $DataFactory.DataFactoryName `
    -ResourceGroupName $ResGrp.ResourceGroupName `
    -Name "AzureStorageLinkedService" `
    -DefinitionFile ".\AzureStorageLinkedService.json"

# Output must be:
# LinkedServiceName : AzureStorageLinkedService
# ResourceGroupName : RESOURCE_GROUP_NAME_UNIQUE
# DataFactoryName : DATA_FACTORY_RESOURCE_NAME_UNIQUE
# Properties : Microsoft.Azure.Management.DataFactory.Models.AzureBlobStorageLinkedService

# Create datasets
# Create a JSON file named InputDataset.json in the RESOURCE_FOLDER

# Run (the definition file is read from the current folder)
Set-AzDataFactoryV2Dataset `
    -DataFactoryName $DataFactory.DataFactoryName `
    -ResourceGroupName $ResGrp.ResourceGroupName `
    -Name "InputDataset" `
    -DefinitionFile ".\InputDataset.json"

# Output must be:
# DatasetName : InputDataset
# ResourceGroupName : RESOURCE_GROUP_NAME_UNIQUE
# DataFactoryName : DATA_FACTORY_RESOURCE_NAME_UNIQUE
# Structure :
# Properties : Microsoft.Azure.Management.DataFactory.Models.BinaryDataset

# Create a JSON file named OutputDataset.json in the RESOURCE_FOLDER

# Run (the definition file is read from the current folder)
Set-AzDataFactoryV2Dataset `
    -DataFactoryName $DataFactory.DataFactoryName `
    -ResourceGroupName $ResGrp.ResourceGroupName `
    -Name "OutputDataset" `
    -DefinitionFile ".\OutputDataset.json"

# Output must be:
# DatasetName : OutputDataset
# ResourceGroupName : RESOURCE_GROUP_NAME_UNIQUE
# DataFactoryName : DATA_FACTORY_RESOURCE_NAME_UNIQUE
# Structure :
# Properties : Microsoft.Azure.Management.DataFactory.Models.BinaryDataset

# Make sure you have created the folder structure and files in Azure Storage (Data Lake Gen2) !!!

# Create a pipeline
# Create a JSON file named myPipeline.json in the RESOURCE_FOLDER

# Run (the definition file is read from the current folder);
# the result is kept in $DFPipeLine so the run step can reference the pipeline.
$DFPipeLine = Set-AzDataFactoryV2Pipeline `
    -DataFactoryName $DataFactory.DataFactoryName `
    -ResourceGroupName $ResGrp.ResourceGroupName `
    -Name "myPipeline" `
    -DefinitionFile ".\myPipeline.json"

# Nothing output

# Create a pipeline run
# The value returned by Invoke-AzDataFactoryV2Pipeline is stored in $RunId
# and used afterwards to query the run.
$pipelineRunArgs = @{
    DataFactoryName   = $DataFactory.DataFactoryName
    ResourceGroupName = $ResGrp.ResourceGroupName
    PipelineName      = $DFPipeLine.Name
}
$RunId = Invoke-AzDataFactoryV2Pipeline @pipelineRunArgs

# Nothing output

# Monitor the pipeline run
# Polls the run identified by $RunId every 10 seconds and stops as soon as
# its status is anything other than "InProgress" or "Queued".

$runQuery = @{
    ResourceGroupName = $ResGrp.ResourceGroupName
    DataFactoryName   = $DataFactory.DataFactoryName
    PipelineRunId     = $RunId
}

while ($true) {
    $Run = Get-AzDataFactoryV2PipelineRun @runQuery

    # Nothing is reported while the query returns no run; just wait and retry.
    if ($Run) {
        if ($Run.Status -notin "InProgress", "Queued") {
            Write-Output "Pipeline run finished. The status is: $($Run.Status)"
            $Run
            break
        }
        Write-Output "Pipeline is running...status: $($Run.Status)"
    }

    Start-Sleep -Seconds 10
}

# Retrieve the copy activity run details for the pipeline run in $RunId,
# for example the size of the data read/written.
# The query window spans 30 minutes before and after the current time.

Write-Output "Activity run details:"
$activityQuery = @{
    DataFactoryName   = $DataFactory.DataFactoryName
    ResourceGroupName = $ResGrp.ResourceGroupName
    PipelineRunId     = $RunId
    RunStartedAfter   = (Get-Date).AddMinutes(-30)
    RunStartedBefore  = (Get-Date).AddMinutes(30)
}
$Result = Get-AzDataFactoryV2ActivityRun @activityQuery
$Result

# Print the Output and Error properties of the result, one entry per line.
Write-Output "Activity 'Output' section:"
$Result.Output -join "`r`n"

Write-Output "Activity 'Error' section:"
$Result.Error -join "`r`n"





ADF JSON files

AzureStorageLinkedService.json

{
"name": "AzureStorageLinkedService",
"properties": {
"annotations": [],
"type": "AzureBlobStorage",
"typeProperties": {
"connectionString": "DefaultEndpointsProtocol=https;AccountName=<NAME>;AccountKey=<KEY>;EndpointSuffix=core.windows.net"
}
}
}

InputDataset.json

{
"name": "InputDataset",
"properties": {
"linkedServiceName": {
"referenceName": "AzureStorageLinkedService",
"type": "LinkedServiceReference"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "AzureBlobStorageLocation",
"fileName": "emp.txt",
"folderPath": "input",
"container": "<CONTAINER_NAME>"
}
}
}
}

OutputDataset.json

{
"name": "OutputDataset",
"properties": {
"linkedServiceName": {
"referenceName": "AzureStorageLinkedService",
"type": "LinkedServiceReference"
},
"annotations": [],
"type": "Binary",
"typeProperties": {
"location": {
"type": "AzureBlobStorageLocation",
"folderPath": "output",
"container": "<CONTAINER_NAME>"
}
}
}
}

myPipeline.json

{
"name": "<PIPE_LINE_NAME>",
"properties": {
"activities": [
{
"name": "CopyFromBlobToBlob",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "7.00:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "BinarySource",
"storeSettings": {
"type": "AzureBlobStorageReadSettings",
"recursive": true
}
},
"sink": {
"type": "BinarySink",
"storeSettings": {
"type": "AzureBlobStorageWriteSettings"
}
},
"enableStaging": false
},
"inputs": [
{
"referenceName": "InputDataset",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "OutputDataset",
"type": "DatasetReference"
}
]
}
],
"annotations": []
}
}