Add synapse 201

This commit is contained in:
murggu 2022-11-08 13:47:23 +01:00
parent ebf66eab0b
commit d65c97c307
17 changed files with 603 additions and 16 deletions

View File

@ -1,10 +1,4 @@
locals {
tags = {
Toolkit = "Terraform"
}
safe_name = replace(var.name, "-", "")
safe_environment = replace(var.environment, "-", "")
basename = "${var.name}-${var.environment}"
safe_basename = replace(local.basename, "-", "")
}

View File

@ -19,6 +19,4 @@ data "http" "ip" {
resource "azurerm_resource_group" "default" {
name = "rg-${local.basename}"
location = var.location
tags = local.tags
}

View File

@ -1,13 +1,11 @@
resource "azurerm_storage_account" "default" {
name = "st${local.safe_name}${local.safe_environment}"
name = "st${local.safe_basename}"
resource_group_name = azurerm_resource_group.default.name
location = azurerm_resource_group.default.location
account_tier = "Standard"
account_replication_type = "LRS"
account_kind = "StorageV2"
is_hns_enabled = true
tags = local.tags
}
resource "azurerm_role_assignment" "sbdc_current_user" {

View File

@ -9,8 +9,6 @@ resource "azurerm_synapse_workspace" "default" {
managed_resource_group_name = "${azurerm_resource_group.default.name}-syn-managed"
public_network_access_enabled = true
aad_admin {
login = var.aad_login.name
object_id = var.aad_login.object_id
@ -20,8 +18,6 @@ resource "azurerm_synapse_workspace" "default" {
identity {
type = "SystemAssigned"
}
tags = local.tags
}
resource "azurerm_synapse_firewall_rule" "allow_my_ip" {

View File

@ -0,0 +1,11 @@
# Terraform specific
.terraform
.terraform.lock.hcl
terraform.tfstate
terraform.tfstate.backup
.terraform.tfstate.lock.info
terraform.tfvars
**.tfbackend
state/

View File

@ -0,0 +1,64 @@
# Azure Synapse Analytics workspace (moderately secure network set up)
This deployment configuration specifies an [Azure Synapse Analytics workspace](https://learn.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace),
and its associated resources including Azure Data Lake Storage (gen2), Synapse Spark Pool and Synapse SQL Pool.
In addition to these core services, this configuration specifies any networking components that are required to set up Azure Synapse Analytics
for private network connectivity using [Azure Private Link](https://docs.microsoft.com/en-us/azure/private-link/).
This configuration describes the minimal set of resources you require to get started with Azure Synapse Analytics in a network-isolated set-up. This configuration creates new network components. Use Azure Bastion to securely connect to the Virtual Machine.
## Resources
| Terraform Resource Type | Description |
| - | - |
| `azurerm_resource_group` | The resource group all resources get deployed into |
| `azurerm_bastion_host` | An Azure Bastion Instance to securely RDP/SSH into Virtual Machines deployed into the Virtual Network |
| `azurerm_public_ip` | The static public IP address used by the Azure Bastion instance |
| `azurerm_virtual_machine` | A Windows Data Science Virtual Machine used as a jump host for connecting to the Azure Synapse Analytics workspace |
| `azurerm_network_interface` | The network interface of the jump host Virtual Machine |
| `azurerm_network_security_group` | Network security group with the inbound RDP rule for the jump host Virtual Machine |
| `azurerm_network_interface_security_group_association` | Association of the network security group with the jump host network interface |
| `azurerm_dev_test_global_vm_shutdown_schedule` | A daily auto-shutdown schedule for the jump host Virtual Machine |
| `azurerm_virtual_network` | The virtual network the jump host, Azure Bastion and the private endpoints are deployed into |
| `azurerm_subnet` | The default subnet and the `AzureBastionSubnet` of the virtual network |
| `azurerm_storage_account` | An Azure Data Lake Storage (gen2) account associated to the Azure Synapse Analytics workspace |
| `azurerm_storage_data_lake_gen2_filesystem` | The default Data Lake file system used by the Azure Synapse Analytics workspace |
| `azurerm_storage_account_network_rules` | Firewall rules of the storage account |
| `azurerm_role_assignment` | Role assignments on the storage account for the current user and the Synapse workspace identity |
| `azurerm_synapse_workspace` | An Azure Synapse Analytics workspace instance |
| `azurerm_synapse_firewall_rule` | A Synapse firewall rule allowing the public IP address of the deploying client |
| `azurerm_synapse_sql_pool` | An optional Synapse dedicated SQL pool |
| `azurerm_synapse_spark_pool` | An optional Synapse Spark pool |
| `azurerm_synapse_private_link_hub` | A Synapse private link hub |
| `azurerm_private_dns_zone` | Private DNS Zones for FQDNs required for Azure Synapse Analytics and associated resources |
| `azurerm_private_dns_zone_virtual_network_link` | Virtual network links of the Private DNS Zones to the virtual network resource |
| `azurerm_private_endpoint` | Private Endpoints for the Azure Synapse Analytics workspace and associated resources |
## Variables
| Name | Description | Default |
|-|-|-|
| name | Name of the deployment | - |
| environment | The deployment environment name (used for pre- and postfixing resource names) | dev |
| location | The Azure region used for deployments | East US |
| aad_login | Azure AD administrator of the Synapse workspace (object with `name`, `object_id` and `tenant_id`) | placeholder values |
| jumphost_username | Admin username of the jump host VM | azureuser |
| jumphost_password | Password for the admin username of the jump host VM | ThisIsNotVerySecure! |
| synadmin_username | Login name of the Synapse SQL administrator | sqladminuser |
| synadmin_password | Password of the Synapse SQL administrator | ThisIsNotVerySecure! |
| enable_syn_sparkpool | Enable or disable the Synapse Spark pool deployment | false |
| enable_syn_sqlpool | Enable or disable the Synapse dedicated SQL pool deployment | false |
## Usage
1. Copy `terraform.tfvars.example` to `terraform.tfvars`
2. Update `terraform.tfvars` with your desired values
3. Run Terraform
```console
$ terraform init
$ terraform plan
$ terraform apply
```
## Learn more
- If you are new to Azure Synapse Analytics, see [Azure Synapse Analytics service](https://azure.microsoft.com/services/synapse-analytics/) and [Azure Synapse Analytics documentation](https://learn.microsoft.com/azure/synapse-analytics/overview-what-is).
- To learn more about security configurations in Azure Synapse Analytics, see [Azure Synapse Analytics security white paper](https://learn.microsoft.com/azure/synapse-analytics/guidance/security-white-paper-introduction).
- For all configurations of Azure Synapse Analytics in Terraform, see [Terraform Hashicorp AzureRM provider documentation](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/synapse_workspace).

View File

@ -0,0 +1,19 @@
# Azure Bastion host for the deployment's resource group.
resource "azurerm_bastion_host" "default" {
  name                = "bas-${local.basename}"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location

  # Binds the host to the bastion subnet and to the public IP defined in this file.
  ip_configuration {
    name                 = "configuration"
    public_ip_address_id = azurerm_public_ip.default.id
    subnet_id            = azurerm_subnet.bastion.id
  }
}
# Public IP referenced by the Bastion host's ip_configuration.
resource "azurerm_public_ip" "default" {
  name                = "pip-${local.basename}"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location

  sku               = "Standard"
  allocation_method = "Static"
}

View File

@ -0,0 +1,87 @@
# Windows jump host VM built from the Data Science VM marketplace image.
resource "azurerm_virtual_machine" "jumphost" {
  name                  = "wvm-${local.basename}"
  resource_group_name   = azurerm_resource_group.default.name
  location              = azurerm_resource_group.default.location
  vm_size               = "Standard_DS3_v2"
  network_interface_ids = [azurerm_network_interface.jumphost_nic.id]

  # Disks are deleted together with the VM.
  delete_os_disk_on_termination    = true
  delete_data_disks_on_termination = true

  identity {
    type = "SystemAssigned"
  }

  # Marketplace image the OS disk is created from.
  storage_image_reference {
    publisher = "microsoft-dsvm"
    offer     = "dsvm-win-2019"
    sku       = "server-2019"
    version   = "latest"
  }

  storage_os_disk {
    name              = "disk-${local.basename}"
    create_option     = "FromImage"
    caching           = "ReadWrite"
    managed_disk_type = "StandardSSD_LRS"
  }

  # Local administrator credentials come from input variables.
  os_profile {
    computer_name  = "jumphost"
    admin_username = var.jumphost_username
    admin_password = var.jumphost_password
  }

  os_profile_windows_config {
    provision_vm_agent        = true
    enable_automatic_upgrades = true
  }
}
# Network interface of the jump host, placed in the default subnet.
resource "azurerm_network_interface" "jumphost_nic" {
  name                = "nic-${local.basename}"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location

  # Private address only; no public IP is attached here.
  ip_configuration {
    name                          = "configuration"
    subnet_id                     = azurerm_subnet.default.id
    private_ip_address_allocation = "Dynamic"
  }
}
# Network security group attached to the jump host NIC.
resource "azurerm_network_security_group" "jumphost_nsg" {
  name                = "nsg-${local.basename}"
  location            = azurerm_resource_group.default.location
  resource_group_name = azurerm_resource_group.default.name

  # Allow RDP only from the Azure Bastion subnet. The jump host NIC has no
  # public IP and is meant to be reached through Bastion, so accepting RDP
  # from any source ("*") was broader than required.
  security_rule {
    name                       = "RDP"
    priority                   = 1010
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "3389"
    source_address_prefix      = azurerm_subnet.bastion.address_prefixes[0]
    destination_address_prefix = "*"
  }
}
# Attaches the jump host NSG to the jump host NIC.
resource "azurerm_network_interface_security_group_association" "syn_jumphost_nsg_association" {
  network_security_group_id = azurerm_network_security_group.jumphost_nsg.id
  network_interface_id      = azurerm_network_interface.jumphost_nic.id
}
# Daily shutdown schedule for the jump host VM.
resource "azurerm_dev_test_global_vm_shutdown_schedule" "syn_jumphost_schedule" {
  virtual_machine_id = azurerm_virtual_machine.jumphost.id
  location           = azurerm_resource_group.default.location
  enabled            = true

  # Recurrence time "2000" in the configured time zone.
  timezone              = "W. Europe Standard Time"
  daily_recurrence_time = "2000"

  # No notification is sent before shutdown.
  notification_settings {
    enabled = false
  }
}

View File

@ -0,0 +1,4 @@
# Shared naming helpers used by the resources of this configuration.
locals {
  # "<name>-<environment>", used as the suffix of most resource names.
  basename = "${var.name}-${var.environment}"

  # basename with dashes removed, for resource names built without dashes.
  safe_basename = replace(local.basename, "-", "")
}

View File

@ -0,0 +1,22 @@
# Provider requirements for this configuration.
terraform {
  required_providers {
    # Pinned to an exact release; the explicit source address avoids relying
    # on Terraform's implicit "hashicorp/<name>" lookup.
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "= 3.30.0"
    }

    # Used by the `data "http" "ip"` lookup; declared explicitly so the
    # dependency is visible (no version constraint, as before).
    http = {
      source = "hashicorp/http"
    }
  }
}
# AzureRM provider with default feature settings.
provider "azurerm" {
  features {}
}

# Identity of the principal running Terraform (its object_id is used for role assignments).
data "azurerm_client_config" "current" {}

# HTTP lookup whose response body is used as the caller's IP in firewall rules.
data "http" "ip" {
  url = "https://ifconfig.me"
}
# Resource group that the other resources in this configuration reference.
resource "azurerm_resource_group" "default" {
  location = var.location
  name     = "rg-${local.basename}"
}

View File

@ -0,0 +1,24 @@
# Virtual network hosting the default and bastion subnets.
resource "azurerm_virtual_network" "default" {
  name                = "vnet-${local.basename}"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  address_space       = ["10.0.0.0/16"]
}
# Subnets

# Default subnet: hosts the jump host NIC and all private endpoints.
resource "azurerm_subnet" "default" {
  name                 = "snet-${local.basename}"
  resource_group_name  = azurerm_resource_group.default.name
  virtual_network_name = azurerm_virtual_network.default.name
  address_prefixes     = ["10.0.1.0/24"]
  service_endpoints    = []

  # Replaces the deprecated `enforce_private_link_endpoint_network_policies = true`.
  # The new argument has inverted meaning: `false` disables network policies
  # for private endpoints on this subnet, which is what the old flag did.
  private_endpoint_network_policies_enabled = false
}
# Subnet referenced by the Bastion host's ip_configuration.
resource "azurerm_subnet" "bastion" {
  name                 = "AzureBastionSubnet"
  virtual_network_name = azurerm_virtual_network.default.name
  resource_group_name  = azurerm_resource_group.default.name
  address_prefixes     = ["10.0.10.0/27"]
}

View File

@ -0,0 +1,115 @@
# Storage account with hierarchical namespace enabled (backs the Data Lake filesystem below).
resource "azurerm_storage_account" "default" {
  name                = "st${local.safe_basename}"
  location            = azurerm_resource_group.default.location
  resource_group_name = azurerm_resource_group.default.name

  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = "LRS"
  is_hns_enabled           = true
}
# Role assignments scoped to the storage account.

# Blob data access for the principal running Terraform.
resource "azurerm_role_assignment" "sbdc_current_user" {
  principal_id         = data.azurerm_client_config.current.object_id
  role_definition_name = "Storage Blob Data Contributor"
  scope                = azurerm_storage_account.default.id
}

# Blob data access for the Synapse workspace's system-assigned identity.
resource "azurerm_role_assignment" "sbdc_syn_ws" {
  principal_id         = azurerm_synapse_workspace.default.identity[0].principal_id
  role_definition_name = "Storage Blob Data Contributor"
  scope                = azurerm_storage_account.default.id
}

# Contributor role for the Synapse workspace's system-assigned identity.
resource "azurerm_role_assignment" "c_syn_ws" {
  principal_id         = azurerm_synapse_workspace.default.identity[0].principal_id
  role_definition_name = "Contributor"
  scope                = azurerm_storage_account.default.id
}
# Data Lake Gen2 filesystem referenced by the Synapse workspace.
resource "azurerm_storage_data_lake_gen2_filesystem" "default" {
  storage_account_id = azurerm_storage_account.default.id
  name               = "default"

  # Created only after the current user's blob data role assignment exists.
  depends_on = [
    azurerm_role_assignment.sbdc_current_user,
  ]
}
# Virtual Network & Firewall configuration
# Denies traffic by default; only the address returned by the `http.ip`
# lookup is listed in ip_rules.
resource "azurerm_storage_account_network_rules" "firewall_rules" {
  storage_account_id = azurerm_storage_account.default.id

  default_action = "Deny"
  bypass         = ["None"]

  # NOTE(review): newer hashicorp/http releases appear to deprecate `body` in
  # favour of `response_body` — confirm the provider version before switching.
  ip_rules                   = [data.http.ip.body]
  virtual_network_subnet_ids = []
}
# DNS Zones
# Private DNS zones for the storage blob and dfs endpoints.
resource "azurerm_private_dns_zone" "zone_blob" {
  resource_group_name = azurerm_resource_group.default.name
  name                = "privatelink.blob.core.windows.net"
}

resource "azurerm_private_dns_zone" "zone_dfs" {
  resource_group_name = azurerm_resource_group.default.name
  name                = "privatelink.dfs.core.windows.net"
}

# Linking of DNS zones to Virtual Network
resource "azurerm_private_dns_zone_virtual_network_link" "zone_blob_link" {
  name                  = "${local.basename}_link_blob"
  resource_group_name   = azurerm_resource_group.default.name
  virtual_network_id    = azurerm_virtual_network.default.id
  private_dns_zone_name = azurerm_private_dns_zone.zone_blob.name
}

resource "azurerm_private_dns_zone_virtual_network_link" "zone_dfs_link" {
  name                  = "${local.basename}_link_dfs"
  resource_group_name   = azurerm_resource_group.default.name
  virtual_network_id    = azurerm_virtual_network.default.id
  private_dns_zone_name = azurerm_private_dns_zone.zone_dfs.name
}
# Private Endpoint configuration
# Private endpoint for the storage account's "blob" subresource.
resource "azurerm_private_endpoint" "pe_blob" {
  name                = "pe-${azurerm_storage_account.default.name}-blob"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Registers the endpoint in the blob private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-blob"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_blob.id]
  }

  private_service_connection {
    name                           = "psc-blob-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_storage_account.default.id
    subresource_names              = ["blob"]
  }
}
# Private endpoint for the storage account's "dfs" subresource.
resource "azurerm_private_endpoint" "pe_dfs" {
  name                = "pe-${azurerm_storage_account.default.name}-dfs"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Registers the endpoint in the dfs private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-dfs"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_dfs.id]
  }

  private_service_connection {
    name                           = "psc-dfs-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_storage_account.default.id
    subresource_names              = ["dfs"]
  }
}

View File

@ -0,0 +1,28 @@
# Sql Pool
# Created only when var.enable_syn_sqlpool is true.
resource "azurerm_synapse_sql_pool" "syn_pool_sql" {
  count = var.enable_syn_sqlpool ? 1 : 0

  name                 = "syndp01"
  synapse_workspace_id = azurerm_synapse_workspace.default.id
  create_mode          = "Default"
  sku_name             = "DW100c"
}
# Spark Pool
# Created only when var.enable_syn_sparkpool is true.
resource "azurerm_synapse_spark_pool" "syn_pool_spark" {
  count = var.enable_syn_sparkpool ? 1 : 0

  name                 = "synsp01"
  synapse_workspace_id = azurerm_synapse_workspace.default.id
  node_size            = "Small"
  node_size_family     = "MemoryOptimized"

  # Pool pauses after 15 idle minutes.
  auto_pause {
    delay_in_minutes = 15
  }

  # Node count scales between 3 and 50.
  auto_scale {
    min_node_count = 3
    max_node_count = 50
  }
}

View File

@ -0,0 +1,33 @@
# Synapse private link hub; target of the "web" private endpoint.
resource "azurerm_synapse_private_link_hub" "default" {
  name                = "synplh${local.safe_basename}"
  location            = azurerm_resource_group.default.location
  resource_group_name = azurerm_resource_group.default.name
}
# DNS Zones
# Private DNS zone used by the "web" private endpoint of the private link hub.
resource "azurerm_private_dns_zone" "zone_web" {
  name                = "privatelink.azuresynapse.net"
  resource_group_name = azurerm_resource_group.default.name
}

# Linking of DNS zone to Virtual Network
# Without this link the zone's records are not resolvable from the virtual
# network. Every other private DNS zone in this configuration (blob, dfs,
# dev, sql) has a matching link.
resource "azurerm_private_dns_zone_virtual_network_link" "zone_web_link" {
  name                  = "${local.basename}_link_web"
  resource_group_name   = azurerm_resource_group.default.name
  private_dns_zone_name = azurerm_private_dns_zone.zone_web.name
  virtual_network_id    = azurerm_virtual_network.default.id
}
# Private Endpoint configuration
# Private endpoint for the private link hub's "web" subresource.
resource "azurerm_private_endpoint" "pe_web" {
  name                = "pe-${azurerm_synapse_private_link_hub.default.name}-web"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Registers the endpoint in the web private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-syn-web"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_web.id]
  }

  private_service_connection {
    name                           = "psc-web-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_synapse_private_link_hub.default.id
    subresource_names              = ["web"]
  }
}

View File

@ -0,0 +1,116 @@
# Synapse workspace with a managed virtual network and a system-assigned identity.
resource "azurerm_synapse_workspace" "default" {
  name                = "syn-${local.basename}"
  location            = azurerm_resource_group.default.location
  resource_group_name = azurerm_resource_group.default.name

  managed_virtual_network_enabled = true
  managed_resource_group_name     = "${azurerm_resource_group.default.name}-syn-managed"

  # Data Lake filesystem the workspace is bound to.
  storage_data_lake_gen2_filesystem_id = azurerm_storage_data_lake_gen2_filesystem.default.id

  # SQL administrator credentials come from input variables.
  sql_administrator_login          = var.synadmin_username
  sql_administrator_login_password = var.synadmin_password

  identity {
    type = "SystemAssigned"
  }

  # Azure AD administrator, taken from the aad_login object variable.
  aad_admin {
    login     = var.aad_login.name
    object_id = var.aad_login.object_id
    tenant_id = var.aad_login.tenant_id
  }
}
# Workspace firewall rule covering a single address: the one returned by the `http.ip` lookup.
resource "azurerm_synapse_firewall_rule" "allow_my_ip" {
  synapse_workspace_id = azurerm_synapse_workspace.default.id
  name                 = "AllowMyPublicIp"

  # Start and end are identical, so the range contains exactly one IP.
  start_ip_address = data.http.ip.body
  end_ip_address   = data.http.ip.body
}
# DNS Zones
# Private DNS zones for the Synapse dev and sql endpoints.
resource "azurerm_private_dns_zone" "zone_dev" {
  resource_group_name = azurerm_resource_group.default.name
  name                = "privatelink.dev.azuresynapse.net"
}

resource "azurerm_private_dns_zone" "zone_sql" {
  resource_group_name = azurerm_resource_group.default.name
  name                = "privatelink.sql.azuresynapse.net"
}

# Linking of DNS zones to Virtual Network
resource "azurerm_private_dns_zone_virtual_network_link" "zone_dev_link" {
  name                  = "${local.basename}_link_dev"
  resource_group_name   = azurerm_resource_group.default.name
  virtual_network_id    = azurerm_virtual_network.default.id
  private_dns_zone_name = azurerm_private_dns_zone.zone_dev.name
}

resource "azurerm_private_dns_zone_virtual_network_link" "zone_sql_link" {
  name                  = "${local.basename}_link_sql"
  resource_group_name   = azurerm_resource_group.default.name
  virtual_network_id    = azurerm_virtual_network.default.id
  private_dns_zone_name = azurerm_private_dns_zone.zone_sql.name
}
# Private Endpoint configuration
# Private endpoint for the workspace's "dev" subresource.
resource "azurerm_private_endpoint" "pe_dev" {
  name                = "pe-${azurerm_synapse_workspace.default.name}-dev"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Registers the endpoint in the dev private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-dev"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_dev.id]
  }

  private_service_connection {
    name                           = "psc-dev-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_synapse_workspace.default.id
    subresource_names              = ["dev"]
  }
}
# Private endpoint for the workspace's "sql" subresource.
resource "azurerm_private_endpoint" "pe_sql" {
  name                = "pe-${azurerm_synapse_workspace.default.name}-sql"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Registers the endpoint in the sql private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-sql"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_sql.id]
  }

  private_service_connection {
    name                           = "psc-sql-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_synapse_workspace.default.id
    subresource_names              = ["sql"]
  }
}
# Private endpoint for the workspace's "sqlondemand" subresource.
resource "azurerm_private_endpoint" "pe_sqlondemand" {
  name                = "pe-${azurerm_synapse_workspace.default.name}-sqlondemand"
  resource_group_name = azurerm_resource_group.default.name
  location            = azurerm_resource_group.default.location
  subnet_id           = azurerm_subnet.default.id

  # Shares the sql private DNS zone with the "sql" endpoint.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-sqlondemand"
    private_dns_zone_ids = [azurerm_private_dns_zone.zone_sql.id]
  }

  private_service_connection {
    name                           = "psc-sqlondemand-${local.basename}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_synapse_workspace.default.id
    subresource_names              = ["sqlondemand"]
  }
}

View File

@ -0,0 +1,12 @@
# Example variable values; replace the placeholders before applying.

# Deployment naming and region
name        = "syn101"
environment = "dev"
location    = "East US"

# Azure AD administrator of the Synapse workspace (placeholder IDs)
aad_login = {
  name      = "azureuser@contoso.com"
  object_id = "00000000-0000-0000-0000-000000000000"
  tenant_id = "00000000-0000-0000-0000-000000000000"
}

# Optional pools
enable_syn_sparkpool = true
enable_syn_sqlpool   = true

View File

@ -0,0 +1,66 @@
# Required: has no default.
variable "name" {
  description = "Name of the deployment"
  type        = string
}

variable "environment" {
  description = "Name of the environment"
  type        = string
  default     = "dev"
}

variable "location" {
  description = "Location of the resources"
  type        = string
  default     = "East US"
}
# Azure AD administrator passed to the Synapse workspace's aad_admin block.
variable "aad_login" {
  description = "AAD login"

  type = object({
    name      = string
    object_id = string
    tenant_id = string
  })

  # Placeholder identity; override with real values.
  default = {
    name      = "AzureAD Admin"
    object_id = "00000000-0000-0000-0000-000000000000"
    tenant_id = "00000000-0000-0000-0000-000000000000"
  }
}
# Used as admin_username in the jump host's os_profile.
variable "jumphost_username" {
  description = "Admin username of the VM"
  type        = string
  default     = "azureuser"
}
# Used as admin_password in the jump host's os_profile.
# Marked sensitive so Terraform redacts the value in plan/apply output.
# NOTE(review): the default is a well-known placeholder kept only for backward
# compatibility — override it in terraform.tfvars for any real deployment.
variable "jumphost_password" {
  type        = string
  description = "Password for the admin username of the VM"
  default     = "ThisIsNotVerySecure!"
  sensitive   = true
}
# Used as sql_administrator_login on the Synapse workspace.
variable "synadmin_username" {
  description = "Specifies The login name of the SQL administrator"
  type        = string
  default     = "sqladminuser"
}
# Used as sql_administrator_login_password on the Synapse workspace.
# Marked sensitive so Terraform redacts the value in plan/apply output.
# NOTE(review): the default is a well-known placeholder kept only for backward
# compatibility — override it in terraform.tfvars for any real deployment.
variable "synadmin_password" {
  type        = string
  description = "The Password associated with the sql_administrator_login for the SQL administrator"
  default     = "ThisIsNotVerySecure!"
  sensitive   = true
}
# Feature toggles; each one drives the `count` of the matching pool resource.
variable "enable_syn_sparkpool" {
  description = "Variable to enable or disable Synapse Spark pool deployment"
  type        = bool
  default     = false
}

variable "enable_syn_sqlpool" {
  description = "Variable to enable or disable Synapse Dedicated SQL pool deployment"
  type        = bool
  default     = false
}