Configures a basic EMR cluster running the Spark application, including master and core instance groups (the core group has an autoscaling policy) and an EBS root volume size setting.

Terraform Template

# EMR cluster running Spark with one master and one core instance group.
#
# The security group, subnet, instance profile and service role referenced
# below are NOT declared in this snippet; they must be defined elsewhere in
# the same module (aws_security_group.sg, aws_subnet.main,
# aws_iam_instance_profile.emr_profile, aws_iam_role.iam_emr_service_role).
resource "aws_emr_cluster" "cluster" {
  # Extra JSON passed through to EMR; here it sets an HTTP proxy host/port
  # for the instance AWS client configuration.
  additional_info = <<-EOF
    {
     "instanceAwsClientConfiguration": {
     "proxyPort": 8099,
     "proxyHost": "myproxy.example.com"
     }
    }
    
  EOF
  applications = ["Spark"]

  # Runs the public "run-if" bootstrap script with a condition
  # (instance.isMaster=true) and the command to execute when it holds.
  bootstrap_action {
    args = ["instance.isMaster=true", "echo running on master node"]
    name = "runif"
    path = "s3://elasticmapreduce/bootstrap-actions/run-if"
  }

  # Application configuration overrides: exports JAVA_HOME for both the
  # hadoop-env and spark-env classifications.
  # This is the only place the configuration list is supplied; the
  # `configurations` argument is mutually exclusive with `configurations_json`
  # and must not be set alongside it.
  configurations_json = <<-EOF
     [
     {
     "Classification": "hadoop-env",
     "Configurations": [
     {
     "Classification": "export",
     "Properties": {
     "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
     }
     }
     ],
     "Properties": {}
     },
     {
     "Classification": "spark-env",
     "Configurations": [
     {
     "Classification": "export",
     "Properties": {
     "JAVA_HOME": "/usr/lib/jvm/java-1.8.0"
     }
     }
     ],
     "Properties": {}
     }
     ]
    
  EOF

  core_instance_group {
    # Scales the core group between 1 and 2 instances: adds one instance when
    # the average YARNMemoryAvailablePercentage over a single 300-second
    # period drops below 15 percent, with a 300-second cooldown.
    # NOTE(review): EMR automatic scaling normally also needs an
    # `autoscaling_role` on the cluster; none is set here - confirm.
    autoscaling_policy = <<-EOF
      {
      "Constraints": {
       "MinCapacity": 1,
       "MaxCapacity": 2
      },
      "Rules": [
       {
       "Name": "ScaleOutMemoryPercentage",
       "Description": "Scale out if YARNMemoryAvailablePercentage is less than 15",
       "Action": {
       "SimpleScalingPolicyConfiguration": {
       "AdjustmentType": "CHANGE_IN_CAPACITY",
       "ScalingAdjustment": 1,
       "CoolDown": 300
       }
       },
       "Trigger": {
       "CloudWatchAlarmDefinition": {
       "ComparisonOperator": "LESS_THAN",
       "EvaluationPeriods": 1,
       "MetricName": "YARNMemoryAvailablePercentage",
       "Namespace": "AWS/ElasticMapReduce",
       "Period": 300,
       "Statistic": "AVERAGE",
       "Threshold": 15.0,
       "Unit": "PERCENT"
       }
       }
       }
      ]
      }
      
    EOF
    # Per the provider documentation, setting bid_price requests the core
    # instances as Spot instances at this maximum hourly price (USD).
    bid_price = "0.30"
    instance_count = 1
    instance_type = "c4.large"
  }
  ebs_root_volume_size = 100

  ec2_attributes {
    # These are resource references, not string literals: quoting them would
    # pass the literal text (e.g. "aws_subnet.main.id") to AWS instead of the
    # real IDs/ARNs and would also hide the dependency from Terraform.
    emr_managed_master_security_group = aws_security_group.sg.id
    emr_managed_slave_security_group = aws_security_group.sg.id
    instance_profile = aws_iam_instance_profile.emr_profile.arn
    subnet_id = aws_subnet.main.id
  }
  keep_job_flow_alive_when_no_steps = true

  master_instance_group {
    instance_type = "m4.large"
  }
  name = "emr-test-arn"
  release_label = "emr-4.6.0"
  # Reference to the EMR service role (unquoted so Terraform resolves the ARN).
  service_role = aws_iam_role.iam_emr_service_role.arn

  tags = {
    env = "env"
    role = "rolename"
  }
  termination_protection = false
}