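This template sets up an AWS Glue ML Transform for finding matches, along with a Glue Catalog Database and a Glue Catalog Table configured with various properties including storage, serialization, and partitioning. The ML Transform refers to an IAM role (`aws_iam_role.test`) and a role policy attachment (`aws_iam_role_policy_attachment.test`) that are not defined in this template and must be declared elsewhere in the configuration.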

Terraform Template

# Glue Data Catalog database named "example".
# The catalog table below reads this database's name via
# aws_glue_catalog_database.test.name.
resource "aws_glue_catalog_database" "test" {
  name = "example"
}

# Glue Data Catalog table "example" registered in the database above.
# Declares two partition keys, a storage descriptor (columns, formats, SerDe,
# skew and sort information) and view text for a VIRTUAL_VIEW table type.
resource "aws_glue_catalog_table" "test" {
  database_name = aws_glue_catalog_database.test.name
  name          = "example"
  owner         = "my_owner"
  retention     = 1
  table_type    = "VIRTUAL_VIEW"

  view_expanded_text = "view_expanded_text_1"
  view_original_text = "view_original_text_1"

  # Table-level key/value properties.
  parameters = {
    param1 = "param1_val"
  }

  # NOTE(review): the partition keys reuse the names of the data columns
  # declared in storage_descriptor below — confirm this overlap is intended.
  partition_keys {
    name    = "my_column_1"
    type    = "int"
    comment = "my_column_1_comment"
  }

  partition_keys {
    name    = "my_column_2"
    type    = "string"
    comment = "my_column_2_comment"
  }

  storage_descriptor {
    location                  = "my_location"
    compressed                = false
    stored_as_sub_directories = false

    input_format = "SequenceFileInputFormat"
    # Fixed: the output format previously repeated the *input* format class.
    output_format = "SequenceFileOutputFormat"

    # NOTE(review): "bucket_column_1" is not one of the columns declared
    # below — confirm the bucketing column name.
    bucket_columns    = ["bucket_column_1"]
    number_of_buckets = 1

    # Storage-level key/value properties.
    parameters = {
      param1 = "param1_val"
    }

    columns {
      name    = "my_column_1"
      type    = "int"
      comment = "my_column1_comment"
    }

    columns {
      name    = "my_column_2"
      type    = "string"
      comment = "my_column2_comment"
    }

    ser_de_info {
      name                  = "ser_de_name"
      serialization_library = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"

      parameters = {
        param1 = "param_val_1"
      }
    }

    skewed_info {
      skewed_column_names = ["my_column_1"]

      # Fixed: the provider expects a map of strings here (it was a list)...
      skewed_column_value_location_maps = {
        my_column_1 = "my_column_1_val_loc_map"
      }

      # ...and a list of strings here (it was a map keyed by "0").
      skewed_column_values = ["skewed_val_1"]
    }

    sort_columns {
      column = "my_column_1"
      # Per the provider documentation: 1 = ascending, 0 = descending.
      sort_order = 1
    }
  }
}

# Glue ML transform of type FIND_MATCHES that reads records from the catalog
# table above, using "my_column_1" as the primary key column.
#
# aws_iam_role.test and aws_iam_role_policy_attachment.test are not declared
# in this section; they must be defined elsewhere in the configuration.
resource "aws_glue_ml_transform" "test" {
  name = "example"

  # Fixed: this was the quoted string "aws_iam_role.test.arn", which passes
  # that literal text as the ARN instead of the role's actual ARN.
  role_arn = aws_iam_role.test.arn

  # Fixed: input_record_tables is a nested block with named arguments, not a
  # list of [database, table] strings.
  input_record_tables {
    database_name = aws_glue_catalog_table.test.database_name
    table_name    = aws_glue_catalog_table.test.name
  }

  parameters {
    transform_type = "FIND_MATCHES"

    find_matches_parameters {
      primary_key_column_name = "my_column_1"
    }
  }

  # Create the transform only after the role's policy is attached.
  # Fixed: uses a bare resource reference rather than a legacy quoted string.
  depends_on = [aws_iam_role_policy_attachment.test]
}