This template sets up an AWS Glue Catalog table optimized for Athena: an external table over Parquet data with SNAPPY compression, stored in S3.

Terraform Template

# Glue Data Catalog table describing Parquet files stored under an S3 prefix.
# The table is registered as EXTERNAL_TABLE: the catalog holds only the schema
# and storage metadata, while the data itself lives at `location` below.
resource "aws_glue_catalog_table" "aws_glue_catalog_table" {
  database_name = "MyCatalogDatabase"
  name          = "MyCatalogTable"
  table_type    = "EXTERNAL_TABLE"

  # Table-level properties. Keys that contain a dot must be written as quoted
  # strings; a bare `parquet.compression` is not a valid HCL map key.
  parameters = {
    EXTERNAL              = "TRUE"
    "parquet.compression" = "SNAPPY"
  }

  storage_descriptor {
    # S3 prefix holding the table's data files.
    location = "s3://my-bucket/event-streams/my-stream"

    # Hive input/output format classes for Parquet.
    input_format  = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"
    output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"

    # Column schema, in declaration order.
    columns {
      name = "my_string"
      type = "string"
    }

    columns {
      name = "my_double"
      type = "double"
    }

    columns {
      name = "my_date"
      type = "date"
    }

    columns {
      name = "my_bigint"
      type = "bigint"
    }

    # Nested types are expressed with Hive type syntax.
    columns {
      name = "my_struct"
      type = "struct<my_nested_string:string>"
    }

    # Serializer/deserializer used to read and write the Parquet records.
    ser_de_info {
      name                  = "my-stream"
      serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"

      # Dotted key quoted for the same reason as in the table parameters.
      parameters = {
        "serialization.format" = 1
      }
    }
  }
}