# ============================================================
#  IABD - Stack Big Data con perfiles
#  ------------------------------------------------------------
#  Arranque selectivo según la sesión:
#
#    docker compose --profile kafka up -d            # broker único (Kafka 1, casos 3/4)
#    docker compose --profile kafka-connect up -d    # broker único (Kafka 1, Kafka Connect)
#    docker compose --profile kafka-cluster up -d    # 3 brokers KRaft (Kafka 2 - caso 2)
#    docker compose --profile spark up -d            # cluster Spark completo + MinIO + Hive
#    docker compose --profile spark-single up -d     # Spark reducido: master + 1 worker + Jupyter + MinIO + Hive
#    docker compose --profile datos up -d            # MySQL con retail_db
#    docker compose --profile airflow up -d          # Airflow LocalExecutor + Postgres
#
#  Combinables (Spark Streaming sobre Kafka, Airflow orquestando Spark, etc.):
#    docker compose --profile spark --profile kafka up -d
#    docker compose --profile spark --profile kafka --profile datos --profile airflow up -d
# ============================================================

services:

  # ============================================================
  #  MySQL - Hive Metastore (profiles: spark, spark-single)
  # ============================================================
  mysql:
    # MySQL instance backing the Hive Metastore (database hive_metastore, user hive).
    # Published on host port 3306.
    image: mysql:8.0
    container_name: iabd-mysql-metastore
    hostname: mysql-metastore
    profiles: ["spark", "spark-single"]
    environment:
      MYSQL_ROOT_PASSWORD: rootpass
      MYSQL_DATABASE: hive_metastore
      MYSQL_USER: hive
      MYSQL_PASSWORD: hivepass
    ports:
      - '3306:3306'
    volumes:
      - mysql_data:/var/lib/mysql
    healthcheck:
      # Probe over TCP (127.0.0.1) rather than "localhost": with "localhost"
      # mysqladmin uses the Unix socket, which already answers while the image's
      # first-run initialisation server is up (it runs with networking disabled).
      # That reported the service healthy before dependants such as hive-init
      # could actually connect over the network.
      test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1"]
      interval: 10s
      timeout: 5s
      retries: 10
    networks:
      - spark-net

  hive-init:
    # One-shot job that makes sure the Hive Metastore schema exists in the
    # hive_metastore database of the mysql service.
    image: apache/hive:4.0.1
    container_name: iabd-hive-init
    profiles: ["spark", "spark-single"]
    # Idempotent: if the schema already exists (-info returns 0), it is not re-initialised.
    # If it does not exist (-info returns != 0), -initSchema is run.
    # "$$" is the Compose escape for a literal "$", so the shell receives "$URL".
    entrypoint: /bin/bash
    command:
      - -c
      - |
        URL="jdbc:mysql://mysql-metastore:3306/hive_metastore?createDatabaseIfNotExist=true&useSSL=false&allowPublicKeyRetrieval=true"
        if /opt/hive/bin/schematool -info -dbType mysql \
             -url "$$URL" -driver com.mysql.cj.jdbc.Driver \
             -userName hive -passWord hivepass > /dev/null 2>&1; then
          echo ">>> Hive Metastore ya inicializado, no se hace nada."
        else
          echo ">>> Inicializando Hive Metastore..."
          /opt/hive/bin/schematool -initSchema -dbType mysql \
            -url "$$URL" -driver com.mysql.cj.jdbc.Driver \
            -userName hive -passWord hivepass
        fi
    volumes:
      # MySQL JDBC driver jar, mounted from the host into Hive's lib directory.
      - ./jars/mysql-connector-j-8.0.33.jar:/opt/hive/lib/mysql-connector-j-8.0.33.jar
    depends_on:
      # Do not start until the mysql healthcheck passes.
      mysql:
        condition: service_healthy
    networks:
      - spark-net
    # Run once; never restart after the script exits.
    restart: "no"

  # ============================================================
  #  MySQL - Datos de negocio (retail_db) (profile: datos)
  # ============================================================
  mysql-datos:
    # MySQL instance holding the business data set (database retail_db, user iabd).
    # Published on host port 3307 so it does not clash with the metastore MySQL (3306).
    image: mysql:8.0
    container_name: iabd-mysql-datos
    hostname: mysql-datos
    profiles: ["datos"]
    environment:
      MYSQL_ROOT_PASSWORD: rootpass
      MYSQL_DATABASE: retail_db
      MYSQL_USER: iabd
      MYSQL_PASSWORD: iabd
    ports:
      - '3307:3306'
    volumes:
      - mysql_datos_data:/var/lib/mysql
      # SQL script run by the image on first initialisation of an empty data directory.
      - ./spark-config/create_db.sql:/docker-entrypoint-initdb.d/01-create_db.sql:ro
    healthcheck:
      # Probe over TCP (127.0.0.1) rather than "localhost": with "localhost"
      # mysqladmin uses the Unix socket, which already answers while the image's
      # first-run initialisation server is still loading the init script with
      # networking disabled. The TCP probe only passes once the final server
      # is accepting network connections.
      test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1"]
      interval: 10s
      timeout: 5s
      retries: 10
    networks:
      - spark-net

  # ============================================================
  #  MinIO - Almacenamiento S3 (profiles: spark, spark-single)
  # ============================================================
  minio:
    # S3-compatible object store. API on port 9000, web console on port 9001
    # (both published to the host).
    image: minio/minio:latest
    container_name: iabd-minio
    hostname: minio
    profiles: ["spark", "spark-single"]
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: minioadmin
      MINIO_ROOT_PASSWORD: minioadmin123
    ports:
      - '9000:9000'
      - '9001:9001'
    volumes:
      - minio_data:/data
    healthcheck:
      # Use the mc client bundled in the image instead of curl: current
      # minio/minio images no longer ship curl, so the previous curl-based probe
      # could never succeed and every service waiting on service_healthy
      # (minio-init, spark-master) stayed blocked. "local" is mc's built-in
      # alias for http://localhost:9000.
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - spark-net

  minio-init:
    # One-shot job that creates the buckets warehouse, raw-data and processed
    # on the minio service using the mc client.
    image: minio/mc:latest
    container_name: iabd-minio-init
    profiles: ["spark", "spark-single"]
    depends_on:
      # Do not start until the minio healthcheck passes.
      minio:
        condition: service_healthy
    # The steps are chained with &&, so the first failure stops the script.
    # --ignore-existing makes bucket creation safe to repeat on later runs.
    entrypoint: >
      /bin/sh -c "
        mc alias set local http://minio:9000 minioadmin minioadmin123 &&
        mc mb --ignore-existing local/warehouse &&
        mc mb --ignore-existing local/raw-data &&
        mc mb --ignore-existing local/processed &&
        echo '>>> Buckets creados correctamente'
      "
    networks:
      - spark-net

  # ============================================================
  #  Kafka - Broker único KRaft (profiles: kafka, kafka-connect)
  #  Para Kafka 1, casos 3 y 4 de Kafka 2, y Kafka Connect
  # ============================================================
  kafka:
    # Single KRaft node running both the broker and the controller role.
    image: apache/kafka:latest
    container_name: iabd-kafka
    hostname: kafka
    profiles:
      - kafka
      - kafka-connect
    networks:
      - spark-net
    # Inside the container the node's own hostname resolves to loopback.
    extra_hosts:
      - "kafka:127.0.0.1"
    ports:
      # Only the EXTERNAL listener is published to the host.
      - "9094:9094"
    volumes:
      - kafka_data:/var/lib/kafka/data
    environment:
      # --- KRaft identity and quorum (one voter: this node) ---
      KAFKA_NODE_ID: "1"
      KAFKA_PROCESS_ROLES: "broker,controller"
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka:9093"
      KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
      # --- Listeners ---
      # PLAINTEXT (9092) is advertised as kafka:9092 for clients on spark-net,
      # EXTERNAL (9094) is advertised as localhost:9094 for clients on the host,
      # CONTROLLER (9093) carries the KRaft controller traffic.
      KAFKA_LISTENERS: "PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094"
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka:9092,EXTERNAL://localhost:9094"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT"
      KAFKA_INTER_BROKER_LISTENER_NAME: "PLAINTEXT"
      # --- Topic defaults; replication is fixed to 1 because there is one broker ---
      KAFKA_NUM_PARTITIONS: "3"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "1"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "1"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      # --- Storage (backed by the kafka_data volume) ---
      KAFKA_LOG_DIRS: "/var/lib/kafka/data"
    healthcheck:
      # Healthy once the broker answers an API-versions request on the internal listener.
      test:
        - CMD-SHELL
        - "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 > /dev/null 2>&1"
      interval: 15s
      timeout: 10s
      retries: 10

  # ------------------------------------------------------------
  #  UI unificada para ambas topologías Kafka.
  #  Configura dos clusters; el alumno verá uno sano y otro
  #  "offline" según el profile que tenga arrancado. La URL
  #  http://localhost:8081 es siempre la misma.
  # ------------------------------------------------------------
  kafka-ui:
    # Web UI shared by both Kafka topologies, published on host port 8081.
    image: provectuslabs/kafka-ui:latest
    container_name: iabd-kafka-ui
    hostname: kafka-ui
    profiles:
      - kafka
      - kafka-cluster
      - kafka-connect
    networks:
      - spark-net
    ports:
      - "8081:8080"
    environment:
      # Cluster 0: the single broker (service "kafka").
      KAFKA_CLUSTERS_0_NAME: "single"
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: "kafka:9092"
      # Cluster 1: the three-node cluster (services "kafka-1", "kafka-2", "kafka-3").
      KAFKA_CLUSTERS_1_NAME: "cluster"
      KAFKA_CLUSTERS_1_BOOTSTRAPSERVERS: "kafka-1:9092,kafka-2:9092,kafka-3:9092"

  # ============================================================
  #  Kafka - Clúster de 3 brokers KRaft (profile: kafka-cluster)
  #  Para Kafka 2 - Caso 2
  #  ------------------------------------------------------------
  #  Quórum: 3 nodos (combined broker+controller).
  #  Tolerancia: cae 1 nodo y sigue; caen 2 y se para el control plane.
  #  Puertos externos: 9094, 9095, 9096 (acceso desde host)
  # ============================================================
  kafka-1:
    # Node 1 of the three-node KRaft cluster (combined broker + controller).
    image: apache/kafka:latest
    container_name: iabd-kafka-1
    hostname: kafka-1
    profiles:
      - kafka-cluster
    networks:
      - spark-net
    # Inside the container the node's own hostname resolves to loopback.
    extra_hosts:
      - "kafka-1:127.0.0.1"
    ports:
      # EXTERNAL listener on host port 9094; the single-broker "kafka" service
      # publishes the same host port.
      - "9094:9094"
    volumes:
      - kafka_1_data:/var/lib/kafka/data
    environment:
      # --- KRaft identity and quorum (three voters) ---
      KAFKA_NODE_ID: "1"
      KAFKA_PROCESS_ROLES: "broker,controller"
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka-1:9093,2@kafka-2:9093,3@kafka-3:9093"
      KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
      # --- Listeners: PLAINTEXT for spark-net clients, EXTERNAL for host clients ---
      KAFKA_LISTENERS: "PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094"
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka-1:9092,EXTERNAL://localhost:9094"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT"
      KAFKA_INTER_BROKER_LISTENER_NAME: "PLAINTEXT"
      # --- Topic defaults ---
      # NOTE(review): with a default replication factor of 2 and min ISR of 2,
      # acks=all producers are rejected as soon as one replica's broker is down.
      # Confirm this is intended for the exercise.
      KAFKA_NUM_PARTITIONS: "3"
      KAFKA_DEFAULT_REPLICATION_FACTOR: "2"
      KAFKA_MIN_INSYNC_REPLICAS: "2"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "2"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      # --- Storage (backed by the kafka_1_data volume) ---
      KAFKA_LOG_DIRS: "/var/lib/kafka/data"
    healthcheck:
      # Healthy once the broker answers an API-versions request on the internal listener.
      test:
        - CMD-SHELL
        - "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 > /dev/null 2>&1"
      interval: 15s
      timeout: 10s
      retries: 10

  kafka-2:
    # Node 2 of the three-node KRaft cluster (combined broker + controller).
    image: apache/kafka:latest
    container_name: iabd-kafka-2
    hostname: kafka-2
    profiles:
      - kafka-cluster
    networks:
      - spark-net
    # Inside the container the node's own hostname resolves to loopback.
    extra_hosts:
      - "kafka-2:127.0.0.1"
    ports:
      # EXTERNAL listener (container port 9094) on host port 9095.
      - "9095:9094"
    volumes:
      - kafka_2_data:/var/lib/kafka/data
    environment:
      # --- KRaft identity and quorum (three voters) ---
      KAFKA_NODE_ID: "2"
      KAFKA_PROCESS_ROLES: "broker,controller"
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka-1:9093,2@kafka-2:9093,3@kafka-3:9093"
      KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
      # --- Listeners: PLAINTEXT for spark-net clients, EXTERNAL for host clients ---
      KAFKA_LISTENERS: "PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094"
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka-2:9092,EXTERNAL://localhost:9095"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT"
      KAFKA_INTER_BROKER_LISTENER_NAME: "PLAINTEXT"
      # --- Topic defaults ---
      # NOTE(review): with a default replication factor of 2 and min ISR of 2,
      # acks=all producers are rejected as soon as one replica's broker is down.
      # Confirm this is intended for the exercise.
      KAFKA_NUM_PARTITIONS: "3"
      KAFKA_DEFAULT_REPLICATION_FACTOR: "2"
      KAFKA_MIN_INSYNC_REPLICAS: "2"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "2"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      # --- Storage (backed by the kafka_2_data volume) ---
      KAFKA_LOG_DIRS: "/var/lib/kafka/data"
    healthcheck:
      # Healthy once the broker answers an API-versions request on the internal listener.
      test:
        - CMD-SHELL
        - "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 > /dev/null 2>&1"
      interval: 15s
      timeout: 10s
      retries: 10

  kafka-3:
    # Node 3 of the three-node KRaft cluster (combined broker + controller).
    image: apache/kafka:latest
    container_name: iabd-kafka-3
    hostname: kafka-3
    profiles:
      - kafka-cluster
    networks:
      - spark-net
    # Inside the container the node's own hostname resolves to loopback.
    extra_hosts:
      - "kafka-3:127.0.0.1"
    ports:
      # EXTERNAL listener (container port 9094) on host port 9096.
      - "9096:9094"
    volumes:
      - kafka_3_data:/var/lib/kafka/data
    environment:
      # --- KRaft identity and quorum (three voters) ---
      KAFKA_NODE_ID: "3"
      KAFKA_PROCESS_ROLES: "broker,controller"
      KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka-1:9093,2@kafka-2:9093,3@kafka-3:9093"
      KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
      # --- Listeners: PLAINTEXT for spark-net clients, EXTERNAL for host clients ---
      KAFKA_LISTENERS: "PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094"
      KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka-3:9092,EXTERNAL://localhost:9096"
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT"
      KAFKA_INTER_BROKER_LISTENER_NAME: "PLAINTEXT"
      # --- Topic defaults ---
      # NOTE(review): with a default replication factor of 2 and min ISR of 2,
      # acks=all producers are rejected as soon as one replica's broker is down.
      # Confirm this is intended for the exercise.
      KAFKA_NUM_PARTITIONS: "3"
      KAFKA_DEFAULT_REPLICATION_FACTOR: "2"
      KAFKA_MIN_INSYNC_REPLICAS: "2"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: "3"
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: "2"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      # --- Storage (backed by the kafka_3_data volume) ---
      KAFKA_LOG_DIRS: "/var/lib/kafka/data"
    healthcheck:
      # Healthy once the broker answers an API-versions request on the internal listener.
      test:
        - CMD-SHELL
        - "/opt/kafka/bin/kafka-broker-api-versions.sh --bootstrap-server localhost:9092 > /dev/null 2>&1"
      interval: 15s
      timeout: 10s
      retries: 10

  # ============================================================
  #  Kafka Connect (profile: kafka-connect)
  # ============================================================
  kafka-connect:
    # Kafka Connect worker in distributed mode, started from the stock Kafka image.
    # REST port 8083 is published to the host.
    image: apache/kafka:latest
    container_name: iabd-kafka-connect
    hostname: kafka-connect
    profiles:
      - kafka-connect
    networks:
      - spark-net
    depends_on:
      # Wait for the single broker's healthcheck before starting the worker.
      kafka:
        condition: service_healthy
    command:
      - /opt/kafka/bin/connect-distributed.sh
      - /opt/kafka/config/connect-distributed.properties
    ports:
      - "8083:8083"
    volumes:
      # Worker configuration, mounted read-only from the host.
      - ./kafka/connect-distributed.properties:/opt/kafka/config/connect-distributed.properties:ro
      # Host directory with connector plugins.
      # NOTE(review): presumably referenced by plugin.path in the properties file; verify.
      - ./kafka/connect-plugins:/opt/kafka/plugins
    healthcheck:
      # NOTE(review): this probe needs curl inside the apache/kafka image; confirm
      # the image ships it, otherwise the container is reported unhealthy forever.
      test:
        - CMD-SHELL
        - "curl -f http://localhost:8083/ > /dev/null 2>&1 || exit 1"
      interval: 15s
      timeout: 10s
      retries: 10
      
  # ============================================================
  #  Spark 4 - Master y Workers (profiles: spark, spark-single; worker-2 solo en spark)
  # ============================================================
  spark-master:
    # Spark standalone master. Ports 8080 and 7077 are published to the host.
    image: apache/spark:4.1.1
    container_name: iabd-spark-master
    hostname: spark-master
    profiles:
      - spark
      - spark-single
    networks:
      - spark-net
    depends_on:
      # Start only after the metastore database and the object store are healthy.
      mysql:
        condition: service_healthy
      minio:
        condition: service_healthy
    environment:
      SPARK_MASTER_HOST: "spark-master"
      # Keep the start script in the foreground so the container stays alive.
      SPARK_NO_DAEMONIZE: "true"
    command: /opt/spark/sbin/start-master.sh
    ports:
      - "8080:8080"
      - "7077:7077"
    volumes:
      # Extra jars and the Spark defaults file, mounted from the host.
      - ./jars:/opt/spark/extra-jars
      - ./spark-config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf

  spark-worker-1:
    # First Spark worker; registers with the master at spark://spark-master:7077.
    image: apache/spark:4.1.1
    container_name: iabd-spark-worker-1
    hostname: spark-worker-1
    profiles:
      - spark
      - spark-single
    networks:
      - spark-net
    depends_on:
      spark-master:
        condition: service_started
    environment:
      # Resources the worker offers to the cluster.
      SPARK_WORKER_CORES: "2"
      SPARK_WORKER_MEMORY: "4g"
      # Keep the start script in the foreground so the container stays alive.
      SPARK_NO_DAEMONIZE: "true"
    # Container memory cap, above the 4g the worker offers to the cluster.
    mem_limit: 6g
    command: /opt/spark/sbin/start-worker.sh spark://spark-master:7077
    volumes:
      - ./jars:/opt/spark/extra-jars
      - ./spark-config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf

  spark-worker-2:
    # Second Spark worker; only part of the full "spark" profile.
    image: apache/spark:4.1.1
    container_name: iabd-spark-worker-2
    hostname: spark-worker-2
    profiles:
      - spark
    networks:
      - spark-net
    depends_on:
      spark-master:
        condition: service_started
    environment:
      # Resources the worker offers to the cluster.
      SPARK_WORKER_CORES: "2"
      SPARK_WORKER_MEMORY: "4g"
      # Keep the start script in the foreground so the container stays alive.
      SPARK_NO_DAEMONIZE: "true"
    # Container memory cap, above the 4g the worker offers to the cluster.
    mem_limit: 6g
    command: /opt/spark/sbin/start-worker.sh spark://spark-master:7077
    volumes:
      - ./jars:/opt/spark/extra-jars
      - ./spark-config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf

  # ============================================================
  #  Jupyter - Notebooks PySpark (profiles: spark, spark-single)
  # ============================================================
  jupyter:
    # Notebook server built from the local ./jupyter directory.
    build:
      context: ./jupyter
    container_name: iabd-jupyter
    hostname: jupyter
    profiles:
      - spark
      - spark-single
    networks:
      - spark-net
    mem_limit: 4g
    ports:
      - "8888:8888"
      # NOTE(review): presumably the Spark application UI of the notebook driver; confirm.
      - "4040:4040"
    volumes:
      # Notebooks are kept on the host.
      - ./notebooks:/home/spark/work
      # Same extra jars and Spark defaults that the master and workers mount.
      - ./jars:/opt/spark/extra-jars
      - ./spark-config/spark-defaults.conf:/opt/spark/conf/spark-defaults.conf

  # ============================================================
  #  Airflow - LocalExecutor + Postgres (profile: airflow)
  # ============================================================
  postgres-airflow:
    # PostgreSQL database for Airflow's metadata; no port is published to the host.
    image: postgres:16
    container_name: iabd-postgres-airflow
    hostname: postgres-airflow
    profiles:
      - airflow
    networks:
      - spark-net
    environment:
      POSTGRES_DB: "airflow"
      POSTGRES_USER: "airflow"
      POSTGRES_PASSWORD: "airflow"
    volumes:
      - postgres_airflow_data:/var/lib/postgresql/data
    healthcheck:
      # Healthy once the server accepts connections for the airflow user.
      test:
        - CMD
        - pg_isready
        - -U
        - airflow
      interval: 10s
      timeout: 5s
      retries: 5

  airflow-init:
    # One-shot bootstrap job: migrates the Airflow metadata database and creates
    # the admin user. airflow-webserver and airflow-scheduler wait for it to
    # complete successfully.
    image: apache/airflow:2.10.3-python3.11
    container_name: iabd-airflow-init
    profiles: ["airflow"]
    entrypoint: /bin/bash
    command:
      - -c
      - |
        # Abort with a non-zero status if the migration fails. The previous form
        # "migrate && users create || true" parsed as "(migrate && create) || true",
        # so a failed migration still exited 0 and the dependent services started
        # against an unmigrated database.
        set -e
        airflow db migrate
        # Best-effort: on re-runs the user already exists, which must not fail the job.
        airflow users create \
          --username admin \
          --password admin \
          --firstname Admin \
          --lastname IABD \
          --role Admin \
          --email admin@iabd.local || true
    # Shared Airflow settings; the webserver and scheduler reuse this mapping
    # through the *airflow-env alias.
    environment: &airflow-env
      AIRFLOW__CORE__EXECUTOR: LocalExecutor
      AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres-airflow/airflow
      AIRFLOW__CORE__LOAD_EXAMPLES: "false"
      AIRFLOW__CORE__FERNET_KEY: ""
      AIRFLOW__WEBSERVER__SECRET_KEY: "iabd-secret-key-cambiar-en-prod"
      _PIP_ADDITIONAL_REQUIREMENTS: "kafka-python boto3 requests pymongo apache-airflow-providers-apache-spark"
    volumes:
      - ./airflow/dags:/opt/airflow/dags
      - ./airflow/logs:/opt/airflow/logs
      - ./airflow/plugins:/opt/airflow/plugins
    depends_on:
      # Do not start until the metadata database passes its healthcheck.
      postgres-airflow:
        condition: service_healthy
    networks:
      - spark-net
    # Run once; never restart after the script exits.
    restart: "no"

  airflow-webserver:
    # Airflow web UI; container port 8080 is published on host port 8082.
    image: apache/airflow:2.10.3-python3.11
    container_name: iabd-airflow-webserver
    hostname: airflow-webserver
    profiles:
      - airflow
    networks:
      - spark-net
    depends_on:
      # Start only after the bootstrap job has finished with exit code 0
      # and the metadata database is healthy.
      airflow-init:
        condition: service_completed_successfully
      postgres-airflow:
        condition: service_healthy
    # Settings shared with airflow-init and airflow-scheduler.
    environment: *airflow-env
    command: webserver
    ports:
      - "8082:8080"
    volumes:
      - ./airflow/dags:/opt/airflow/dags
      - ./airflow/logs:/opt/airflow/logs
      - ./airflow/plugins:/opt/airflow/plugins
    healthcheck:
      test:
        - CMD
        - curl
        - --fail
        - http://localhost:8080/health
      interval: 30s
      timeout: 10s
      retries: 5

  airflow-scheduler:
    # Airflow scheduler process; the shared settings select the LocalExecutor.
    image: apache/airflow:2.10.3-python3.11
    container_name: iabd-airflow-scheduler
    hostname: airflow-scheduler
    profiles:
      - airflow
    networks:
      - spark-net
    depends_on:
      # Start only after the bootstrap job has finished with exit code 0
      # and the metadata database is healthy.
      airflow-init:
        condition: service_completed_successfully
      postgres-airflow:
        condition: service_healthy
    # Settings shared with airflow-init and airflow-webserver.
    environment: *airflow-env
    command: scheduler
    volumes:
      - ./airflow/dags:/opt/airflow/dags
      - ./airflow/logs:/opt/airflow/logs
      - ./airflow/plugins:/opt/airflow/plugins

# ============================================================
#  VOLÚMENES Y RED
# ============================================================
# Named volumes, all with default driver settings.
volumes:
  # MySQL data directories (Hive Metastore and retail_db).
  mysql_data: {}
  mysql_datos_data: {}
  # MinIO object storage.
  minio_data: {}
  # Kafka log directories: single broker, then the three cluster nodes.
  kafka_data: {}
  kafka_1_data: {}
  kafka_2_data: {}
  kafka_3_data: {}
  # Airflow metadata database.
  postgres_airflow_data: {}

# Single user-defined bridge network; every service defined above attaches to it,
# so containers reach each other by service hostname.
networks:
  spark-net:
    driver: bridge