#!/usr/bin/env bash

# Optional positional arguments:
#   $1  split name, forwarded as --gen-subset   (default: test)
#   $2  beam width, forwarded as --beam         (default: 5)
#   $3  dataset directory name under finetuning (default: vqa-rad)
split="${1:-test}"
beam_size="${2:-5}"
dataset_name="${3:-vqa-rad}"

# All locations are relative to the directory the script is launched from.
user_dir="../../module"
bpe_dir="../../utils/BPE"
data_dir="../../datasets/finetuning/${dataset_name}"

# The TSV is always test.tsv; ${split} does not select the file name here.
data="${data_dir}/test.tsv"

# Model scales to evaluate, one checkpoint per scale:
#   ../../checkpoints/instruct_biomedgpt_<scale>.pt
declare -a Scale=('tiny' 'medium' 'base')

# Scales whose evaluation command exited non-zero; reported after the loop.
declare -a failed_scales=()

for scale in "${Scale[@]}"; do
    # Substring match on the scale name (same semantics as `=~ "literal"`).
    # NOTE(review): patch_image_size is assigned here but is not forwarded to
    # evaluate.py anywhere in this script -- confirm whether it should be.
    case "$scale" in
        *tiny* | *medium*) patch_image_size=256 ;;
        *base*) patch_image_size=384 ;;
    esac

    path="../../checkpoints/instruct_biomedgpt_${scale}.pt"
    result_path="./results/bcdrf_rad_unconstrained/${scale}"
    mkdir -p -- "$result_path"
    selected_cols=0,5,2,3,4

    log_file="${result_path}/${scale}.log"

    # Build the argument list as an array so every value stays a single word
    # even if a path contains spaces or glob characters.
    eval_args=(
        "${data}"
        "--path=${path}"
        "--user-dir=${user_dir}"
        --task=vqa_gen
        --batch-size=64
        --log-format=simple --log-interval=10
        --seed=7
        "--gen-subset=${split}"
        "--results-path=${result_path}"
        --fp16
        --beam-search-vqa-eval
        --ema-eval
        "--beam=${beam_size}"
        --unnormalized
        --temperature=1.0
        --num-workers=0
        "--model-overrides={\"data\":\"${data}\",\"bpe_dir\":\"${bpe_dir}\",\"selected_cols\":\"${selected_cols}\"}"
    )

    # Run evaluation on a single GPU (Google Colab's one GPU).
    # stdout and stderr both go to the per-scale log file. A failure (including
    # an unwritable log file) is recorded, and the remaining scales still run.
    if ! CUDA_VISIBLE_DEVICES=0 python3 ../../evaluate.py "${eval_args[@]}" \
        > "${log_file}" 2>&1; then
        printf 'Evaluation failed for scale "%s" (see %s)\n' \
            "$scale" "$log_file" >&2
        failed_scales+=("$scale")
    fi
done

if (( ${#failed_scales[@]} > 0 )); then
    printf 'Scales with failed evaluation: %s\n' "${failed_scales[*]}" >&2
fi

# Leave a non-zero status behind when any scale failed, so callers can detect
# it; this does not call `exit`, so nothing that follows is cut short.
(( ${#failed_scales[@]} == 0 ))
