#!/bin/bash

# Exported so that child processes started by this script see OMP_NUM_THREADS=8.
OMP_NUM_THREADS=8
export OMP_NUM_THREADS

# Tunables: a non-empty value already present in the environment is kept;
# an unset or empty variable is assigned the default shown here.
: "${SPARSITY:=0.50}"
: "${ITERATIONS:=10}"
: "${ALPHA:=0.001}"

# Fixed settings for this run (always assigned, regardless of the environment).
DATA_SIZE=128
MODEL=meta-llama/Llama-2-7b-hf

# Launch iht.py with CUDA_VISIBLE_DEVICES=0 set for that process only.
#
# Reads the shell variables MODEL, DATA_SIZE, ITERATIONS, SPARSITY and ALPHA,
# which are expected to be set earlier in this script.
#
# The arguments are collected in an array so that every expansion is quoted and
# reaches python as exactly one word, and so individual options can carry notes.
iht_args=(
    --model_name_or_path "${MODEL}"
    --dtype auto
    --dataset_name_or_path ./data/red_pajama_n=4096_4096_context_length.pth
    --pre_decoder_modules 'model.embed_tokens'
    --decoder_blocks 'model.layers'
    --post_decoder_modules 'model.norm' 'lm_head'
    --module_regex '.*layers.*((q|k|v|o|gate|up|down)_proj)$'
    --low_cpu_mem_usage
    --sequence_length 4096
    --calibration_dataset_size "${DATA_SIZE}"
    --iterations "${ITERATIONS}"
    # --sparsity is passed exactly once, from SPARSITY. An extra hard-coded
    # "--sparsity 0.50" used to precede it; it was redundant and could disagree
    # with both the override and the "sp=" tag in the output directory name.
    # NOTE(review): assumes iht.py treated the later occurrence as the
    # effective one (typical last-wins option parsing) - confirm.
    --sparsity "${SPARSITY}"
    --pruning_method FastOBC
    --sequential
    --alpha "${ALPHA}"
    --rel_damp 0.01
    --cpu_offload
    # The directory name encodes the run settings so different runs do not collide.
    --output_dir "./iht_results/FastOBC/iterations=${ITERATIONS}-sp=${SPARSITY}-alpha=${ALPHA}-sequential=True_data-size=${DATA_SIZE}"
)

CUDA_VISIBLE_DEVICES=0 python iht.py "${iht_args[@]}"