
Commit

switching to using huggingface evaluate library for evaluation - removing references to olmo-eval
rdiehlmartinez committed Dec 6, 2024
1 parent 80ad6f6 commit ca17bed
Showing 8 changed files with 86 additions and 282 deletions.
4 changes: 0 additions & 4 deletions .gitmodules

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
@@ -149,7 +149,7 @@ If you use Pico in your research, please cite:

```bibtex
@software{pico2024,
author = {Martinez, Richard Diehl},
author = {Diehl Martinez, Richard},
title = {Pico: Framework for Training Tiny Language Models},
year = {2024},
}
6 changes: 3 additions & 3 deletions config.py
@@ -160,9 +160,9 @@ class TrainingConfig:


@dataclass
class _PalomaEvaluationConfig:
limit_eval_examples: Optional[int] = 1
class PalomaEvaluationConfig:
max_length: int = MAX_SEQ_LEN
batch_size: int = 16


@dataclass
@@ -182,4 +182,4 @@ class EvaluationConfig:

# NOTE: Add other evaluation configs here
# Each evaluation metric should have its own config
paloma: _PalomaEvaluationConfig = field(default_factory=_PalomaEvaluationConfig)
paloma: PalomaEvaluationConfig = field(default_factory=PalomaEvaluationConfig)
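
Taken together, the config.py hunks replace the private `_PalomaEvaluationConfig` (and its `limit_eval_examples` field) with a public `PalomaEvaluationConfig` that nests under `EvaluationConfig` as a plain default-factory field. A minimal sketch of how the resulting config might be built and overridden, with `MAX_SEQ_LEN` stubbed to an assumed value (the real constant is defined elsewhere in config.py):

```python
from dataclasses import dataclass, field

MAX_SEQ_LEN = 2048  # assumed stand-in; config.py defines the real constant


@dataclass
class PalomaEvaluationConfig:
    max_length: int = MAX_SEQ_LEN
    batch_size: int = 16


@dataclass
class EvaluationConfig:
    # Other evaluation metric configs hang off this class as well.
    paloma: PalomaEvaluationConfig = field(default_factory=PalomaEvaluationConfig)


# Defaults can be overridden per run, e.g. a smaller eval batch for debugging:
config = EvaluationConfig(paloma=PalomaEvaluationConfig(batch_size=4))
print(config.paloma.max_length, config.paloma.batch_size)
```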
1 change: 0 additions & 1 deletion lib/olmo-eval
Submodule olmo-eval deleted from 51c5ba
16 changes: 14 additions & 2 deletions model.py
@@ -44,6 +44,7 @@


from transformers import PretrainedConfig, PreTrainedModel
from transformers.modeling_outputs import CausalLMOutputWithPast, CausalLMOutput

########################################################
#
@@ -189,6 +190,8 @@ def forward(
# otherwise, we need to move it to the correct device
if self.fabric is not None:
freqs_cis = self.fabric.to_device(freqs_cis)
else:
freqs_cis = freqs_cis.to(queries.device)

queries_rotated = torch.view_as_real(queries_ * freqs_cis).flatten(3)
keys_rotated = torch.view_as_real(keys_ * freqs_cis).flatten(3)
@@ -577,8 +580,17 @@ def forward(
past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
use_cache: bool = False,
**kwargs,
) -> Tuple[torch.Tensor, Optional[Tuple[Tuple[torch.Tensor]]]]:
return self.pico(input_ids, past_key_values, use_cache)
) -> Union[CausalLMOutput, CausalLMOutputWithPast]:
logits, past_key_values = self.pico(input_ids, past_key_values, use_cache)
if use_cache:
return CausalLMOutputWithPast(
logits=logits,
past_key_values=past_key_values,
)
else:
return CausalLMOutput(
logits=logits,
)


# Register for auto classes
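
Because the wrapper's forward now returns the standard transformers output dataclasses instead of a raw `(logits, past_key_values)` tuple, callers read named fields the same way they would for any Hugging Face causal LM. A small illustrative sketch of that calling convention; gpt2 is used purely as a stand-in model here, since loading Pico itself requires this repository's registered auto classes:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# gpt2 is only a stand-in; the Pico wrapper above now exposes the same
# output interface, so callers use named fields instead of tuple indexing.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("tiny language models", return_tensors="pt").input_ids

out = model(input_ids, use_cache=True)
next_token_logits = out.logits[:, -1, :]  # named field (old forward: out[0])
kv_cache = out.past_key_values            # named field (old forward: out[1])
print(next_token_logits.shape, len(kv_cache))
```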
6 changes: 2 additions & 4 deletions pyproject.toml
@@ -9,17 +9,15 @@ readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
ray = "^2.35.0"
lightning = "^2.4.0"
omegaconf = "^2.3.0"
click = "^8.1.7"
wandb = "^0.18.1"
huggingface-hub = {extras = ["cli"], version = "^0.25.1"}
jsonnet = "^0.20.0"
virtualenv = "^20.27.1"
datasets = "^3.0.1"
transformers = "^4.45.2"
pre-commit = "^4.0.1"
torch = "^2.5.1"
evaluate = "^0.4.3"

[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.5"
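
The pyproject changes add an `evaluate` pin alongside the existing `datasets`/`transformers` stack, in line with the commit's move away from the vendored olmo-eval setup. A minimal sketch of the perplexity computation the library offers out of the box; the metric name and arguments follow the public `evaluate` perplexity module, and how Pico actually wires this to the Paloma data is not part of this diff:

```python
import evaluate

# Load the perplexity metric that ships with the evaluate library.
perplexity = evaluate.load("perplexity", module_type="metric")

# Score a few strings with an off-the-shelf model; Pico's own evaluation
# presumably points this at its trained checkpoints rather than gpt2.
results = perplexity.compute(
    model_id="gpt2",
    predictions=["Tiny language models are fun to train."],
    batch_size=16,   # mirrors PalomaEvaluationConfig.batch_size
    max_length=512,  # mirrors PalomaEvaluationConfig.max_length
)
print(results["mean_perplexity"])
```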
75 changes: 6 additions & 69 deletions setup.sh
@@ -28,10 +28,11 @@ print_warning() {
echo -e "${YELLOW}$1${NC}"
}

# Check if git-lfs is installed
# --- GIT LFS SETUP --- #
print_section "Git LFS Setup"
if ! command -v git-lfs &> /dev/null; then
print_warning "git-lfs is not installed. Some model checkpointing functionality may not work correctly."
ERRORS_FOUND=$((ERRORS_FOUND + 1))

# Check the operating system
if [[ "$OSTYPE" == "darwin"* ]]; then
@@ -60,7 +61,7 @@ else
print_success "git-lfs installed and initialized"
fi

# Check CUDA version
# --- CUDA VERSION CHECK --- #
print_section "CUDA Version Check"
if command -v nvidia-smi &> /dev/null; then
CUDA_VERSION=$(nvidia-smi | sed -n 's/.*CUDA Version: \([0-9.]*\).*/\1/p')
@@ -77,7 +78,7 @@ if command -v nvidia-smi &> /dev/null; then
ERRORS_FOUND=$((ERRORS_FOUND + 1))
print_warning "CUDA version ${MAJOR_VERSION}.${MINOR_VERSION} detected."
echo -e "${YELLOW} Some multi-node communication GPU features may not work properly.${NC}"
echo -e "${YELLOW} CUDA version 12.1 or newer is required.${NC}"
echo -e "${YELLOW} CUDA version 12.1 or newer is recommended.${NC}"
else
print_success "CUDA version ${MAJOR_VERSION}.${MINOR_VERSION} detected"
fi
@@ -88,11 +89,6 @@ else
echo -e "${YELLOW} Ensure that NVIDIA drivers and CUDA version at 12.1 or newer are installed for GPU support.${NC}"
fi

# Initialize and update git submodules
print_section "Git Submodules"
echo "Initializing git submodules..."
git submodule update --init --recursive
print_success "Git submodules initialized"

# ---- ENVIRONMENT VARIABLES ---- #
print_section "Environment Variables"
@@ -105,6 +101,7 @@ else
echo -e "${YELLOW} Example .env contents:${NC}"
echo " export HF_TOKEN=your_huggingface_token"
echo " export WANDB_API_KEY=your_wandb_key"
ERRORS_FOUND=$((ERRORS_FOUND + 1))
fi

# ---- POETRY SETUP ---- #
@@ -132,15 +129,6 @@ fi
# ---- PRE-COMMIT SETUP ---- #
print_section "Pre-commit Setup"

# First check if pre-commit is installed in the poetry environment
if ! poetry run pre-commit --version &> /dev/null; then
echo "Installing pre-commit in poetry environment..."
poetry add pre-commit --group dev
print_success "pre-commit installed successfully"
else
print_success "pre-commit already installed"
fi

# Install pre-commit hooks
echo "Installing pre-commit hooks..."
poetry run pre-commit install
@@ -151,58 +139,7 @@ echo "Running pre-commit hooks on all files..."
poetry run pre-commit run --all-files
print_success "Pre-commit initial run complete"

# ---- EVALUATION SETUP ---- #
print_section "Evaluation (Paloma) Setup"

# Add flag check for skipping evaluation
if [ "$1" = "--skip-eval" ]; then
print_warning "Skipping evaluation setup as requested"
else
if [ ! -d "lib/paloma" ]; then
if [ ! -z "$HF_TOKEN" ]; then
echo "Setting up HuggingFace authentication..."
echo $HF_TOKEN | poetry run huggingface-cli login --token $HF_TOKEN

echo "Cloning Paloma evaluation dataset..."
git clone https://oauth2:${HF_TOKEN}@huggingface.co/datasets/allenai/paloma lib/paloma

if [ $? -eq 0 ]; then
print_success "Paloma dataset cloned successfully"
else
ERRORS_FOUND=$((ERRORS_FOUND + 1))
print_warning "Failed to clone Paloma dataset"
echo -e "${YELLOW} Please verify your HuggingFace token has correct permissions${NC}"
echo -e "${YELLOW} Make sure you have been granted access to allenai/paloma dataset${NC}"
rm -rf lib/paloma
fi
else
print_warning "Skipping Paloma dataset clone. HuggingFace credentials not found."
echo -e "${YELLOW} You need to request access to the Paloma dataset on HuggingFace:${NC}"
echo -e " ${BLUE}https://huggingface.co/datasets/allenai/paloma${NC}"
echo -e "${YELLOW} Visit the dataset page and click 'Access Request' to request permission.${NC}"
rm -rf lib/paloma
fi
else
print_success "Paloma dataset already exists, skipping clone"
fi

# Create environment for running evaluation inside of lib/olmo_eval
if [ ! -d "lib/olmo-eval/env" ]; then
print_section "OLMo Eval Setup"
poetry run bash -c '
cd lib/olmo-eval
echo "Creating virtual environment..."
virtualenv env
source env/bin/activate
pip install --python-version 3.10 -e .
deactivate
cd ../../
echo "OLMo eval environment setup complete"
'
else
print_success "OLMo eval environment already exists, skipping setup"
fi
fi
# --- Final Status Message --- #

# Final status message
print_section "Setup Status"