cleanup
teetone committed Apr 28, 2024
1 parent a909139 commit ded6d2c
Showing 10 changed files with 84 additions and 109 deletions.
6 changes: 4 additions & 2 deletions setup.cfg
@@ -47,14 +47,13 @@ install_requires=
# Basic metrics
nltk~=3.7
pyext~=0.7
pycocoevalcap~=1.2
rouge-score~=0.1.2
scipy~=1.10
uncertainty-calibration~=0.1.4
scikit-learn~=1.1

# Models and Metrics Extras
transformers~=4.40.0 # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
transformers~=4.40 # For anthropic_client, vision_language.huggingface_vlm_client, huggingface_client, huggingface_tokenizer, test_openai_token_cost_estimator, model_summac (via summarization_metrics)
# TODO: Upgrade torch - we need > 2.0.0 for newer versions of transformers
torch>=1.13.1,<3.0.0 # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
torchvision>=0.14.1,<3.0.0 # For huggingface_client, yalm_tokenizer, model_summac (via summarization_metrics)
@@ -176,6 +175,9 @@ vlm =
crfm-helm[images]
crfm-helm[image2structure]

# For metrics
pycocoevalcap~=1.2

image2structure =
crfm-helm[images]

4 changes: 1 addition & 3 deletions src/helm/benchmark/metrics/common_metric_specs.py
@@ -164,6 +164,4 @@ def get_disinformation_metric_specs(args: Optional[Dict] = None) -> List[MetricSpec]:


def get_open_ended_generation_metric_specs() -> List[MetricSpec]:
return get_basic_metric_specs(
["exact_match", "quasi_exact_match", "f1_score", "rouge_l", "bleu_1", "bleu_4", "cider"]
)
return get_basic_metric_specs(["exact_match", "quasi_exact_match", "f1_score", "rouge_l", "bleu_1", "bleu_4"])
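CIDEr is dropped from the default open-ended generation metrics because its pycocoevalcap dependency now lives behind the vlm extra (see the setup.cfg change above). A minimal sketch, assuming get_basic_metric_specs simply maps metric names to MetricSpecs as the removed return statement did, of how a caller with the extra installed could still request it explicitly:

```python
# Hypothetical sketch: request CIDEr explicitly when pycocoevalcap is available
# (pip install "crfm-helm[vlm]"). The metric names match the removed default list.
from helm.benchmark.metrics.common_metric_specs import get_basic_metric_specs

captioning_metric_specs = get_basic_metric_specs(
    ["exact_match", "quasi_exact_match", "f1_score", "rouge_l", "bleu_1", "bleu_4", "cider"]
)
```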
8 changes: 7 additions & 1 deletion src/helm/benchmark/metrics/evaluate_reference_metrics.py
@@ -10,6 +10,7 @@
from helm.benchmark.metrics.statistic import Stat
from helm.benchmark.scenarios.code_scenario import CodeReference
from helm.benchmark.scenarios.scenario import Reference
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.common.request import GeneratedOutput
from helm.benchmark.scenarios.math_scenario import is_equiv, is_equiv_chain_of_thought
from nltk.metrics.scores import f_measure
@@ -20,7 +21,7 @@
import string
from . import code_metrics_helper
import nltk
from pycocoevalcap.cider.cider import Cider


try:
nltk.data.find("tokenizers/punkt")
@@ -190,6 +191,11 @@ def bleu_4(gold: str, pred: str) -> float:


def cider(gold: str, pred: str) -> float:
try:
from pycocoevalcap.cider.cider import Cider
except ModuleNotFoundError as e:
handle_module_not_found_error(e, ["vlm"])

cider_evaluator = Cider()
candidate = {"caption": [pred]}
reference = {"caption": [gold]}
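The cider implementation now imports pycocoevalcap lazily and reports a helpful install hint via handle_module_not_found_error when the vlm extra is missing. The body above is truncated after building the candidate and reference dictionaries; a minimal standalone sketch of how pycocoevalcap's Cider scorer is typically driven from there, assuming its usual compute_score(references, candidates) interface rather than the exact code in this file:

```python
# Sketch of single-pair CIDEr scoring with pycocoevalcap; requires the optional
# dependency (pip install "crfm-helm[vlm]" or pip install pycocoevalcap).
from pycocoevalcap.cider.cider import Cider


def cider_score(gold: str, pred: str) -> float:
    cider_evaluator = Cider()
    # Both arguments map an arbitrary key to a list of caption strings.
    candidate = {"caption": [pred]}
    reference = {"caption": [gold]}
    average_score, _per_caption_scores = cider_evaluator.compute_score(reference, candidate)
    return float(average_score)
```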
5 changes: 0 additions & 5 deletions src/helm/benchmark/presentation/run_entries_debug.conf

This file was deleted.

125 changes: 61 additions & 64 deletions src/helm/benchmark/run_specs/vlm_run_specs.py

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions src/helm/clients/openai_client.py
@@ -55,12 +55,7 @@ def _is_chat_model_engine(self, model_engine: str) -> bool:
return True
return False

def _is_high_res_vision_model(self, model_engine: str) -> bool:
return model_engine == "gpt-4-vision-preview-high-res"

def _get_model_for_request(self, request: Request) -> str:
if self._is_high_res_vision_model(request.model_engine):
return "gpt-4-vision-preview"
return request.model_engine

def _get_cache_key(self, raw_request: Dict, request: Request):
@@ -136,9 +131,6 @@ def _make_chat_request(self, request: Request) -> RequestResult:

base64_image: str = encode_base64(media_object.location)
image_object: Dict[str, str] = {"url": f"data:image/jpeg;base64,{base64_image}"}
if self._is_high_res_vision_model(request.model_engine):
image_object["detail"] = "high"

content.append({"type": "image_url", "image_url": image_object})
elif media_object.is_type(TEXT_TYPE):
if media_object.text is None:
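For context, the removed branch was how the pseudo-deployment gpt-4-vision-preview-high-res worked: it mapped requests onto the real gpt-4-vision-preview model and set detail: "high" on every image part. A minimal sketch of that message shape, assuming the standard OpenAI Chat Completions content-part format; the helper name and image path are illustrative:

```python
# Sketch of building a high-detail image content part, the behavior the removed
# _is_high_res_vision_model branch enabled. image_content_part and example.jpg are illustrative.
import base64


def image_content_part(image_path: str, high_res: bool) -> dict:
    with open(image_path, "rb") as f:
        base64_image = base64.b64encode(f.read()).decode("utf-8")
    image_object = {"url": f"data:image/jpeg;base64,{base64_image}"}
    if high_res:
        # "detail": "high" asks the model to also inspect detailed 512px crops of the image.
        image_object["detail"] = "high"
    return {"type": "image_url", "image_url": image_object}


message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe this image."},
        image_content_part("example.jpg", high_res=True),
    ],
}
```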
10 changes: 6 additions & 4 deletions src/helm/clients/vertexai_client.py
@@ -354,6 +354,9 @@ def do_it() -> Dict[str, Any]:
raise VertexAIContentBlockedError("No candidates in response due to content blocking")

# We should only have one candidate
assert (
len(candidates) == 1
), f"Expected 1 candidate since candidate_count is 1, got {len(candidates)}."
candidate: Candidate = candidates[0]
if (
candidate.finish_reason in VertexAIChatClient.CONTENT_BLOCKED_FINISH_REASONS
@@ -373,12 +376,11 @@ def do_it() -> Dict[str, Any]:

cache_key = CachingClient.make_cache_key(raw_cache_key, request)
response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
except (requests.exceptions.RequestException, VertexAIContentBlockedError) as e:
if "Content has no parts" in str(e):
return complete_for_valid_error(self.CONTENT_HAS_NO_PARTS_ERROR)

except requests.exceptions.RequestException as e:
error: str = f"Gemini Vision error: {e}"
return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
except VertexAIContentBlockedError as e:
return complete_for_valid_error(str(e))

if "error" in response:
return complete_for_valid_error(response["error"])
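The error handling is also split: transport failures (requests.exceptions.RequestException) still produce a failed RequestResult, while VertexAIContentBlockedError is now converted into a valid empty completion. A condensed, self-contained sketch of that control flow; the exception class and helpers below are stand-ins for the real ones in vertexai_client.py:

```python
# Condensed sketch of the split exception handling; VertexAIContentBlockedError and
# complete_for_valid_error stand in for the real definitions in vertexai_client.py.
import requests

from helm.common.request import RequestResult


class VertexAIContentBlockedError(Exception):
    """Stand-in for the client's content-blocked exception."""


def fetch_with_split_error_handling(get_cached_response, complete_for_valid_error):
    try:
        # In the real client this wraps self.cache.get(cache_key, wrap_request_time(do_it)).
        response = get_cached_response()
    except requests.exceptions.RequestException as e:
        # Transport/HTTP problems are genuine failures.
        return RequestResult(
            success=False, cached=False, error=f"Gemini Vision error: {e}", completions=[], embedding=[]
        )
    except VertexAIContentBlockedError as e:
        # Blocked content becomes a valid (empty) completion instead of a hard failure.
        return complete_for_valid_error(str(e))
    if "error" in response:
        return complete_for_valid_error(response["error"])
    return response
```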
@@ -18,15 +18,15 @@


@dataclass(frozen=True)
class LoadedVision2SeqModelProcessor:
class Vision2SeqModelProcessor:
"""Loaded model and processor."""

model: AutoModelForVision2Seq
processor: AutoProcessor


_models_lock: Lock = Lock()
_models: Dict[str, Optional[LoadedVision2SeqModelProcessor]] = {
_models: Dict[str, Optional[Vision2SeqModelProcessor]] = {
"HuggingFaceM4/idefics2-8b": None,
}

@@ -44,7 +44,7 @@ def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig):
self.tokenizer_name = tokenizer_name
self._device: str = get_torch_device_name()

def _get_model(self, checkpoint: str) -> LoadedVision2SeqModelProcessor:
def _get_model(self, checkpoint: str) -> Vision2SeqModelProcessor:
global _models_lock
global _models

@@ -57,7 +57,7 @@ def _get_model(self, checkpoint: str) -> LoadedVision2SeqModelProcessor:
model = AutoModelForVision2Seq.from_pretrained(checkpoint, torch_dtype=torch_dtype).to(self._device)
processor = AutoProcessor.from_pretrained(checkpoint)

_models[checkpoint] = LoadedVision2SeqModelProcessor(model, processor)
_models[checkpoint] = Vision2SeqModelProcessor(model, processor)
loaded_model_processor = _models[checkpoint]

assert loaded_model_processor is not None
@@ -67,7 +67,7 @@ def make_request(self, request: Request) -> RequestResult:
assert request.model_deployment in _models, f"Not a valid model for this client: {request.model_deployment}"
assert request.multimodal_prompt is not None, "Multimodal prompt is required"

loaded_model_processor: LoadedVision2SeqModelProcessor = self._get_model(request.model_deployment)
loaded_model_processor: Vision2SeqModelProcessor = self._get_model(request.model_deployment)
model = loaded_model_processor.model
processor = loaded_model_processor.processor

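The rename from LoadedVision2SeqModelProcessor to Vision2SeqModelProcessor runs through the module-level cache that loads each checkpoint at most once behind a lock. A minimal sketch of that lazy, thread-safe loading pattern, assuming the same transformers classes the diff uses; the dtype and device handling of the real client are simplified away:

```python
# Minimal sketch of the lock-guarded, load-once cache for Vision2Seq checkpoints.
# torch_dtype here is illustrative; the real client also moves the model to a torch device.
from dataclasses import dataclass
from threading import Lock
from typing import Dict, Optional

import torch
from transformers import AutoModelForVision2Seq, AutoProcessor


@dataclass(frozen=True)
class Vision2SeqModelProcessor:
    """Loaded model and processor."""

    model: AutoModelForVision2Seq
    processor: AutoProcessor


_models_lock: Lock = Lock()
_models: Dict[str, Optional[Vision2SeqModelProcessor]] = {"HuggingFaceM4/idefics2-8b": None}


def get_model(checkpoint: str) -> Vision2SeqModelProcessor:
    global _models
    with _models_lock:
        if _models[checkpoint] is None:
            model = AutoModelForVision2Seq.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
            processor = AutoProcessor.from_pretrained(checkpoint)
            _models[checkpoint] = Vision2SeqModelProcessor(model, processor)
        loaded_model_processor = _models[checkpoint]
    assert loaded_model_processor is not None
    return loaded_model_processor
```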
9 changes: 0 additions & 9 deletions src/helm/config/model_deployments.yaml
@@ -1357,15 +1357,6 @@ model_deployments:
client_spec:
class_name: "helm.clients.openai_client.OpenAIClient"

- name: openai/gpt-4-vision-preview-high-res
model_name: openai/gpt-4-vision-preview-high-res
tokenizer_name: openai/cl100k_base
max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
max_request_length: 128001
max_sequence_and_generated_tokens_length: 132096
client_spec:
class_name: "helm.clients.openai_client.OpenAIClient"

## Codex Models
# DEPRECATED: Codex models have been shut down on March 23 2023.

8 changes: 0 additions & 8 deletions src/helm/config/model_metadata.yaml
@@ -1772,14 +1772,6 @@ models:
release_date: 2023-11-06
tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

- name: openai/gpt-4-vision-preview-high-res
display_name: GPT-4V high res (preview)
description: GPT-4V with "high res" mode enabled, which first allows the model to see the low res image and then creates detailed crops of input images as 512px squares based on the input image size.
creator_organization_name: OpenAI
access: limited
release_date: 2023-11-06
tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

## Codex Models
# DEPRECATED: Codex models have been shut down on March 23 2023.

