# generate_conditioned.py

"""
Conditional text generation with the auto-regressive models of the HuggingFace Transformers repository.
"""
import sys
import os
import os
os.environ["CUDA_VISIBLE_DEVICES"]="4"
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

import typing as t
import argparse
import logging
import pathlib
import json
import re

import pandas as pd
import torch
import warnings
from tqdm import tqdm
from transformers import AutoTokenizer

from selfcond.generation import force_units_hooks, set_seed
from selfcond.models import PytorchTransformersModel

# Keep console output readable: ignore every Python warning and only let
# errors through from the transformers tokenization logger.
warnings.filterwarnings("ignore")
logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)

# Neuron count used throughout this script: it is the upper default of
# --num-units and is embedded in the expertise / results file names.
# For neuron_group "top" or "bottom" the expertise file name carries
# NEURON_NUMBER; for "both" it carries 2 * NEURON_NUMBER.
NEURON_NUMBER = 3000

# Relations whose expertise files are applied one after another when the
# script is run with --show-overlap.
RELATION_LIST = [
    "person_occupation",
    "person_pro_sport",
    "person_mother",
    "product_company",
    "company_ceo",
    "landmark_country",
    "person_sport_position",
    "company_hq",
    "star_constellation",
    "landmark_continent",
    "person_father",
    "person_plays_instrument",
]

def argument_parser(prev_args: t.Optional[str] = None):
    """Build the command-line parser for this script and parse ``sys.argv``.

    Args:
        prev_args: Forwarded as the first positional argument of
            ``argparse.ArgumentParser``, i.e. the program name shown in
            usage/help output. It is NOT parsed as a list of arguments;
            this historical behaviour is kept for backward compatibility.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or when the
            required ``--neuron_group`` option is missing.
    """
    parser = argparse.ArgumentParser(prev_args)

    # --- Model -------------------------------------------------------------
    parser.add_argument(
        "--model-name",
        default="llama-2-7b-hf",
        type=str,
        help="Path to pre-trained model or shortcut name from the HuggingFace Transformers.",
    )
    parser.add_argument(
        "--cache-dir",
        type=pathlib.Path,
        help="Models cached directory.",
        required=False,
        default=None,
    )

    # --- Data --------------------------------------------------------------
    parser.add_argument(
        "--test-data",
        type=str,
        default="datasets/LRE/lre_prompts_zeroshot/test_data/company_ceo_prompts_excluded.json",
        help="Path to the JSON file containing test prompts and answers.",
        required=False,
    )
    parser.add_argument(
        "--test-data-folder",
        type=str,
        default="datasets/LRE/lre_prompts_zeroshot/test_data",
        help="Path to the folder containing test data files.",
    )
    parser.add_argument(
        "--train-data-folder",
        type=str,
        default="datasets/LRE/lre_prompts_zeroshot",
        help="Path to the folder containing train data files.",
    )
    parser.add_argument(
        "--compute-all",
        action="store_true",
        help="If set, the model will be tested on all data in the test data folder.",
    )

    # --- Neuron selection --------------------------------------------------
    parser.add_argument(
        "--neuron_group",
        type=str,
        choices=["top", "bottom", "both"],
        required=True,
        help='Neuron group to use: "top", "bottom", or "both".',
    )

    # --- Generation --------------------------------------------------------
    parser.add_argument(
        "--length", type=int, default=2, help="Number of new tokens to be generated."
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.0,
        help="Logits softmax temperature (not used when do_sample=False).",
    )
    parser.add_argument(
        "--top-k",
        type=int,
        default=0,
        help="Top-k tokens taken into account at generation (not used when do_sample=False).",
    )
    parser.add_argument(
        "--top-p",
        type=float,
        default=1.0,
        help=(
            "Only those tokens whose probabilities add up to top_p are "
            "taken into account for generation (not used when do_sample=False)."
        ),
    )
    parser.add_argument("--device", type=str, required=False, default="cuda:0")
    # NOTE(review): generate() in this file only asserts that 1 or 2 seeds were
    # given and then uses a fixed seed; the sweep described below is not
    # implemented there.
    parser.add_argument(
        "--seed",
        type=int,
        default=[1509],
        nargs="*",
        help=(
            "Random seed for initialization. If 2 seeds are passed, all seeds in between are swept."
        ),
    )

    # --- Intervention ------------------------------------------------------
    parser.add_argument(
        "--metric",
        type=str,
        default="ap",
        help="Metric to use to rank experts for generation.",
    )
    parser.add_argument(
        "--forcing",
        type=str,
        nargs="*",
        # Default forcing mode is "off_mean"; each value is handed unchanged
        # to force_units_hooks by generate().
        default=["off_mean"],
        help="Forcing value.",
    )
    parser.add_argument(
        "--num-units",
        type=int,
        default=[0, NEURON_NUMBER],  # Include 0 to test without neuron forcing
        nargs="+",
        help=(
            "Number of neurons (top experts in terms of --metric) to be intervened on during"
            " generation. Include 0 to test without neuron forcing."
        ),
    )
    parser.add_argument(
        "--top-n",
        type=int,
        nargs="+",
        default=[
            1,
        ],
        help=(
            "Which set of top units to use. If set to 1, units from [0, --num-units] are used. "
            "If set to 2, units from [--num-units, 2*--num-units] are used. And so on. "
            "If set to 0, --num-units random units are selected."
        ),
    )
    parser.add_argument(
        "--per-layer",
        action="store_true",
        help="If set, force --num-units per layer at a time.",
    )

    # --- Miscellaneous flags -----------------------------------------------
    parser.add_argument(
        "--eos", action="store_true", help="Trim the sentence if EOS is generated."
    )
    parser.add_argument("--verbose", action="store_true", help="Show more information")
    parser.add_argument("--no-save", action="store_true", help="If set, nothing is saved.")
    parser.add_argument(
        "--only-last-token",
        action="store_true",
        help="If set, only the last token of the sequence is intervened upon.",
    )
    parser.add_argument(
        "--show-overlap",
        action="store_true",
        help="If set, will use one expertise file for forcing other relation neurons.",
    )
    parser.add_argument(
        "--train_set",
        action="store_true",
        help="If set, will calculate the accuracy on the train set.",
    )
    # Any value other than "all" is appended (as "_<value>") to the expertise
    # and results file names built in generate().
    parser.add_argument(
        "--neuron_type",
        default='all',
        type=str,
    )
    parser.add_argument(
        "--random",
        action="store_true",
        help="If set, num-units neurons will be randomly selected",
    )

    return parser.parse_args()


def load_test_data(file_path: str):
    """Read the UTF-8 JSON file at ``file_path`` and return the parsed content."""
    with open(file_path, encoding="utf-8") as handle:
        return json.loads(handle.read())


def is_correct_answer(model_response: str, expected_answer: str) -> bool:
    """Return True when the expected answer starts with the model's response.

    The response is lower-cased, stripped of surrounding whitespace and of
    trailing periods before the case-insensitive prefix comparison.

    A response that is empty after this normalisation (for example ``""``,
    ``"   "`` or ``"."``) is never correct; without this guard every expected
    answer would trivially "start with" the empty string.
    """
    normalized_response = model_response.lower().strip().rstrip(".")
    if not normalized_response:
        return False
    return expected_answer.lower().startswith(normalized_response)


def generate_response(prompt: str, model, tokenizer, device, max_new_tokens=2) -> str:
    """Greedily generate a short continuation of ``prompt`` and return it as text.

    Args:
        prompt: Text fed to the model.
        model: Object exposing ``config``, ``eval()`` and ``generate()``.
            Assumed to be a decoder-only model whose ``generate`` output
            starts with the prompt token ids - TODO confirm for new models.
        tokenizer: Tokenizer callable that also provides ``decode``,
            ``pad_token_id`` and ``eos_token_id``.
        device: Device the encoded prompt is moved to.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The decoded continuation, stripped of surrounding whitespace and of
        any leading non-word characters.

    Side effects:
        Sets ``model.config.pad_token_id`` when it is None and switches the
        model to eval mode.
    """
    if model.config.pad_token_id is None:
        model.config.pad_token_id = tokenizer.pad_token_id

    encoding = tokenizer(prompt, return_tensors="pt").to(device)
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    model.eval()

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            num_beams=1,  # Greedy search
            do_sample=False,  # Deterministic output
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the tokens that follow the prompt. Slicing the fully decoded
    # text by len(prompt) is fragile: decoding is not guaranteed to reproduce
    # the prompt character for character (whitespace may be normalised), which
    # would misalign the slice.
    prompt_token_count = input_ids.shape[1]
    new_token_ids = output_ids[0][prompt_token_count:]
    response = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    # Drop any leading non-word characters (punctuation, quotes, ...).
    return re.sub(r'^[^\w]*', '', response)


# Ordered (substring, Hugging Face hub id) pairs. The first substring found in
# --model-name wins, so the order matters.
_MODEL_HUB_IDS = (
    ("llama-2-7b", "meta-llama/Llama-2-7b-hf"),
    ("llama-2-13b", "meta-llama/Llama-2-13b-hf"),
    ("llama-2-70b", "meta-llama/Llama-2-70b-hf"),
    ("xglm-564m", "facebook/xglm-564M"),
    ("xglm-1.7b", "facebook/xglm-1.7B"),
    ("xglm-2.9b", "facebook/xglm-2.9B"),
    ("bloom-560m", "bigscience/bloom-560m"),
    # NOTE(review): a "bloom" name maps to a "bloomz" checkpoint here - confirm intended.
    ("bloom-1.7b", "bigscience/bloomz-1b7"),
    ("bloom-3b", "bigscience/bloom-3b"),
)


def _resolve_hub_model_id(model_name: str) -> str:
    """Map a --model-name value to its hub id, or raise ValueError if unknown."""
    for fragment, hub_id in _MODEL_HUB_IDS:
        if fragment in model_name:
            return hub_id
    raise ValueError(f"Model name {model_name} not implemented")


def _expertise_file_path(args, source: str, extra_info: str) -> str:
    """Build the expertise CSV path for ``source`` (a concept or relation name)."""
    base = f"mid_output/Relation/{args.model_name}/sense/{source}/expertise/"
    if args.random:
        return base + f"expertise_random_{NEURON_NUMBER}{extra_info}.csv"
    if args.neuron_group == "top":
        return base + f"expertise_limited_{NEURON_NUMBER}_top{extra_info}.csv"
    if args.neuron_group == "bottom":
        return base + f"expertise_limited_{NEURON_NUMBER}_bottom{extra_info}.csv"
    if args.neuron_group == "both":
        return base + f"expertise_limited_{2*NEURON_NUMBER}_both{extra_info}.csv"
    raise ValueError("Invalid neuron_group. Must be 'top', 'bottom', or 'both'.")


def _results_dir(args, owner, extra_info: str, train_suffix: str) -> str:
    """Build the output directory path under ``outputs/<model>/<owner>/``."""
    group = "random" if args.random else args.neuron_group
    return (
        f"outputs/{args.model_name}/{owner}/"
        f"{group}_results_{NEURON_NUMBER}{extra_info}_{args.forcing[0]}{train_suffix}"
    )


def _evaluate_prompts(test_data, model, tokenizer, device, max_new_tokens, seed, description):
    """Generate a response per item; return (prompt, expected, response, correct) tuples."""
    outcomes = []
    with tqdm(total=len(test_data), desc=description) as pbar:
        for item in test_data:
            prompt = re.sub(r'^Prompt:\s*', '', item["prompt"]).strip()
            expected_answer = item["answer"].strip()

            # Re-seed before every prompt, as the original loop did.
            set_seed(seed, gpu=device != "cpu")

            model_response = generate_response(
                prompt=prompt,
                model=model,
                tokenizer=tokenizer,
                device=device,
                max_new_tokens=max_new_tokens,
            )
            correct = is_correct_answer(model_response, expected_answer)
            outcomes.append((prompt, expected_answer, model_response, correct))
            pbar.update()
    return outcomes


def generate(args, relation: str = None):
    """Evaluate answer accuracy with and without neuron forcing.

    For every selected data file, the matching expertise CSV is loaded and,
    for each combination of forcing value, top-n, layer and number of units,
    the model answers every prompt. Per-prompt records are written to
    ``<results_dir>/<concept>_accuracy_results.csv`` and the accumulated
    accuracies to an ``accuracy_results.json`` file.

    Args:
        args: Namespace returned by ``argument_parser``.
        relation: When given, the expertise file of this relation is used for
            every data file; when None, each file uses the expertise of its
            own concept (derived from the file name).

    Raises:
        FileNotFoundError: No JSON file in the data folder, or a required
            expertise file is missing.
        ValueError: Unknown model name, invalid neuron group, or missing
            ``--test-data`` when ``--compute-all`` is not set.
    """
    assert len(args.seed) in [1, 2]

    if args.device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    else:
        device = args.device

    n_gpu = torch.cuda.device_count() if device != "cpu" else 0
    print(f"Device {device} ({n_gpu})")

    # Suffix appended to output directories when evaluating on the train set.
    train_suffix = ""
    if args.compute_all:
        if args.train_set:
            print("Calculating the accuracy on the train set ...")
            data_folder = args.train_data_folder
            train_suffix += "_train"
        else:
            print("Calculating the accuracy on the test set ...")
            data_folder = args.test_data_folder
        test_data_files = [
            os.path.join(data_folder, f)
            for f in os.listdir(data_folder)
            if f.endswith(".json")
        ]
        if not test_data_files:
            # Report the folder that was actually scanned (train or test).
            raise FileNotFoundError(f"No JSON files found in folder {data_folder}")
    else:
        if not args.test_data:
            raise ValueError("You must provide --test-data when --compute-all is not set.")
        test_data_files = [args.test_data]

    m_name = _resolve_hub_model_id(args.model_name)

    # Load model and tokenizer once
    tokenizer = AutoTokenizer.from_pretrained(m_name, cache_dir=args.cache_dir)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    readable_model = PytorchTransformersModel(
        model_name=m_name,
        seq_len=128,
        cache_dir=args.cache_dir,
        device=device,
    )

    accuracy_results = {}
    accuracys = {}

    # NOTE(review): args.seed is only validated above; the seed used for every
    # prompt is this fixed value.
    seed = 1509

    for test_file in test_data_files:
        test_file_name = os.path.basename(test_file).replace(".json", "")
        # The concept is the part of the file name before "_prompts",
        # e.g. 'company_ceo_prompts_excluded' -> 'company_ceo'.
        concept_match = re.match(r"(.*)_prompts", test_file_name)
        concept = concept_match.group(1) if concept_match else test_file_name

        test_data = load_test_data(test_file)

        # On the train set, only keep prompts used for neuron identification.
        if args.train_set:
            with open(f"./assets/Relation/sense/{args.model_name}/{concept}.json", "r", encoding="utf-8") as f:
                used_data = json.load(f)
            used_prompts = set(used_data['sentences']['positive'])
            test_data = [d for d in test_data if d['prompt'] in used_prompts]

        total_questions = len(test_data)
        if total_questions == 0:
            # Nothing to evaluate; accuracy would be a division by zero.
            print(f"No prompts to evaluate in {test_file}; skipping.")
            continue

        extra_info = ''
        if args.neuron_type != 'all':
            extra_info += f"_{args.neuron_type}"

        # Pick the expertise file: own concept by default, or the given relation.
        if relation is None:
            if args.random:
                print(f"Using expertise file (random neurons) for concept: {concept}")
                print(f"Using expertise file for concept: {concept}")
            expertise_file = _expertise_file_path(args, concept, extra_info)
        else:
            if args.random:
                print(f"Using expertise file (random neurons) from relation: {relation}")
            else:
                print(f"Using expertise from relation: {relation}")
            expertise_file = _expertise_file_path(args, relation, extra_info)

        if not os.path.exists(expertise_file):
            raise FileNotFoundError(f"Expertise file not found: {expertise_file}")

        expertise = pd.read_csv(expertise_file)

        results_dir = _results_dir(
            args, relation if args.show_overlap else concept, extra_info, train_suffix
        )
        os.makedirs(results_dir, exist_ok=True)
        results_file = os.path.join(results_dir, f"{concept}_accuracy_results.csv")

        layer_names = (
            list(expertise.sort_values("layer").layer.unique())
            if args.per_layer
            else [
                None,
            ]
        )

        print(expertise)

        generation_results = []
        for forcing_value in args.forcing:
            for top_n in args.top_n:
                for force_layer in layer_names:
                    for num_units in args.num_units:
                        if num_units > 0:
                            model_with_hooks, df_force = force_units_hooks(
                                model=readable_model,
                                expertise=expertise,
                                value=forcing_value,
                                metric=args.metric,
                                num_units=num_units,
                                top_n=top_n,
                                use_layers=force_layer,
                                only_last_token=args.only_last_token,
                            )
                            mean_metric = float(df_force[args.metric].mean())
                        else:
                            # num_units == 0 means no forcing.
                            model_with_hooks = readable_model
                            mean_metric = 0.0

                        description = (
                            f"Processing {test_file_name} "
                            f"[force={forcing_value} units={num_units}/{len(expertise)} ({100 * num_units / len(expertise):0.3f}%)"
                            f" top_n={top_n} layers={force_layer}]"
                        )
                        try:
                            outcomes = _evaluate_prompts(
                                test_data,
                                model_with_hooks.module,
                                tokenizer,
                                device,
                                args.length,
                                seed,
                                description,
                            )
                        finally:
                            # Always restore the original units, even if
                            # generation fails part-way through.
                            if num_units > 0:
                                readable_model.restore_units()

                        correct_answers = 0
                        for prompt, expected_answer, model_response, correct in outcomes:
                            if correct:
                                correct_answers += 1
                            generation_results.append(
                                {
                                    "test_file": test_file_name,
                                    "concept": concept,
                                    "neuron_group": args.neuron_group,
                                    "forcing_value": forcing_value,
                                    "num_units": num_units,
                                    "neuron_forcing_applied": num_units > 0,
                                    "top_n": top_n,
                                    "seed": seed,
                                    "prompt": prompt,
                                    "expected_answer": expected_answer,
                                    "model_response": model_response,
                                    "correct": correct,
                                    "mean_metric": mean_metric,
                                    "forced_layer": force_layer,
                                }
                            )

                        accuracy = correct_answers / total_questions
                        print(
                            f"\nAccuracy on {test_file_name}: {accuracy * 100:.2f}% with forcing_value={forcing_value}, num_units={num_units}"
                        )
                        accuracys[
                            f"{concept}_{args.neuron_group}_v:{forcing_value}_n:{num_units}"
                        ] = accuracy

        # Save the per-prompt records of this file.
        generated_df = pd.DataFrame(generation_results)
        generated_df.to_csv(results_file, index=False)
        print(f"Results saved to {results_file}")

        accuracy_results[f"expertise: {relation}"] = accuracys
        # With a relation, keep writing to that relation's directory. Without
        # one, use this file's results directory instead of the never-created
        # "outputs/<model>/None/..." path.
        if relation is None:
            accuracy_dir = results_dir
        else:
            accuracy_dir = _results_dir(args, relation, extra_info, train_suffix)
            os.makedirs(accuracy_dir, exist_ok=True)
        with open(os.path.join(accuracy_dir, "accuracy_results.json"), "w", encoding="utf-8") as f:
            json.dump(accuracy_results, f, indent=4)


if __name__ == "__main__":
    # Parse the CLI options once and dispatch on --show-overlap.
    args = argument_parser()
    if not args.show_overlap:
        # Default mode: each data file uses the expertise of its own concept.
        generate(args)
    else:
        # Overlap mode: apply each relation's expertise in alphabetical order.
        RELATION_LIST = sorted(RELATION_LIST)
        for relation in RELATION_LIST:
            generate(args, relation=relation)
        
# python generate_conditioned.py --model-name llama-2-7b-hf --compute-all --neuron_group top --forcing zero --show-overlap --random --train_set
# python generate_conditioned.py --model-name llama-2-7b-hf --compute-all --neuron_group top --forcing zero --show-overlap --neuron_type self_attn