model-conversion : add device option to embd run orig model (#18386)

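This commit adds a device selection option to the script that runs the original embedding model. Users can now choose the device (cpu, cuda, mps, auto) with the --device/-d command-line argument; the default, auto, picks CUDA if available, then MPS, and otherwise falls back to the CPU. It also restructures the script into separate functions (argument parsing, model loading, prompt loading) called from a main() entry point.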
2025-12-29 13:37:02 +01:00
parent 0c8986403b
commit 7cbec34a63
1 changed file with 217 additions and 142 deletions
@@ -2,6 +2,7 @@

 import argparse
 import os
+import sys
 import numpy as np
 import importlib
 from pathlib import Path
@@ -9,32 +10,52 @@ from pathlib import Path
 from transformers import AutoTokenizer, AutoConfig, AutoModel
 import torch

-unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')

-parser = argparse.ArgumentParser(description='Process model with specified path')
-parser.add_argument('--model-path', '-m', help='Path to the model')
-parser.add_argument('--prompts-file', '-p', help='Path to file containing prompts (one per line)')
-parser.add_argument('--use-sentence-transformers', action='store_true',
-                    help='Use SentenceTransformer to apply all numbered layers (01_Pooling, 02_Dense, 03_Dense, 04_Normalize)')
-args = parser.parse_args()
+def parse_arguments():
+    """Parse command-line options for running the original embedding model.
+
+    Returns the argparse namespace with model_path, prompts_file,
+    use_sentence_transformers and device (which defaults to 'auto').
+    """
+    st_help = ('Use SentenceTransformer to apply all numbered layers '
+               '(01_Pooling, 02_Dense, 03_Dense, 04_Normalize)')
+    # Each entry is (option strings, keyword arguments for add_argument).
+    option_specs = (
+        (('--model-path', '-m'),
+         {'help': 'Path to the model'}),
+        (('--prompts-file', '-p'),
+         {'help': 'Path to file containing prompts (one per line)'}),
+        (('--use-sentence-transformers',),
+         {'action': 'store_true', 'help': st_help}),
+        (('--device', '-d'),
+         {'help': 'Device to use (cpu, cuda, mps, auto)', 'default': 'auto'}),
+    )
+
+    cli = argparse.ArgumentParser(description='Run original embedding model')
+    for flags, settings in option_specs:
+        cli.add_argument(*flags, **settings)
+
+    return cli.parse_args()

-def read_prompt_from_file(file_path):
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            return f.read().strip()
-    except FileNotFoundError:
-        print(f"Error: Prompts file '{file_path}' not found")
-        exit(1)
-    except Exception as e:
-        print(f"Error reading prompts file: {e}")
-        exit(1)

-model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path)
-if model_path is None:
-    parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable")
-
-# Determine if we should use SentenceTransformer
-use_sentence_transformers = args.use_sentence_transformers or os.environ.get('USE_SENTENCE_TRANSFORMERS', '').lower() in ('1', 'true', 'yes')
+def load_model_and_tokenizer(model_path, use_sentence_transformers=False, device="auto"):
+    if device == "cpu":
+        device_map = {"": "cpu"}
+        print("Forcing CPU usage")
+    elif device == "auto":
+        # On Mac, "auto" device_map can cause issues with accelerate
+        # So we detect the best device manually
+        if torch.cuda.is_available():
+            device_map = {"": "cuda"}
+            print("Using CUDA")
+        elif torch.backends.mps.is_available():
+            device_map = {"": "mps"}
+            print("Using MPS (Apple Metal)")
+        else:
+            device_map = {"": "cpu"}
+            print("Using CPU")
+    else:
+        device_map = {"": device}

    if use_sentence_transformers:
        from sentence_transformers import SentenceTransformer
@@ -44,7 +65,6 @@ if use_sentence_transformers:
        config = model[0].auto_model.config  # type: ignore
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
-
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

        # This can be used to override the sliding window size for manual testing. This
@@ -52,9 +72,9 @@ else:
        # and compare it with the converted .gguf model.
        if hasattr(config, 'sliding_window'):
            original_sliding_window = config.sliding_window
-        #original_sliding_window = 6
            print(f"Modified sliding window: {original_sliding_window} -> {config.sliding_window}")

+        unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')
        print(f"Using unreleased model: {unreleased_model_name}")
        if unreleased_model_name:
            model_name_lower = unreleased_model_name.lower()
@@ -64,32 +84,81 @@ else:

            try:
                model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
-            model = model_class.from_pretrained(model_path, config=config, trust_remote_code=True)
+                model = model_class.from_pretrained(
+                    model_path,
+                    device_map=device_map,
+                    offload_folder="offload",
+                    trust_remote_code=True,
+                    config=config
+                )
            except (ImportError, AttributeError) as e:
                print(f"Failed to import or load model: {e}")
-            exit(1)
+                sys.exit(1)
        else:
-        model = AutoModel.from_pretrained(model_path, config=config, trust_remote_code=True)
+            model = AutoModel.from_pretrained(
+                model_path,
+                device_map=device_map,
+                offload_folder="offload",
+                trust_remote_code=True,
+                config=config
+            )
        print(f"Model class: {type(model)}")
        print(f"Model file: {type(model).__module__}")

        # Verify the model is using the correct sliding window
-if not use_sentence_transformers:
        if hasattr(model.config, 'sliding_window'):  # type: ignore
            print(f"Model's sliding_window: {model.config.sliding_window}")  # type: ignore
        else:
            print("Model config does not have sliding_window attribute")

+    return model, tokenizer, config
+
+
+def get_prompt(args):
+    """Return the prompt text: the stripped prompts file, else a default."""
+    prompts_path = args.prompts_file
+    if not prompts_path:
+        return "Hello world today"
+    try:
+        with open(prompts_path, 'r', encoding='utf-8') as handle:
+            return handle.read().strip()
+    except FileNotFoundError:
+        print(f"Error: Prompts file '{prompts_path}' not found")
+    except Exception as err:
+        print(f"Error reading prompts file: {err}")
+    sys.exit(1)
+
+
+def main():
+    args = parse_arguments()
+
+    model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path)
+    if model_path is None:
+        print("Error: Model path must be specified either via --model-path argument "
+              "or EMBEDDING_MODEL_PATH environment variable")
+        sys.exit(1)
+
+    # Determine if we should use SentenceTransformer
+    use_st = (
+        args.use_sentence_transformers or os.environ.get('USE_SENTENCE_TRANSFORMERS', '').lower() in ('1', 'true', 'yes')
+    )
+
+    model, tokenizer, config = load_model_and_tokenizer(model_path, use_st, args.device)
+
+    # Get the device the model is on
+    if not use_st:
+        device = next(model.parameters()).device
+    else:
+        # For SentenceTransformer, get device from the underlying model
+        device = next(model[0].auto_model.parameters()).device  # type: ignore
+
    model_name = os.path.basename(model_path)

-if args.prompts_file:
-    prompt_text = read_prompt_from_file(args.prompts_file)
+    prompt_text = get_prompt(args)
    texts = [prompt_text]
-else:
-    texts = ["Hello world today"]

    with torch.no_grad():
-    if use_sentence_transformers:
+        if use_st:
            embeddings = model.encode(texts, convert_to_numpy=True)
            all_embeddings = embeddings  # Shape: [batch_size, hidden_size]

@@ -120,6 +189,8 @@ with torch.no_grad():
            for i, (token_id, token_str) in enumerate(zip(tokens, token_strings)):
                print(f"{token_id:6d} -> '{token_str}'")

+            # Move inputs to the same device as the model
+            encoded = {k: v.to(device) for k, v in encoded.items()}
            outputs = model(**encoded)
            hidden_states = outputs.last_hidden_state  # Shape: [batch_size, seq_len, hidden_size]

@@ -175,3 +246,7 @@ with torch.no_grad():
        print("")
        print(f"Saved bin embeddings to: {bin_filename}")
        print(f"Saved txt embeddings to: {txt_filename}")
+
+
+if __name__ == "__main__":
+    main()