The Space lazy-loads Qwen2.5-3B-Instruct on first use, caching the model and tokenizer in module-level globals so repeated calls don't reload them:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Kept as module-level globals so the model is loaded only once
_model = None
_tokenizer = None


def get_model():
    global _model, _tokenizer
    if _model is None:
        print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
        model_name = "Qwen/Qwen2.5-3B-Instruct"
        # model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # faster

        _tokenizer = AutoTokenizer.from_pretrained(model_name)

        # --- Fix: make the dtype selection logic safe ---
        dtype = torch.float32  # default: float32 (~12 GB, should fit in 16 GB of RAM)
        if torch.cuda.is_available():
            # Assumes a bf16-capable GPU; torch.cuda.is_bf16_supported() could guard this
            dtype = torch.bfloat16
        # Check that torch.cpu actually exposes is_bf16_supported before calling it
        elif hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported():
            dtype = torch.bfloat16
        # -------------------------------------------------

        _model = AutoModelForCausalLM.from_pretrained(
            model_name,
            dtype=dtype,  # on older transformers versions this kwarg is torch_dtype
            trust_remote_code=True,
        )
        print(f"Model Loaded! (dtype: {dtype})")
    return _model, _tokenizer
```
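
Once loaded, the pair can be driven through the tokenizer's chat template. Below is a minimal usage sketch; the prompt text and generation settings are illustrative assumptions, not part of the Space's code:

```python
# Usage sketch (hypothetical prompt and settings, not from the Space itself)
model, tokenizer = get_model()

messages = [{"role": "user", "content": "Hello! Who are you?"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=128)

# Decode only the newly generated tokens, skipping the echoed prompt
reply = tokenizer.decode(
    output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
)
print(reply)
```

Because the model is cached in module-level globals, every call to `get_model()` after the first returns immediately.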