Spaces:

yuto0o
/

django-ai-chat

Sleeping

django-ai-chat / ml_api /model_loader.py

yuto0o

性能検査

dd95ee1 8 days ago

1.21 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Module-level cache slots: both stay None until get_model() populates them.
	_model = _tokenizer = None


	def get_model():
	    """Return the cached ``(model, tokenizer)`` pair, loading it on first use.

	    The first call loads the tokenizer and the causal-LM weights for
	    ``Qwen/Qwen2.5-3B-Instruct`` and stores them in the module-level
	    ``_model`` / ``_tokenizer`` globals; later calls return those same
	    objects without reloading.

	    The weights are requested as ``torch.bfloat16`` when CUDA is available,
	    or when ``torch.cpu`` exposes ``is_bf16_supported`` and it returns true;
	    otherwise ``torch.float32`` is used. This function passes no device
	    arguments to ``from_pretrained``.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    global _model, _tokenizer

	    # Fast path: a previous call already populated the cache.
	    if _model is not None:
	        return _model, _tokenizer

	    print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
	    # NOTE: the original author marked "Qwen/Qwen2.5-1.5B-Instruct" as a
	    # faster alternative to the checkpoint below.
	    model_name = "Qwen/Qwen2.5-3B-Instruct"

	    _tokenizer = AutoTokenizer.from_pretrained(model_name)

	    # Pick the dtype defensively. float32 is the fallback (the original
	    # author's estimate: roughly 12 GB, expected to fit in 16 GB of RAM).
	    # `torch.cpu.is_bf16_supported` is probed with hasattr() before being
	    # called so that torch builds lacking it do not raise AttributeError.
	    bf16_ok = torch.cuda.is_available() or (
	        hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported()
	    )
	    dtype = torch.bfloat16 if bf16_ok else torch.float32

	    _model = AutoModelForCausalLM.from_pretrained(
	        model_name, dtype=dtype, trust_remote_code=True
	    )
	    print(f"Model Loaded! (dtype: {dtype})")
	    return _model, _tokenizer