```python
# Text-Conditional Diffusion Configuration
IMAGE_SIZE = 64
BATCH_SIZE = 128  # 32 per GPU across 4 GPUs
NUM_EPOCHS = 100  # Longer training for better text conditioning
LEARNING_RATE = 1e-4
TIMESTEPS = 1000

# Model architecture
CHANNELS = 256  # Maximum capacity for best feature learning
TIME_DIM = 128
TEXT_DIM = 512  # CLIP embedding dimension

# Text encoder
CLIP_MODEL = "openai/clip-vit-base-patch32"
FREEZE_CLIP = True  # Freeze CLIP weights during training

# Classifier-free guidance
CFG_DROP_PROB = 0.15  # Increased to 15% for stronger conditioning contrast
CFG_GUIDANCE_SCALE = 5.0  # Higher default guidance scale

# Data
DATASET_NAME = "Xenova/quickdraw-small"
MAX_SAMPLES = None  # Set to None to use all samples, or specify a number
NUM_CLASSES_FILTER = 5  # Small set for a clear proof-of-concept
NUM_CLASSES = None  # Will be set after loading the dataset
```
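
As a minimal sketch of how `CLIP_MODEL`, `FREEZE_CLIP`, and `TEXT_DIM` fit together, the text encoder could be loaded and frozen with Hugging Face `transformers` as below. The `encode_text` helper is illustrative and not part of the original code; for ViT-B/32 the pooled text embedding has dimension 512, matching `TEXT_DIM`.

```python
# Sketch: load and freeze the CLIP text encoder (assumes `transformers` is installed).
import torch
from transformers import CLIPTokenizer, CLIPTextModel

CLIP_MODEL = "openai/clip-vit-base-patch32"
FREEZE_CLIP = True

tokenizer = CLIPTokenizer.from_pretrained(CLIP_MODEL)
text_encoder = CLIPTextModel.from_pretrained(CLIP_MODEL)

if FREEZE_CLIP:
    text_encoder.eval()
    for p in text_encoder.parameters():
        p.requires_grad = False  # CLIP stays fixed; only the UNet is trained

@torch.no_grad()
def encode_text(prompts):
    # Returns pooled text embeddings of shape (batch, 512) for ViT-B/32.
    tokens = tokenizer(prompts, padding=True, truncation=True, return_tensors="pt")
    return text_encoder(**tokens).pooler_output
```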
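
The two classifier-free guidance settings are typically used at different stages: `CFG_DROP_PROB` controls how often the text condition is replaced by a null embedding during training, and `CFG_GUIDANCE_SCALE` blends the conditional and unconditional noise predictions at sampling time. The sketch below assumes a denoiser called as `model(x_t, t, text_emb)`; that signature and the helper names are placeholders, not the original API.

```python
import torch

CFG_DROP_PROB = 0.15
CFG_GUIDANCE_SCALE = 5.0

def drop_text_condition(text_emb, null_emb, drop_prob=CFG_DROP_PROB):
    # Training: with probability drop_prob, swap a sample's text embedding for the
    # null embedding so the model also learns the unconditional noise prediction.
    mask = torch.rand(text_emb.shape[0], device=text_emb.device) < drop_prob
    return torch.where(mask[:, None], null_emb.expand_as(text_emb), text_emb)

def guided_noise_prediction(model, x_t, t, text_emb, null_emb, scale=CFG_GUIDANCE_SCALE):
    # Sampling: eps = eps_uncond + scale * (eps_cond - eps_uncond).
    eps_cond = model(x_t, t, text_emb)
    eps_uncond = model(x_t, t, null_emb.expand_as(text_emb))
    return eps_uncond + scale * (eps_cond - eps_uncond)
```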
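
Finally, a sketch of how the data settings might be applied with the `datasets` library. The `"train"` split, the `"label"` column name, and the filter-by-lowest-label-ids strategy are assumptions about `Xenova/quickdraw-small`, not confirmed details.

```python
from datasets import load_dataset

DATASET_NAME = "Xenova/quickdraw-small"
MAX_SAMPLES = None
NUM_CLASSES_FILTER = 5

# Assumption: the dataset has a "train" split and an integer ClassLabel column named "label".
dataset = load_dataset(DATASET_NAME, split="train")

if NUM_CLASSES_FILTER is not None:
    # Keep only the first NUM_CLASSES_FILTER classes for the proof-of-concept.
    dataset = dataset.filter(lambda ex: ex["label"] < NUM_CLASSES_FILTER)

if MAX_SAMPLES is not None:
    dataset = dataset.select(range(min(MAX_SAMPLES, len(dataset))))

# NUM_CLASSES is resolved after loading, as the config notes.
NUM_CLASSES = (
    NUM_CLASSES_FILTER
    if NUM_CLASSES_FILTER is not None
    else dataset.features["label"].num_classes
)
```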