This commit is contained in:
zcr
2026-03-17 11:29:47 +08:00
parent a6d9bac6d0
commit 06659057c3
26 changed files with 3742 additions and 0 deletions

View File

@@ -0,0 +1,71 @@
{
"models": {
"decoder": {
"name": "ElasticSLatRadianceFieldDecoder",
"args": {
"resolution": 64,
"model_channels": 768,
"latent_channels": 8,
"num_blocks": 12,
"num_heads": 12,
"mlp_ratio": 4,
"attn_mode": "swin",
"window_size": 8,
"use_fp16": true,
"representation_config": {
"rank": 16,
"dim": 8
}
}
}
},
"dataset": {
"name": "SLat2Render",
"args": {
"image_size": 512,
"latent_model": "dinov2_vitl14_reg_slat_enc_swin8_B_64l8_fp16",
"min_aesthetic_score": 4.5,
"max_num_voxels": 32768
}
},
"trainer": {
"name": "SLatVaeRadianceFieldDecoderTrainer",
"args": {
"max_steps": 1000000,
"batch_size_per_gpu": 4,
"batch_split": 2,
"optimizer": {
"name": "AdamW",
"args": {
"lr": 1e-4,
"weight_decay": 0.0
}
},
"ema_rate": [
0.9999
],
"fp16_mode": "inflat_all",
"fp16_scale_growth": 0.001,
"elastic": {
"name": "LinearMemoryController",
"args": {
"target_ratio": 0.75,
"max_mem_ratio_start": 0.5
}
},
"grad_clip": {
"name": "AdaptiveGradClipper",
"args": {
"max_norm": 1.0,
"clip_percentile": 95
}
},
"i_log": 500,
"i_sample": 10000,
"i_save": 10000,
"loss_type": "l1",
"lambda_ssim": 0.2,
"lambda_lpips": 0.2
}
}
}

View File

@@ -0,0 +1,105 @@
{
"models": {
"encoder": {
"name": "ElasticSLatEncoder",
"args": {
"resolution": 64,
"in_channels": 1024,
"model_channels": 768,
"latent_channels": 8,
"num_blocks": 12,
"num_heads": 12,
"mlp_ratio": 4,
"attn_mode": "swin",
"window_size": 8,
"use_fp16": true
}
},
"decoder": {
"name": "ElasticSLatGaussianDecoder",
"args": {
"resolution": 64,
"model_channels": 768,
"latent_channels": 8,
"num_blocks": 12,
"num_heads": 12,
"mlp_ratio": 4,
"attn_mode": "swin",
"window_size": 8,
"use_fp16": true,
"representation_config": {
"lr": {
"_xyz": 1.0,
"_features_dc": 1.0,
"_opacity": 1.0,
"_scaling": 1.0,
"_rotation": 0.1
},
"perturb_offset": true,
"voxel_size": 1.5,
"num_gaussians": 32,
"2d_filter_kernel_size": 0.1,
"3d_filter_kernel_size": 9e-4,
"scaling_bias": 4e-3,
"opacity_bias": 0.1,
"scaling_activation": "softplus"
}
}
}
},
"dataset": {
"name": "SparseFeat2Render",
"args": {
"image_size": 512,
"model": "dinov2_vitl14_reg",
"resolution": 64,
"min_aesthetic_score": 4.5,
"max_num_voxels": 32768
}
},
"trainer": {
"name": "SLatVaeGaussianTrainer",
"args": {
"max_steps": 1000000,
"batch_size_per_gpu": 4,
"batch_split": 2,
"optimizer": {
"name": "AdamW",
"args": {
"lr": 1e-4,
"weight_decay": 0.0
}
},
"ema_rate": [
0.9999
],
"fp16_mode": "inflat_all",
"fp16_scale_growth": 0.001,
"elastic": {
"name": "LinearMemoryController",
"args": {
"target_ratio": 0.75,
"max_mem_ratio_start": 0.5
}
},
"grad_clip": {
"name": "AdaptiveGradClipper",
"args": {
"max_norm": 1.0,
"clip_percentile": 95
}
},
"i_log": 500,
"i_sample": 10000,
"i_save": 10000,
"loss_type": "l1",
"lambda_ssim": 0.2,
"lambda_lpips": 0.2,
"lambda_kl": 1e-06,
"regularizations": {
"lambda_vol": 10000.0,
"lambda_opacity": 0.001
}
}
}
}