|
| 1 | +name: "imagecondition" |
| 2 | +tag: "${rmspace:${system.prompt_processor.prompt},_}" |
| 3 | +exp_root_dir: "outputs" |
| 4 | +seed: 0 |
| 5 | + |
| 6 | +data_type: "single-image-datamodule" |
| 7 | +data: |
| 8 | + image_path: ./load/images/hamburger_rgba.png |
| 9 | + height: 256 |
| 10 | + width: 256 |
| 11 | + default_elevation_deg: 0.0 |
| 12 | + default_azimuth_deg: 0.0 |
| 13 | + default_camera_distance: 3.8 |
| 14 | + default_fovy_deg: 20.0 |
| 15 | + random_camera: |
| 16 | + batch_size: 4 |
| 17 | + height: 256 |
| 18 | + width: 256 |
| 19 | + eval_height: 512 |
| 20 | + eval_width: 512 |
| 21 | + eval_batch_size: 1 |
| 22 | + elevation_range: [-10, 80] |
| 23 | + azimuth_range: [-180, 180] |
| 24 | + camera_distance_range: [3.8, 3.8] |
| 25 | + fovy_range: [20.0, 20.0] # Zero123 has fixed fovy |
| 26 | + progressive_until: 0 |
| 27 | + camera_perturb: 0.0 |
| 28 | + center_perturb: 0.0 |
| 29 | + up_perturb: 0.0 |
| 30 | + light_position_perturb: 1.0 |
| 31 | + light_distance_range: [7.5, 10.0] |
| 32 | + eval_elevation_deg: ${data.default_elevation_deg} |
| 33 | + eval_camera_distance: ${data.default_camera_distance} |
| 34 | + eval_fovy_deg: ${data.default_fovy_deg} |
| 35 | + light_sample_strategy: "dreamfusion" |
| 36 | + batch_uniform_azimuth: false |
| 37 | + n_val_views: 30 |
| 38 | + n_test_views: 120 |
| 39 | + |
| 40 | +system_type: "image-condition-dreamfusion-system" |
| 41 | +system: |
| 42 | + geometry_type: "implicit-volume" |
| 43 | + geometry: |
| 44 | + radius: 2.0 |
| 45 | + normal_type: "analytic" |
| 46 | + |
| 47 | + # the density initialization proposed in the DreamFusion paper |
| 48 | + # does not work very well |
| 49 | + # density_bias: "blob_dreamfusion" |
| 50 | + # density_activation: exp |
| 51 | + # density_blob_scale: 5. |
| 52 | + # density_blob_std: 0.2 |
| 53 | + |
| 54 | + # use Magic3D density initialization instead |
| 55 | + density_bias: "blob_magic3d" |
| 56 | + density_activation: softplus |
| 57 | + density_blob_scale: 10. |
| 58 | + density_blob_std: 0.5 |
| 59 | + |
| 60 | + # coarse to fine hash grid encoding |
| 61 | + # to ensure smooth analytic normals |
| 62 | + pos_encoding_config: |
| 63 | + otype: HashGrid |
| 64 | + n_levels: 16 |
| 65 | + n_features_per_level: 2 |
| 66 | + log2_hashmap_size: 19 |
| 67 | + base_resolution: 16 |
| 68 | + per_level_scale: 1.447269237440378 # max resolution 4096 |
| 69 | + mlp_network_config: |
| 70 | + otype: "VanillaMLP" |
| 71 | + activation: "ReLU" |
| 72 | + output_activation: "none" |
| 73 | + n_neurons: 64 |
| 74 | + n_hidden_layers: 2 |
| 75 | + |
| 76 | + material_type: "diffuse-with-point-light-material" |
| 77 | + material: |
| 78 | + ambient_only_steps: 100000 |
| 79 | + textureless_prob: 0.05 |
| 80 | + albedo_activation: sigmoid |
| 81 | + |
| 82 | + background_type: "neural-environment-map-background" |
| 83 | + background: |
| 84 | + color_activation: sigmoid |
| 85 | + |
| 86 | + renderer_type: "nerf-volume-renderer" |
| 87 | + renderer: |
| 88 | + radius: ${system.geometry.radius} |
| 89 | + num_samples_per_ray: 512 |
| 90 | + return_comp_normal: ${gt0:${system.loss.lambda_normal_smooth}} |
| 91 | + return_normal_perturb: ${gt0:${system.loss.lambda_3d_normal_smooth}} |
| 92 | + |
| 93 | + prompt_processor_type: "stable-diffusion-prompt-processor" |
| 94 | + prompt_processor: |
| 95 | + pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" |
| 96 | + prompt: "a DSLR photo of a delicious hamburger" |
| 97 | + |
| 98 | + guidance_type: "stable-diffusion-guidance" |
| 99 | + guidance: |
| 100 | + pretrained_model_name_or_path: "runwayml/stable-diffusion-v1-5" |
| 101 | + guidance_scale: 7.5 |
| 102 | + min_step_percent: 0.2 |
| 103 | + # min_step_percent: [0, 0.66, 0.33, 2000] # (start_iter, start_val, end_val, end_iter) |
| 104 | + max_step_percent: 0.6 |
| 105 | + # max_step_percent: [0, 0.98, 0.66, 2000] |
| 106 | + |
| 107 | + # prompt_processor_type: "deep-floyd-prompt-processor" |
| 108 | + # prompt_processor: |
| 109 | + # pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" |
| 110 | + # prompt: "a DSLR photo of a delicious hamburger" |
| 111 | + |
| 112 | + # guidance_type: "deep-floyd-guidance" |
| 113 | + # guidance: |
| 114 | + # pretrained_model_name_or_path: "DeepFloyd/IF-I-XL-v1.0" |
| 115 | + # guidance_scale: 7.5 |
| 116 | + # min_step_percent: 0.2 |
| 117 | + # # min_step_percent: [0, 0.66, 0.33, 2000] # (start_iter, start_val, end_val, end_iter) |
| 118 | + # max_step_percent: 0.6 |
| 119 | + # # max_step_percent: [0, 0.98, 0.66, 2000] |
| 120 | + |
| 121 | + freq: |
| 122 | + ref_only_steps: 0 |
| 123 | + guidance_eval: 13 |
| 124 | + |
| 125 | + loggers: |
| 126 | + wandb: |
| 127 | + enable: false |
| 128 | + project: 'threestudio' |
| 129 | + name: null |
| 130 | + |
| 131 | + loss: |
| 132 | + lambda_sds: 0.1 |
| 133 | + lambda_rgb: 400.0 |
| 134 | + lambda_mask: 50.0 |
| 135 | + lambda_depth: 0.05 |
| 136 | + lambda_normal_smooth: 2.0 |
| 137 | + lambda_3d_normal_smooth: 5.0 |
| 138 | + lambda_orient: 0.01 |
| 139 | + lambda_sparsity: 0.01 |
| 140 | + lambda_opaque: 0.05 |
| 141 | + |
| 142 | + optimizer: |
| 143 | + name: Adan |
| 144 | + args: |
| 145 | + lr: 0.005 |
| 146 | + max_grad_norm: 5.0 |
| 147 | + eps: 1.e-8 |
| 148 | + weight_decay: 1.e-5 |
| 149 | + params: |
| 150 | + geometry: |
| 151 | + lr: ${system.optimizer.args.lr} |
| 152 | + background: |
| 153 | + lr: 0.0 |
| 154 | + |
| 155 | +trainer: |
| 156 | + max_steps: 2000 |
| 157 | + log_every_n_steps: 1 |
| 158 | + num_sanity_val_steps: 0 |
| 159 | + val_check_interval: 20 |
| 160 | + enable_progress_bar: true |
| 161 | + precision: 16-mixed |
| 162 | + |
| 163 | +checkpoint: |
| 164 | + save_last: true # save at each validation time |
| 165 | + save_top_k: -1 |
| 166 | + every_n_train_steps: 20 # ${trainer.max_steps} |
0 commit comments