<!DOCTYPE html>
<html>
<head>
<title>Extracting Triangular 3D Models, Materials, and Lighting From Images</title>
<meta property="og:description" content="Extracting Triangular 3D Models, Materials, and Lighting From Images"/>
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<link href="https://fonts.googleapis.com/css2?family=Material+Icons" rel="stylesheet">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.3.2/jquery.min.js" type="text/javascript"></script>
<script type="text/javascript" src="../js/hidebib.js"></script>
<style type="text/css">
body {
font-family: "Titillium Web", "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
font-weight: 300;
font-size: 17px;
margin-left: auto;
margin-right: auto;
width: 980px;
}
h1 {
font-weight:300;
line-height: 1.15em;
}
h2 {
font-size: 1.75em;
}
a:link,a:visited {
color: #1367a7;
text-decoration: none;
}
a:hover {
color: #208799;
}
h1, h2, h3 {
text-align: center;
}
h1 {
font-size: 40px;
font-weight: 500;
}
h2 {
font-weight: 400;
margin: 16px 0px 4px 0px;
}
.paper-title {
padding: 16px 0px 16px 0px;
}
section {
margin: 32px 0px 32px 0px;
text-align: justify;
clear: both;
}
.col-6 {
width: 16.6%;
float: left;
}
.col-5 {
width: 20%;
float: left;
}
.col-4 {
width: 25%;
float: left;
}
.col-3 {
width: 33%;
float: left;
}
.col-2 {
width: 50%;
float: left;
}
.row, .author-row, .affil-row {
overflow: auto;
}
.author-row, .affil-row {
font-size: 20px;
}
.row {
margin: 16px 0px 16px 0px;
}
.authors {
font-size: 18px;
}
.affil-row {
margin-top: 16px;
}
.teaser {
max-width: 100%;
}
.text-center {
text-align: center;
}
.screenshot {
width: 256px;
border: 1px solid #ddd;
}
.screenshot-el {
margin-bottom: 16px;
}
hr {
height: 1px;
border: 0;
border-top: 1px solid #ddd;
margin: 0;
}
.material-icons {
vertical-align: -6px;
}
p {
line-height: 1.25em;
}
.caption {
font-size: 16px;
/*font-style: italic;*/
color: #666;
text-align: left;
margin-top: 8px;
margin-bottom: 8px;
}
video {
display: block;
margin: auto;
}
figure {
display: block;
margin: auto;
margin-top: 10px;
margin-bottom: 10px;
}
#bibtex pre {
font-size: 14px;
background-color: #eee;
padding: 16px;
}
.blue {
color: #2c82c9;
font-weight: bold;
}
.orange {
color: #d35400;
font-weight: bold;
}
.flex-row {
display: flex;
flex-flow: row wrap;
justify-content: space-around;
padding: 0;
margin: 0;
list-style: none;
}
.paper-btn {
position: relative;
text-align: center;
display: inline-block;
margin: 8px;
padding: 8px 8px;
border-width: 0;
outline: none;
border-radius: 2px;
background-color: #1367a7;
color: #ecf0f1 !important;
font-size: 20px;
width: 100px;
font-weight: 600;
}
.supp-btn {
position: relative;
text-align: center;
display: inline-block;
margin: 8px;
padding: 8px 8px;
border-width: 0;
outline: none;
border-radius: 2px;
background-color: #1367a7;
color: #ecf0f1 !important;
font-size: 20px;
width: 150px;
font-weight: 600;
}
.paper-btn-parent {
display: flex;
justify-content: center;
margin: 16px 0px;
}
.paper-btn:hover {
opacity: 0.85;
}
.container {
margin-left: auto;
margin-right: auto;
padding-left: 16px;
padding-right: 16px;
}
.venue {
color: #1367a7;
}
.topnav {
overflow: hidden;
background-color: #EEEEEE;
}
.topnav a {
float: left;
color: black;
text-align: center;
padding: 14px 16px;
text-decoration: none;
font-size: 16px;
}
</style>
<div class="topnav" id="myTopnav">
<a href="https://www.nvidia.com/"><img width="100%" src="assets/nvidia.svg"></a>
<a href="https://www.nvidia.com/en-us/research/" ><strong>NVIDIA Research</strong></a>
<a href="https://nv-tlabs.github.io/" ><strong>Toronto AI Lab</strong></a>
</div>
<!-- End : Google Analytics Code -->
<script type="text/javascript" src="../js/hidebib.js"></script>
<link href='https://fonts.googleapis.com/css?family=Titillium+Web:400,600,400italic,600italic,300,300italic' rel='stylesheet' type='text/css'>
<head>
<title>Extracting Triangular 3D Models, Materials, and Lighting From Images</title>
<meta property="og:description" content="Extracting Triangular 3D Models, Materials, and Lighting From Images"/>
<link href="https://fonts.googleapis.com/css2?family=Material+Icons" rel="stylesheet">
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-6HHDEXF452"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-6HHDEXF452');
</script>
</head>
<body>
<div class="container">
<div class="paper-title">
<h1>Extracting Triangular 3D Models, Materials, and Lighting From Images</h1>
</div>
<div id="authors">
<div class="author-row">
<div class="col-4 text-center"><a href="https://research.nvidia.com/person/jacob-munkberg">Jacob Munkberg</a><sup>1</sup></div>
<div class="col-4 text-center"><a href="https://research.nvidia.com/person/jon-hasselgren">Jon Hasselgren</a><sup>1</sup></div>
<div class="col-4 text-center"><a href="http://www.cs.toronto.edu/~shenti11/">Tianchang Shen</a><sup>1,2,3</sup></div>
<div class="col-4 text-center"><a href="http://www.cs.toronto.edu/~jungao/">Jun Gao</a><sup>1,2,3</sup></div>
<div class="col-4 text-center"><a href="http://www.cs.toronto.edu/~wenzheng/">Wenzheng Chen</a><sup>1,2,3</sup></div>
<div class="col-4 text-center"><a href="https://research.nvidia.com/person/alex-evans">Alex Evans</a><sup>1</sup></div>
<div class="col-4 text-center"><a href="https://research.nvidia.com/person/thomas-mueller">Thomas Müller</a><sup>1</sup></div>
<div class="col-4 text-center"><a href="https://www.cs.toronto.edu/~fidler/">Sanja Fidler</a><sup>1,2,3</sup></div>
</div>
<div class="affil-row">
<div class="col-3 text-center"><sup>1</sup>NVIDIA</a></div>
<div class="col-3 text-center"><sup>2</sup>University of Toronto</div>
<div class="col-3 text-center"><sup>3</sup>Vector Institute</div>
</div>
<div class="affil-row">
<div class="venue text-center"><b>CVPR 2022 (Oral)</b></div>
</div>
<div style="clear: both">
<div class="paper-btn-parent">
<a class="supp-btn" href="assets/paper.pdf">
<span class="material-icons"> description </span>
Paper
</a>
<a class="supp-btn" href="assets/bib.txt">
<span class="material-icons"> description </span>
BibTeX
</a>
<a class="supp-btn" href="https://github.com/NVlabs/nvdiffrec">
<span class="material-icons"> description </span>
Code
</a>
</div></div>
</div>
<section id="teaser">
<figure style="width: 100%;">
<a href="assets/system.JPG">
<img width="100%" src="assets/system.JPG">
</a>
<p class="caption" style="margin-bottom: 1px;">
We learn topology, materials, and environment map lighting jointly from 2D image supervision. We
directly optimize the topology of a triangle mesh, learn materials through volumetric texturing,
and leverage differentiable split sum environment lighting.
Our output representation is a triangle mesh with spatially varying 2D textures and a high dynamic range environment map,
which can be used unmodified in standard game engines. <a href="https://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/">Spot model</a>
by Keenan Crane.
</p>
</figure>
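<p>
To make the topology term concrete, the following is a minimal, hypothetical PyTorch sketch of the core
mechanism in differentiable marching tetrahedra: mesh vertices are placed by linear interpolation along
sign-change edges of a learnable signed distance field, so an image-space loss can move the surface (and,
across the grid, its topology) by gradient descent. This is a toy illustration under our own naming, not
the nvdiffrec implementation.
</p>
<pre style="font-size: 14px; background-color: #eee; padding: 16px;"><code># Hypothetical sketch: one sign-change edge of a tetrahedral grid.
import torch

def interpolate_crossing(v0, v1, s0, s1):
    """Place a mesh vertex where the SDF changes sign along edge (v0, v1).

    The interpolation weight is differentiable w.r.t. the SDF values
    s0, s1, which is what lets topology be optimized by gradient descent.
    """
    t = s0 / (s0 - s1)          # valid because s0, s1 have opposite signs
    return v0 + t * (v1 - v0)

# Toy example: a single edge crossing the zero level set.
v0 = torch.tensor([0.0, 0.0, 0.0])
v1 = torch.tensor([1.0, 0.0, 0.0])
sdf = torch.tensor([-0.25, 0.75], requires_grad=True)  # learnable SDF samples
vert = interpolate_crossing(v0, v1, sdf[0], sdf[1])

# Stand-in for a rendering loss on the extracted surface.
loss = ((vert - torch.tensor([0.5, 0.0, 0.0])) ** 2).sum()
loss.backward()                 # gradients reach the SDF parameters
print(vert, sdf.grad)
</code></pre>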
</section>
<section id="abstract"/>
<h2>Abstract</h2>
<hr>
<p>
We present an efficient method for joint optimization of topology, materials and lighting
from multi-view image observations. Unlike recent multi-view reconstruction approaches,
which typically produce entangled 3D representations encoded in neural networks, we output
triangle meshes with spatially-varying materials and environment
lighting that can be deployed in any traditional graphics engine unmodified.
We leverage recent work in differentiable rendering, a coordinate-based network
that compactly represents volumetric texturing, and differentiable marching
tetrahedrons to enable gradient-based optimization directly
on the surface mesh. Finally, we introduce a differentiable formulation of the split sum
approximation of environment lighting to efficiently recover all-frequency lighting.
Experiments show our extracted models used in advanced scene editing, material decomposition,
and high-quality view interpolation, all running at interactive rates in triangle-based
renderers (rasterizers and path tracers).
</p>
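<p>
To illustrate the volumetric texturing idea, here is a small, hypothetical PyTorch sketch of a
coordinate-based material network: an MLP maps 3D surface points to PBR channels and is queried
wherever the differentiable renderer shades the current mesh. The class name and channel layout are
illustrative assumptions; the actual system also bakes the optimized field into standard 2D textures.
</p>
<pre style="font-size: 14px; background-color: #eee; padding: 16px;"><code># Hypothetical sketch (not the nvdiffrec API): a coordinate-based network
# representing spatially varying PBR materials as a volumetric field.
import torch
import torch.nn as nn

class MaterialField(nn.Module):
    """Maps 3D surface points to diffuse color, specular parameters,
    and a normal perturbation, all optimized from image-space losses."""
    def __init__(self, hidden=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(3, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 9),   # kd (3) + ks (3) + normal offset (3)
        )

    def forward(self, x):           # x: (N, 3) points on the current mesh
        kd, ks, nrm = self.net(x).split(3, dim=-1)
        # Squash material channels to valid ranges.
        return torch.sigmoid(kd), torch.sigmoid(ks), torch.tanh(nrm)

field = MaterialField()
pts = torch.rand(1024, 3)           # surface samples hit by the rasterizer
kd, ks, nrm = field(pts)            # queried during differentiable shading
</code></pre>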
</section>
<hr>
<section id="teaser-videos">
<div class="flex-row">
<figure style="width: 70%;">
<video class="centered" width="90%" controls muted loop autoplay>
<source src="assets/video.mp4" type="video/mp4">
Your browser does not support the video tag.
</video>
</figure>
<div style="width: 30%;">
<br><br>
<p>Video illustrating our training progress, scene editing examples, and automatic LOD. All examples are enabled by our <strong>explicit</strong> decomposition
into a triangle mesh, PBR materials, and an HDR environment light, directly compatible with traditional graphics engines.
Feel free to download the <a href="assets/video.mp4">video</a>; native resolution is 1024&times;1024 pixels.
</p>
</div>
</div>
</section>
<section id="results">
<h2>3D model reconstruction and intrinsic decomposition from images</h2>
<hr>
<figure style="width: 100%;">
<a href="assets/materials.JPG">
<img width="100%" src="assets/materials.JPG">
</a>
<p class="caption" style="margin-bottom: 1px;">
Our reconstruction from 100 images. We reconstruct a triangle mesh, PBR materials stored in 2D textures, and an HDR environment map.
Materials scene from the <a href="https://github.com/bmild/nerf">NeRF synthetic dataset</a>.
</p>
</figure>
<h2>Scene manipulation with the reconstructed models</h2>
<hr>
<figure style="width: 100%;">
<a href="assets/teaser.JPG">
<img width="100%" src="assets/teaser.JPG">
</a>
<p class="caption" style="margin-bottom: 1px;">
We reconstruct a triangular mesh with unknown topology, spatially-varying materials, and lighting from a set of multi-view
images. We show examples of scene manipulation using off-the-shelf modeling tools, enabled by our reconstructed 3D model.
Dataset from <a href="https://markboss.me/publication/2021-nerd/">NeRD: Neural Reflectance Decomposition from Image Collections</a>.
</p>
</figure>
<h2>All-frequency environment lighting</h2>
<hr>
<figure style="width: 100%;">
<a href="assets/splitsum.JPG">
<img width="100%" src="assets/splitsum.JPG">
</a>
<p class="caption" style="margin-bottom: 1px;">
Environment lighting approximated with spherical Gaussians (128 lobes) vs. our split sum approximation. The training set consists of 256
path-traced images with Monte Carlo sampled environment lighting using a high-resolution HDR probe. We assume known geometry and
optimize materials and lighting using identical settings for both methods. Reported image metrics are the arithmetic mean over the 16 (novel)
views in the test set. Note that the split sum approximation is able to capture high-frequency lighting.
Probe from <a href="https://polyhaven.com/">Poly Haven</a>.
</p>
</figure>
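<p>
For readers unfamiliar with the split sum approximation (Karis 2013), the sketch below shows its
shading side in hypothetical PyTorch: specular radiance is approximated by a lookup into a
roughness-prefiltered environment map, scaled and biased by a precomputed environment-BRDF table.
Function and tensor names are placeholders, and the lookups use nearest-neighbor indexing for brevity
where a real renderer would interpolate; the contribution here is making the prefiltering
differentiable so the HDR probe itself can be optimized.
</p>
<pre style="font-size: 14px; background-color: #eee; padding: 16px;"><code># Hypothetical sketch of split sum specular shading:
#   specular ~= prefiltered_env(reflection, roughness) * (F0 * A + B)
# where (A, B) come from a precomputed BRDF lookup table.
import torch

def split_sum_specular(prefiltered, brdf_lut, refl_uv, roughness, n_dot_v, F0):
    """Evaluate split-sum specular shading for a batch of surface points.

    prefiltered: (mips, H, W, 3) roughness-filtered environment map (learnable)
    brdf_lut:    (R, V, 2) scale/bias (A, B) table over roughness x n.v
    """
    mips = prefiltered.shape[0]
    mip = (roughness * (mips - 1)).long()   # coarser mip for rougher lobes
    u = (refl_uv[:, 0] * (prefiltered.shape[2] - 1)).long()
    v = (refl_uv[:, 1] * (prefiltered.shape[1] - 1)).long()
    L = prefiltered[mip, v, u]              # (N, 3) filtered radiance

    r = (roughness * (brdf_lut.shape[0] - 1)).long()
    c = (n_dot_v * (brdf_lut.shape[1] - 1)).long()
    A, B = brdf_lut[r, c, 0], brdf_lut[r, c, 1]   # env-BRDF scale and bias
    return L * (F0 * A.unsqueeze(-1) + B.unsqueeze(-1))

# Toy call with random stand-in data.
N = 8
out = split_sum_specular(
    prefiltered=torch.rand(5, 64, 64, 3),
    brdf_lut=torch.rand(32, 32, 2),
    refl_uv=torch.rand(N, 2),
    roughness=torch.rand(N),
    n_dot_v=torch.rand(N),
    F0=torch.full((N, 3), 0.04),
)
print(out.shape)  # torch.Size([8, 3])
</code></pre>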
</section>
<section id="bibtex">
<h2>Citation</h2>
<hr>
<pre><code>@inproceedings{Munkberg_2022_CVPR,
    author    = {Munkberg, Jacob and Hasselgren, Jon and Shen, Tianchang and Gao, Jun
                 and Chen, Wenzheng and Evans, Alex and M\"uller, Thomas and Fidler, Sanja},
    title     = {Extracting Triangular 3D Models, Materials, and Lighting From Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2022},
    pages     = {8280-8290}
}</code></pre>
</section>
<br />
<section id="paper">
<h2>Paper</h2>
<hr>
<div class="flex-row">
<div style="box-sizing: border-box; padding: 16px; margin: auto;">
<a href="assets/paper.pdf"><img class="screenshot" src="assets/paper_preview.JPG"></a>
</div>
<div style="width: 50%">
<p><b>Extracting Triangular 3D Models, Materials, and Lighting From Images</b></p>
<p>Jacob Munkberg, Jon Hasselgren, Tianchang Shen, Jun Gao, Wenzheng Chen, Alex Evans, Thomas Müller, Sanja Fidler</p>
<div><span class="material-icons"> description </span><a href="assets/paper.pdf"> Preprint</a></div>
<div><span class="material-icons"> description </span><a href="https://arxiv.org/abs/2111.12503"> arXiv version</a></div>
<div><span class="material-icons"> description </span><a href="assets/video.mp4"> Video</a></div>
<div><span class="material-icons"> insert_comment </span><a href="assets/bib.txt"> BibTeX</a></div>
</div>
</div>
</section>
</div>
</body>
</html>