{"title":"EFM3D: A Benchmark for Measuring Progress Towards 3D Egocentric Foundation Models","short_title":"EFM3D","arxiv_id":"2406.10224","tex_dir":"arXiv-EFM3D","pdf_file":"EFM3D.pdf","relevance_rank":5,"relevance_category":"egocentric 3D substrate and actor-visible representation","adoptable_ideas":["Use ASE/EFM signals as the actor-visible local evidence substrate.","Separate local EVL target support from broader semi-dense/fused scene memory."]}
{"title":"GenNBV: Generalizable Next-Best-View Policy for Active 3D Reconstruction","short_title":"GenNBV","arxiv_id":"2402.16174","tex_dir":"arXiv-GenNBV","pdf_file":"GenNBV.pdf","relevance_rank":3,"relevance_category":"generalizable continuous NBV policy bridge","adoptable_ideas":["Use 5-DoF continuous NBV policy framing as a future bridge after finite-candidate evidence.","Contrast generalization claims with bounded ARIA mesh/oracle supervision."]}
{"title":"Project Aria: A New Tool for Egocentric Multi-Modal AI Research","short_title":"Project Aria","arxiv_id":"2308.13561","tex_dir":"arXiv-project-aria","pdf_file":"project-aria.pdf","relevance_rank":5,"relevance_category":"egocentric sensing platform and dataset assumptions","adoptable_ideas":["Ground sensor, calibration, trajectory, and multimodal logging assumptions in Project Aria.","Keep actor-visible modality boundaries explicit for V1 target conditioning."]}
{"title":"SceneScript: Reconstructing Scenes With An Autoregressive Structured Language Model","short_title":"SceneScript","arxiv_id":"2403.13064","tex_dir":"arXiv-scene-script","pdf_file":"scene-script.pdf","relevance_rank":2,"relevance_category":"structured scene semantics bridge","adoptable_ideas":["Treat autoregressive structured scene models as future semantic-global memory.","Do not use structured semantics as actor-visible GT in the thesis core."]}
{"title":"VIN-NBV: A View Introspection Network for Next-Best-View Selection","short_title":"VIN-NBV","arxiv_id":"2505.06219","tex_dir":"arXiv-VIN-NBV","pdf_file":"VIN-NBV.pdf","relevance_rank":5,"relevance_category":"quality-driven NBV and RRI supervision","adoptable_ideas":["Adopt point-to-mesh and mesh-to-point RRI as the oracle quality signal.","Use the one-step candidate scorer as the myopic baseline for Q_H."]}
{"title":"Informative Object-centric Next Best View for Object-aware 3D Gaussian Splatting in Cluttered Scenes","short_title":"Instance-NBV","arxiv_id":"2602.08266","tex_dir":"arXiv-Instance-NBV","pdf_file":"Instance-NBV.pdf","relevance_rank":4,"relevance_category":"object-centric NBV and 3DGS target utility bridge","adoptable_ideas":["Compare target/object-aware view utility with ARIA target-RRI.","Use object-centric 3DGS NBV as discussion pressure, not as the core reward."]}
{"title":"PB-NBV: Efficient Projection-Based Next-Best-View Planning Framework for Reconstruction of Unknown Objects","short_title":"PB-NBV","arxiv_id":"2501.10663","tex_dir":"arXiv-PB-NBV","pdf_file":"PB-NBV.pdf","relevance_rank":3,"relevance_category":"projection-based candidate proposal and shortlist baseline","adoptable_ideas":["Use projection-based candidate scoring as a cheap shortlist or diagnostic baseline.","Contrast projection proxies with oracle target-RRI supervision."]}
{"title":"Next Best View Selections for Semantic and Dynamic 3D Gaussian Splatting","short_title":"Dynamic 3DGS NBV","arxiv_id":"2512.22771","tex_dir":"arXiv-Dynamic-3DGS","pdf_file":"Dynamic-3DGS.pdf","relevance_rank":3,"relevance_category":"semantic and dynamic 3DGS NBV bridge","adoptable_ideas":["Use Fisher/semantic/dynamic utility as future radiance-field comparison.","Keep dynamic 3DGS claims outside the mesh-backed thesis core."]}
{"title":"Hestia: Voxel-Face-Aware Hierarchical Next-Best-View Acquisition for Efficient 3D Reconstruction","short_title":"Hestia","arxiv_id":"2508.01014","tex_dir":"arXiv-Hestia","pdf_file":"Hestia.pdf","relevance_rank":4,"relevance_category":"hierarchical target-to-pose NBV bridge","adoptable_ideas":["Adopt target-then-pose factorization as an RQ6 bridge.","Use directional observability as a design hint while rejecting coverage reward as the core objective."]}
{"title":"Offline Reinforcement Learning as One Big Sequence Modeling Problem","short_title":"Trajectory Transformer","arxiv_id":"2106.02039","tex_dir":"arXiv-Trajectory-Transformer","pdf_file":"Trajectory-Transformer.pdf","relevance_rank":3,"relevance_category":"sequence-model rollout planning bridge","adoptable_ideas":["Use trajectory-as-sequence modeling as a later rollout-policy ablation.","Keep beam or sequence rollouts after replay validity and Q_H headroom are established."]}
{"title":"Playing Atari with Deep Reinforcement Learning","short_title":"DQN","arxiv_id":"1312.5602","tex_dir":"arXiv-DQN","pdf_file":"DQN.pdf","relevance_rank":2,"relevance_category":"foundational discrete value learning","adoptable_ideas":["Cite as value-learning lineage for finite discrete actions.","Avoid presenting Atari-style online exploration as the thesis setting."]}
{"title":"Deep Reinforcement Learning with Double Q-Learning","short_title":"Double DQN","arxiv_id":"1509.06461","tex_dir":"arXiv-Double-DQN","pdf_file":"Double-DQN.pdf","relevance_rank":4,"relevance_category":"value-estimation stabilization for finite candidates","adoptable_ideas":["Use decoupled selection/evaluation as an overestimation-control template for masked Q_H backups.","Report whether fitted backups overestimate oracle-rescored returns."]}
{"title":"Offline Reinforcement Learning with Implicit Q-Learning","short_title":"IQL","arxiv_id":"2110.06169","tex_dir":"arXiv-IQL","pdf_file":"IQL.pdf","relevance_rank":3,"relevance_category":"offline RL support-aware value learning bridge","adoptable_ideas":["Use expectile-style offline value learning as a support-aware bridge after Q_H.","Keep IQL as optional if finite-candidate supervised value learning is sufficient."]}
{"title":"Conservative Q-Learning for Offline Reinforcement Learning","short_title":"CQL","arxiv_id":"2006.04779","tex_dir":"arXiv-CQL","pdf_file":"CQL.pdf","relevance_rank":3,"relevance_category":"offline RL conservatism and unsupported-action control","adoptable_ideas":["Use conservative penalties as a possible invalid/unsupported action ablation.","Do not replace hard validity masks with learned conservatism."]}
{"title":"Off-Policy Deep Reinforcement Learning without Exploration","short_title":"BCQ","arxiv_id":"1812.02900","tex_dir":"arXiv-BCQ","pdf_file":"BCQ.pdf","relevance_rank":2,"relevance_category":"batch-constrained offline RL bridge","adoptable_ideas":["Use behavior-support constraints as future comparison for logged candidate distributions.","Keep BCQ outside the initial oracle-generated finite-candidate rollouts."]}
{"title":"Decision Transformer: Reinforcement Learning via Sequence Modeling","short_title":"Decision Transformer","arxiv_id":"2106.01345","tex_dir":"arXiv-Decision-Transformer","pdf_file":"Decision-Transformer.pdf","relevance_rank":3,"relevance_category":"return-conditioned sequence modeling bridge","adoptable_ideas":["Use return-conditioned trajectories as a later policy/control ablation.","Reserve sequence modeling for after replay rows and target returns are stable."]}
{"title":"Reinforcement Learning with Deep Energy-Based Policies","short_title":"Deep Energy-Based Policies","arxiv_id":"1702.08165","tex_dir":"arXiv-Deep-Energy-Based-Policies","pdf_file":"Deep-Energy-Based-Policies.pdf","relevance_rank":2,"relevance_category":"energy-based stochastic policy bridge","adoptable_ideas":["Use energy-based sampling only as a future stochastic policy comparison.","Keep thesis evidence on masked finite-candidate value selection."]}
{"title":"Stochastic Beams and Where to Find Them: The Gumbel-Top-k Trick for Sampling Sequences Without Replacement","short_title":"Gumbel-Top-k","arxiv_id":"1903.06059","tex_dir":"arXiv-Gumbel-Top-k","pdf_file":"Gumbel-Top-k.pdf","relevance_rank":4,"relevance_category":"diverse rollout branch sampling","adoptable_ideas":["Use no-replacement stochastic top-k sampling for diverse rollout branches.","Document temperature/top-k sampling as a controlled rollout-generation knob."]}
{"title":"FisherRF: Active View Selection and Uncertainty Quantification for Radiance Fields using Fisher Information","short_title":"FisherRF","arxiv_id":"2311.17874","tex_dir":"arXiv-FisherRF","pdf_file":"FisherRF.pdf","url":"https://arxiv.org/abs/2311.17874","relevance_rank":4,"relevance_category":"Fisher-information NBV diagnostic and radiance-field bridge","adoptable_ideas":["Use Fisher uncertainty as a diagnostic support/uncertainty channel.","Avoid replacing target-RRI with Fisher information in the core objective."]}
{"title":"Next Best Sense: Guiding Vision and Touch with FisherRF for 3D Gaussian Splatting","short_title":"Next Best Sense","arxiv_id":"2410.04680","tex_dir":"arXiv-Next-Best-Sense","pdf_file":"Next-Best-Sense.pdf","relevance_rank":3,"relevance_category":"multimodal active sensing and 3DGS bridge","adoptable_ideas":["Use vision-touch active sensing as future modality expansion.","Keep thesis core on Aria/ASE vision geometry unless modality scope changes."]}
{"title":"Finding Optimal Viewpoints for Monocular 3D Human Pose Estimation in Dynamic 3D Gaussian Splatting Space","short_title":"FOV-HPE","arxiv_id":"","tex_dir":"","pdf_file":"","doi":"10.1109/AVSS65446.2025.11149906","url":"https://doi.org/10.1109/AVSS65446.2025.11149906","relevance_rank":1,"relevance_category":"downstream-task view utility background","adoptable_ideas":["Mention downstream-task viewpoint utility only as background.","Do not use human-pose utility to justify target-RRI claims."]}
{"title":"Query-Centric Trajectory Prediction","short_title":"QCNet","arxiv_id":"2306.10508","tex_dir":"arXiv-QCNet","pdf_file":"QCNet.pdf","url":"https://github.com/ZikangZhou/QCNet","relevance_rank":4,"relevance_category":"query-centric relative positional encoding for candidate sets","adoptable_ideas":["Adapt query-centric relative encodings to target-conditioned candidate rows.","Use candidate-local RPE as an ablation after MLP/DeepSets controls."]}
{"title":"Geometric Deep Learning: Grids, Groups, Graphs, Geodesics, and Gauges","short_title":"GDL","arxiv_id":"2104.13478","tex_dir":"arXiv-Geometric-Deep-Learning","pdf_file":"Geometric-Deep-Learning.pdf","relevance_rank":4,"relevance_category":"geometric inductive-bias framework","adoptable_ideas":["Use symmetry, local-frame, and permutation arguments to justify architecture choices.","Keep full equivariant tensor networks as ablations rather than default commitments."]}
{"title":"Deep Sets","short_title":"Deep Sets","arxiv_id":"1703.06114","tex_dir":"arXiv-Deep-Sets","pdf_file":"Deep-Sets.pdf","url":"https://papers.nips.cc/paper/6931-deep-sets","relevance_rank":4,"relevance_category":"permutation-invariant candidate-set baseline","adoptable_ideas":["Use pooled DeepSets as a required candidate-context control.","Verify candidate-row permutation invariance/equivariance in Q_H tests."]}
{"title":"Set Transformer: A Framework for Attention-based Permutation-Invariant Neural Networks","short_title":"Set Transformer","arxiv_id":"1810.00825","tex_dir":"arXiv-Set-Transformer","pdf_file":"Set-Transformer.pdf","url":"https://proceedings.mlr.press/v97/lee19d.html","relevance_rank":5,"relevance_category":"candidate-set attention architecture","adoptable_ideas":["Use masked Set Transformer interaction as the default finite-candidate Q_H candidate-context model.","Compare against independent scorer and DeepSets before attributing interaction gains."]}
{"title":"E(n) Equivariant Graph Neural Networks","short_title":"EGNN","arxiv_id":"2102.09844","tex_dir":"arXiv-EGNN","pdf_file":"EGNN.pdf","url":"https://proceedings.mlr.press/v139/satorras21a.html","relevance_rank":3,"relevance_category":"equivariant candidate-graph ablation","adoptable_ideas":["Use EGNN-style candidate/target graph processing as an ablation.","Test whether local-frame features make heavier equivariance unnecessary."]}
{"title":"SE(3)-Transformers: 3D Roto-Translation Equivariant Attention Networks","short_title":"SE(3)-Transformer","arxiv_id":"2006.10503","tex_dir":"arXiv-SE3-Transformer","pdf_file":"SE3-Transformer.pdf","relevance_rank":3,"relevance_category":"SE(3)-equivariant attention ablation","adoptable_ideas":["Use SE(3)-equivariant attention as a high-cost pose-aware ablation.","Keep it out of the minimum thesis path unless simpler relative encodings fail."]}
{"title":"e3nn Spherical Harmonics Documentation","short_title":"e3nn SH","arxiv_id":"","tex_dir":"","pdf_file":"","url":"https://docs.e3nn.org/en/stable/api/o3/o3_sh.html","relevance_rank":3,"relevance_category":"spherical-harmonic directional memory implementation","adoptable_ideas":["Use low-order spherical harmonics for target visibility memory over S^2.","Compare SH memory against simpler second-moment summaries."]}
{"title":"SCONE: Surface Coverage Optimization in Unknown Environments by Volumetric Integration","short_title":"SCONE","arxiv_id":"2208.10449","tex_dir":"arXiv-SCONE","pdf_file":"SCONE.pdf","url":"https://proceedings.neurips.cc/paper_files/paper/2022/hash/828c6d69bdf91fca7f2b97c4dc214e94-Abstract-Conference.html","relevance_rank":4,"relevance_category":"surface-coverage NBV baseline and support diagnostic","adoptable_ideas":["Use coverage/support overlap as a diagnostic bias or ablation.","Do not replace target-RRI with surface coverage as the thesis objective."]}
{"title":"MACARONS: Mapping And Coverage Anticipation with RGB Online Self-Supervision","short_title":"MACARONS","arxiv_id":"2303.03315","tex_dir":"arXiv-MACARONS","pdf_file":"MACARONS.pdf","relevance_rank":3,"relevance_category":"coverage anticipation and RGB self-supervision bridge","adoptable_ideas":["Use RGB online coverage anticipation as future self-supervised bridge.","Contrast coverage anticipation with supervised target-RRI labels."]}
{"title":"ProcTHOR: Large-Scale Embodied AI Using Procedural Generation","short_title":"ProcTHOR","arxiv_id":"2206.06994","tex_dir":"arXiv-ProcTHOR","pdf_file":"ProcTHOR.pdf","relevance_rank":2,"relevance_category":"procedural embodied simulation bridge","adoptable_ideas":["Use procedural scenes only as future stress tests if target-RRI supervision is preserved.","Do not move thesis core away from ASE mesh/oracle assets."]}
{"title":"Habitat: A Platform for Embodied AI Research","short_title":"Habitat","arxiv_id":"1904.01201","tex_dir":"arXiv-Habitat","pdf_file":"Habitat.pdf","relevance_rank":2,"relevance_category":"embodied simulator bridge","adoptable_ideas":["Consider Habitat only if mesh/oracle target-RRI labels can be preserved.","Keep external simulator work as bridge/future scope."]}
{"title":"Isaac Sim Sensors Documentation","short_title":"Isaac Sim Sensors","arxiv_id":"","tex_dir":"","pdf_file":"","url":"https://docs.isaacsim.omniverse.nvidia.com/latest/sensors/index.html","relevance_rank":2,"relevance_category":"sensor simulation bridge","adoptable_ideas":["Use Isaac sensor simulation for future modality and noise studies.","Do not make simulator fidelity a core thesis dependency."]}
{"title":"4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural Networks","short_title":"MinkowskiEngine","arxiv_id":"1904.08755","tex_dir":"arXiv-MinkowskiEngine","pdf_file":"MinkowskiEngine.pdf","url":"https://nvidia.github.io/MinkowskiEngine/","relevance_rank":3,"relevance_category":"sparse 3D convolution backbone","adoptable_ideas":["Use sparse convolutions as a geometry-encoder ablation.","Keep the minimum value model independent of heavy sparse backbones."]}
{"title":"Point Transformer","short_title":"Point Transformer","arxiv_id":"2012.09164","tex_dir":"arXiv-Point-Transformer","pdf_file":"Point-Transformer.pdf","relevance_rank":4,"relevance_category":"point-cloud representation backbone","adoptable_ideas":["Use point-cloud transformer features for semi-dense/fused geometry ablations.","Compare against simpler pooled geometry features before adding backbone cost."]}
{"title":"Point Transformer V3: Simpler, Faster, Stronger","short_title":"Point Transformer V3","arxiv_id":"2312.10035","tex_dir":"arXiv-Point-Transformer-V3","pdf_file":"Point-Transformer-V3.pdf","relevance_rank":4,"relevance_category":"efficient point-cloud transformer backbone","adoptable_ideas":["Use PTv3 as the scalable point-encoder candidate if point features become necessary.","Treat it as representation ablation, not initial Q_H requirement."]}
{"title":"KPConv: Flexible and Deformable Convolution for Point Clouds","short_title":"KPConv","arxiv_id":"1904.08889","tex_dir":"arXiv-KPConv","pdf_file":"KPConv.pdf","url":"https://openaccess.thecvf.com/content_ICCV_2019/html/Thomas_KPConv_Flexible_and_Deformable_Convolution_for_Point_Clouds_ICCV_2019_paper.html","relevance_rank":3,"relevance_category":"point-cloud convolution backbone","adoptable_ideas":["Use kernel point convolution as a local-geometry encoder ablation.","Compare local convolutional bias against point transformer features."]}
{"title":"Déjà View: Looping Transformers for Multi-View 3D Reconstruction","short_title":"Déjà View","arxiv_id":"2605.30215","tex_dir":"arXiv-Deja-View","pdf_file":"Deja-View.pdf","url":"https://arxiv.org/abs/2605.30215","relevance_rank":3,"relevance_category":"iterative multi-view reconstruction transformer bridge","adoptable_ideas":["Use iterative multi-view refinement as future reconstruction bridge.","Keep looping transformer reconstruction separate from current oracle-scored target-RRI rollouts."]}
{"title":"VectorNet: Encoding HD Maps and Agent Dynamics from Vectorized Representation","short_title":"VectorNet","arxiv_id":"2005.04259","tex_dir":"","pdf_file":"","url":"https://arxiv.org/abs/2005.04259","relevance_rank":3,"relevance_category":"vectorized local-to-global scene aggregation precedent","adoptable_ideas":["Use vectorized local-to-global aggregation as a precedent for frustum/support path encoding.","Do not import ordered road-polyline semantics as candidate-row order."]}
{"title":"Perceiver: General Perception with Iterative Attention","short_title":"Perceiver","arxiv_id":"2103.03206","tex_dir":"","pdf_file":"","url":"https://arxiv.org/abs/2103.03206","relevance_rank":3,"relevance_category":"latent bottleneck support-memory compression ablation","adoptable_ideas":["Use asymmetric cross-attention into latent memory only when support or point tokens exceed direct set-attention scale.","Audit whether latent compression hides source provenance or calibration failures."]}
{"title":"Wayformer: Motion Forecasting via Simple & Efficient Attention Networks","short_title":"Wayformer","arxiv_id":"2207.05844","tex_dir":"","pdf_file":"","url":"https://arxiv.org/abs/2207.05844","relevance_rank":3,"relevance_category":"early late and hierarchical fusion ablation taxonomy","adoptable_ideas":["Use early, late, and hierarchical fusion as explicit ablations for target, support, history, and candidate streams.","Reject modality-agnostic fusion unless token type, source provenance, masks, and one-step calibration remain testable."]}
{"title":"HiVT: Hierarchical Vector Transformer for Multi-Agent Motion Prediction","short_title":"HiVT","arxiv_id":"","tex_dir":"","pdf_file":"","url":"https://openaccess.thecvf.com/content/CVPR2022/html/Zhou_HiVT_Hierarchical_Vector_Transformer_for_Multi-Agent_Motion_Prediction_CVPR_2022_paper.html","relevance_rank":3,"relevance_category":"local global temporal scene hierarchy precedent","adoptable_ideas":["Use local context extraction before global interaction as a support/candidate hierarchy ablation.","Keep autonomous-driving agent and lane priors outside the ARIA finite-candidate value contract."]}
{"title":"Scene Transformer: A Unified Architecture for Predicting Future Trajectories of Multiple Agents","short_title":"Scene Transformer","arxiv_id":"2106.08417","tex_dir":"","pdf_file":"","url":"https://openreview.net/forum?id=Wm3EA5OlHsG","relevance_rank":2,"relevance_category":"scene-centric masked attention and pooling cautionary source","adoptable_ideas":["Use masked sequence queries and typed attention as inspiration for support summarization.","Do not collapse ARIA candidate-row Q_H into one scene-level trajectory score."]}