@inproceedings{ses,author={Kim, Ji-yoon and Baek, Eunsu and Kim, Hyung-Sin},title={ImageNet-sES: A First Systematic Study of Sensor-Environment Simulation Anchored by Real Recaptures},booktitle={The IEEE/CVF Winter Conference on Applications of Computer Vision 2026 (WACV 2026)},year={2026},month=mar,tags={s4d}}
The rising prevalence of colorectal cancer necessitates early and accurate optical diagnosis of colorectal polyps. Despite advances in Computer-Aided Diagnosis (CAD) systems, challenges like data variability and inconsistent clinical performance hinder their widespread use. To address these limitations, we propose ColonOOD, an integrated CAD system for polyp localization, uncertainty-aware polyp classification, and Out-of-Distribution (OOD) polyp detection during colonoscopy. ColonOOD ensures robust classification of adenomatous, hyperplastic, and OOD polyps while providing calibrated uncertainty scores to support clinical decisions. Extensive evaluations across four medical centers and two public datasets demonstrate ColonOOD’s strong performance, achieving up to 79.69 % classification and 75.53 % OOD detection accuracy. This system offers reliable insights for endoscopists, marking a significant step toward broader clinical adoption of automated diagnostic tools in colorectal cancer care.
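To make the idea concrete, here is a minimal sketch of the general recipe the abstract describes, an uncertainty-aware classifier that also flags out-of-distribution polyps. It assumes a generic PyTorch classifier over the two in-distribution classes; the class list, confidence threshold, and entropy-based uncertainty score are illustrative assumptions, not ColonOOD's actual pipeline (which also includes localization and calibration).

```python
# Minimal sketch of uncertainty-aware polyp classification with an OOD flag.
# Assumptions: a generic PyTorch classifier over two in-distribution classes
# and a hypothetical confidence threshold; not ColonOOD's actual method.
import torch
import torch.nn.functional as F

CLASSES = ["adenomatous", "hyperplastic"]   # in-distribution classes (assumption)
CONF_THRESHOLD = 0.7                        # hypothetical, tuned on validation data

@torch.no_grad()
def diagnose(model: torch.nn.Module, polyp_crop: torch.Tensor):
    """Return (label, confidence, uncertainty) for one localized polyp crop."""
    model.eval()
    probs = F.softmax(model(polyp_crop.unsqueeze(0)), dim=-1).squeeze(0)
    entropy = -(probs * probs.clamp_min(1e-12).log()).sum().item()  # uncertainty score
    conf, idx = probs.max(dim=0)
    label = CLASSES[idx.item()] if conf.item() >= CONF_THRESHOLD else "OOD"
    return label, conf.item(), entropy
```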
@article{colonood,author={Park*, Sehyun and Lee*, Dongheon and Lee, Ji Young and Chun, Jaeyoung and Chang, Ji Young and Baek, Eunsu and Jin, Eun Hyo and Kim, Hyung-Sin},title={ColonOOD: A Complete Pipeline for Optical Diagnosis of Colorectal Polyps Integrating Out-of-Distribution Detection and Uncertainty Quantification},journal={Expert Systems with Applications},volume={295},articleno={128756},year={2026},month=jan,tags={colonood}}
2025
NeurIPS
AI Should Sense Better, Not Just Scale Bigger: Adaptive Sensing as a Paradigm Shift
Eunsu Baek, Keondo Park, JeongGil Ko, and 3 more authors
In The Thirty-Ninth Annual Conference on Neural Information Processing Systems Position Paper Track, Dec 2025
Current AI advances largely rely on scaling neural models and expanding training datasets to achieve generalization and robustness. Despite notable successes, this paradigm incurs significant environmental, economic, and ethical costs, limiting sustainability and equitable access. Inspired by biological sensory systems, where adaptation occurs dynamically at the input (e.g., adjusting pupil size, refocusing vision), we advocate for adaptive sensing as a necessary and foundational shift. Adaptive sensing proactively modulates sensor parameters (e.g., exposure, sensitivity, multimodal configurations) at the input level, significantly mitigating covariate shifts and improving efficiency. Empirical evidence from recent studies demonstrates that adaptive sensing enables small models (e.g., EfficientNet-B0) to surpass substantially larger models (e.g., OpenCLIP-H) trained with significantly more data and compute. We (i) outline a roadmap for broadly integrating adaptive sensing into real-world applications spanning humanoid, healthcare, autonomous systems, agriculture, and environmental monitoring, (ii) critically assess technical and ethical integration challenges, and (iii) propose targeted research directions, such as standardized benchmarks, real-time adaptive algorithms, multimodal integration, and privacy-preserving methods. Collectively, these efforts aim to transition the AI community toward sustainable, robust, and equitable artificial intelligence systems.
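As a rough illustration of input-level adaptation, the sketch below sweeps a small grid of sensor settings, captures one frame per setting, and keeps the capture the downstream model is most confident about. The camera interface (set_exposure/set_iso/capture), the parameter grid, and the use of maximum softmax confidence as a quality proxy are all assumptions for illustration, not the paper's algorithm.

```python
# Hedged sketch of adaptive sensing: choose sensor parameters that make the
# deployed model most confident, instead of scaling the model itself.
import itertools
import torch
import torch.nn.functional as F

EXPOSURES = [1 / 250, 1 / 60, 1 / 15]   # seconds (illustrative grid)
ISOS = [100, 400, 1600]

@torch.no_grad()
def adaptive_capture(camera, model):
    best = None
    for exposure, iso in itertools.product(EXPOSURES, ISOS):
        camera.set_exposure(exposure)    # hypothetical camera control calls
        camera.set_iso(iso)
        frame = camera.capture()         # -> torch.Tensor of shape (3, H, W)
        probs = F.softmax(model(frame.unsqueeze(0)), dim=-1)
        confidence = probs.max().item()  # proxy for "how well the model sees"
        if best is None or confidence > best[0]:
            best = (confidence, frame, (exposure, iso))
    return best  # (confidence, chosen frame, chosen sensor settings)
```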
@inproceedings{adaptivesensing,author={Baek, Eunsu and Park, Keondo and Ko, JeongGil and Oh, Min-hwan and Gong, Taesik and Kim, Hyung-Sin},title={AI Should Sense Better, Not Just Scale Bigger: Adaptive Sensing as a Paradigm Shift},booktitle={The Thirty-Ninth Annual Conference on Neural Information Processing Systems Position Paper Track},year={2025},month=dec,tags={s4d}}
Domain shift remains a persistent challenge in deep-learning-based computer vision, often requiring extensive model modifications or large labeled datasets to address. Inspired by human visual perception, which adjusts input quality through corrective lenses rather than over-training the brain, we propose Lens, a novel camera sensor control method that enhances model performance by capturing high-quality images from the model’s perspective rather than relying on traditional human-centric sensor control. Lens is lightweight and adapts sensor parameters to specific models and scenes in real-time. At its core, Lens utilizes VisiT, a training-free, model-specific quality indicator that evaluates individual unlabeled samples at test time using confidence scores without additional adaptation costs. To validate Lens, we introduce ImageNet-ES Diverse, a new benchmark dataset capturing natural perturbations from varying sensor and lighting conditions. Extensive experiments on both ImageNet-ES and our new ImageNet-ES Diverse show that Lens significantly improves model accuracy across various baseline schemes for sensor control and model modification while maintaining low latency in image captures. Lens effectively compensates for large model size differences and integrates synergistically with model improvement techniques. Our code and dataset are available at github.com/Edw2n/Lens.git.
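A VisiT-style quality score can be sketched very compactly: rank candidate captures of the same scene by the deployed model's own confidence, with no labels and no extra adaptation. The function names below are illustrative assumptions; Lens's actual sensor-parameter search is described in the paper and repository (github.com/Edw2n/Lens.git).

```python
# Minimal sketch of a training-free, model-specific quality indicator:
# score each unlabeled capture by the target model's confidence at test time.
import torch
import torch.nn.functional as F

@torch.no_grad()
def visit_score(model: torch.nn.Module, image: torch.Tensor) -> float:
    """Confidence of the target model on one unlabeled capture (higher is better)."""
    model.eval()
    probs = F.softmax(model(image.unsqueeze(0)), dim=-1)
    return probs.max().item()

def pick_best_capture(model, captures):
    """captures: list of (sensor_setting, image tensor); returns the best pair."""
    return max(captures, key=lambda item: visit_score(model, item[1]))
```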
@inproceedings{lens,author={Baek, Eunsu and Gong, Taesik and Kim, Hyung-Sin},title={Adaptive Camera Sensor for Vision Model},booktitle={The 13th International Conference on Learning Representations},year={2025},month=apr,tags={s4d}}
Computer vision applications predict on digital images acquired by a camera from physical scenes through light. However, conventional robustness benchmarks rely on perturbations in digitized images, diverging from distribution shifts occurring in the image acquisition process. To bridge this gap, we introduce a new distribution shift dataset, ImageNet-ES, comprising variations in environmental and camera sensor factors by directly capturing 202k images with a real camera in a controllable testbed. With the new dataset, we evaluate out-of-distribution (OOD) detection and model robustness. We find that existing OOD detection methods do not cope with the covariate shifts in ImageNet-ES, implying that the definition and detection of OOD should be revisited to embrace real-world distribution shifts. We also observe that the model becomes more robust in both ImageNet-C and -ES by learning environment and sensor variations in addition to existing digital augmentations. Lastly, our results suggest that effective shift mitigation via camera sensor control can significantly improve performance without increasing model size. With these findings, our benchmark may aid future research on robustness, OOD, and camera sensor control for computer vision. Our code and dataset are available at https://github.com/Edw2n/ImageNet-ES.
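The kind of evaluation this benchmark enables can be sketched with a standard maximum-softmax-probability (MSP) OOD score: set a rejection threshold on clean validation images, then measure how often covariate-shifted (but semantically in-distribution) ImageNet-ES captures get flagged as OOD. The data loaders and the 5% false-positive budget below are assumptions, not the paper's evaluation code.

```python
# Hedged sketch: MSP-based OOD scoring applied to covariate-shifted recaptures.
import torch
import torch.nn.functional as F

@torch.no_grad()
def msp_scores(model, loader):
    """Maximum softmax probability per sample; labels are not needed."""
    scores = []
    for images, _ in loader:
        probs = F.softmax(model(images), dim=-1)
        scores.append(probs.max(dim=-1).values)
    return torch.cat(scores)

def flagged_as_ood_rate(model, clean_loader, es_loader, fpr=0.05):
    """Fraction of shifted samples rejected at a threshold set on clean data."""
    clean = msp_scores(model, clean_loader)
    threshold = torch.quantile(clean, fpr)   # keeps ~95% of clean samples
    es = msp_scores(model, es_loader)
    return (es < threshold).float().mean().item()
```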
@inproceedings{imagenet-es,author={Baek, Eunsu and Park, Keondo and Kim, Jiyoon and Kim, Hyung-Sin},title={Unexplored Faces of Robustness and Out-of-Distribution: Covariate Shifts in Environment and Sensor Domains},booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},year={2024},month=jun,tags={s4d}}
Virtual try-on (VTO) superimposes clothing over a user's image or video, enhancing the online shopping experience. On-device VTO can preserve user privacy, but most VTO techniques cannot run on resource-constrained devices due to excessive computation overhead. In this demo, we present a novel Android application for on-device video VTO based on MIRROR, the state-of-the-art mobile VTO system. The application minimizes video generation time by splitting the process into two phases, taking 0.76 minutes to convert a 10-second-long video on a Galaxy S24 Ultra. Our application scored 78.5 (above average) on the SUS usability test. A companion video is provided at: https://youtu.be/YTExc8W5BzM
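A conceptual sketch of the two-phase split mentioned above (not the app's actual code): the heavy try-on synthesis runs once offline, and a lightweight per-frame step runs online so the user-facing part stays fast on a phone. The synthesize, track_and_warp, and compose helpers are placeholders for illustration.

```python
# Conceptual two-phase split for on-device video VTO (illustrative only).
def offline_phase(reference_frame, clothing_image, synthesize):
    """Heavy, one-time work: GAN-based try-on on a single reference frame."""
    return synthesize(reference_frame, clothing_image)   # tried-on appearance

def online_phase(video_frames, tried_on, track_and_warp, compose):
    """Lightweight per-frame step: warp the tried-on clothes onto each frame."""
    return [compose(frame, track_and_warp(tried_on, frame)) for frame in video_frames]
```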
@inproceedings{10.1145/3643832.3661842,author={Ahn, Dongha and Kang, Dong-Sig and Baek, Eunsu and Kim, Hyung-Sin},title={Demo: On-Device Video Virtual Try-On for Mobile Shopping},year={2024},month=jun,isbn={9798400705816},publisher={Association for Computing Machinery},address={New York, NY, USA},url={https://doi.org/10.1145/3643832.3661842},doi={10.1145/3643832.3661842},booktitle={Proceedings of the 22nd Annual International Conference on Mobile Systems, Applications and Services},pages={610–611},numpages={2},keywords={virtual try-on, video, mobile system, on-device computing},location={Minato-ku, Tokyo, Japan},series={MOBISYS '24},tags={mirror}}
We present MIRROR, an on-device video virtual try-on (VTO) system that provides realistic, private, and rapid experiences in mobile clothes shopping. Despite recent advancements in generative adversarial networks (GANs) for VTO, designing MIRROR involves two challenges: (1) data discrepancy due to restricted training data that miss various poses, body sizes, and backgrounds and (2) local computation overhead that uses up 24% of battery for converting only a single video. To alleviate the problems, we propose a generalizable VTO GAN that not only discerns intricate human body semantics but also captures domain-invariant features without requiring additional training data. In addition, we craft lightweight, reliable clothes/pose-tracking that generates refined pixel-wise warping flow without neural-net computation. As a holistic system, MIRROR integrates the new VTO GAN and tracking method with meticulous pre/post-processing, operating in two distinct phases (on/offline). Our results on Android smartphones and real-world user videos show that compared to a cutting-edge VTO GAN, MIRROR achieves 6.5x better accuracy with 20.1x faster video conversion and 16.9x less energy consumption.
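To illustrate the last building block, applying a pixel-wise warping flow to the tried-on clothing can be done with torch.nn.functional.grid_sample, as sketched below. This only shows how such a flow is *applied*; MIRROR's lightweight clothes/pose tracking that actually *produces* the flow without neural-net computation is described in the paper, and the tensor shapes are assumptions for illustration.

```python
# Hedged sketch: warp a tried-on clothing image with a pixel-wise flow field.
import torch
import torch.nn.functional as F

def warp_with_flow(clothes: torch.Tensor, flow: torch.Tensor) -> torch.Tensor:
    """clothes: (1, 3, H, W); flow: (1, 2, H, W) pixel offsets; returns warped clothes."""
    _, _, h, w = clothes.shape
    # Base sampling grid in normalized [-1, 1] coordinates, shape (1, H, W, 2).
    ys, xs = torch.meshgrid(torch.linspace(-1, 1, h), torch.linspace(-1, 1, w), indexing="ij")
    base_grid = torch.stack((xs, ys), dim=-1).unsqueeze(0)
    # Convert pixel offsets to normalized offsets (align_corners=True convention).
    norm_flow = torch.stack((flow[:, 0] * 2 / (w - 1), flow[:, 1] * 2 / (h - 1)), dim=-1)
    return F.grid_sample(clothes, base_grid + norm_flow, align_corners=True)
```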
@article{10.1145/3631420,author={Kang, Dong-Sig and Baek, Eunsu and Son, Sungwook and Lee, Youngki and Gong, Taesik and Kim, Hyung-Sin},title={MIRROR: Towards Generalizable On-Device Video Virtual Try-On for Mobile Shopping},year={2023},issue_date={December 2023},publisher={Association for Computing Machinery},address={New York, NY, USA},volume={7},number={4},url={https://doi.org/10.1145/3631420},doi={10.1145/3631420},journal={Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies (also published in ACM UbiComp, Oct 2024)},month=dec,articleno={163},numpages={27},tags={mirror}}