Publications

Preprints

How (Mis)calibrated is Your Federated CLIP and What To Do About It?
Mainak Singha, Masih Aminbeidokhti, Paolo Casari, Gianni Franchi, Elisa Ricci, Subhankar Roy
arXiv Preprint, 2025.
[ arXiv ] [ Code ] [ BibTeX ]

@article{singha2025mis,
  title={How (Mis)calibrated is Your Federated {CLIP} and What To Do About It?},
  author={Singha, Mainak and Aminbeidokhti, Masih and Casari, Paolo and Franchi, Gianni and Ricci, Elisa and Roy, Subhankar},
  journal={arXiv preprint arXiv:2512.04305},
  year={2025}
}

Conferences

CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation
Mainak Singha, Sarthak Mehrotra, Paolo Casari, Subhasis Chaudhuri, Elisa Ricci, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR), 2026.
[ arXiv ] [ Code ] [ Project ] [ BibTeX ]

@article{singha2026clipoint3d,
  title={{CLIPoint3D}: Language-Grounded Few-Shot Unsupervised {3D} Point Cloud Domain Adaptation},
  author={Singha, Mainak and Mehrotra, Sarthak and Casari, Paolo and Chaudhuri, Subhasis and Ricci, Elisa and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2602.20409},
  year={2026}
}

BioVLM: Routing Prompts, Not Parameters, for Cross-Modality Generalization in Biomedical VLMs
Mainak Singha, Tanisha Gupta, Ankit Jha, Muhammad Haris Khan, Sayantani Ghosh, Biplab Banerjee
Findings of the Association for Computational Linguistics (ACL Findings), 2026.
[ arXiv ] [ Code ] [ BibTeX ]

@article{singha2026biovlm,
  title={{BioVLM}: Routing Prompts, Not Parameters, for Cross-Modality Generalization in Biomedical {VLMs}},
  author={Singha, Mainak and Gupta, Tanisha and Jha, Ankit and Khan, Muhammad Haris and Ghosh, Sayantani and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2604.17629},
  year={2026}
}

GeoMeld: Toward Semantically Grounded Foundation Models for Remote Sensing
Maram Hasan, Aminur Hossain, Savitra Roy, Souparna Bhowmik, Ayush Patel, Mainak Singha, Subhasis Chaudhuri, Muhammad Haris Khan, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR) Workshops, 2026.
[ arXiv ] [ Code ] [ Datasets ] [ BibTeX ]

@article{hasan2026geomeld,
  title={{GeoMeld}: Toward Semantically Grounded Foundation Models for Remote Sensing},
  author={Hasan, Maram and Hossain, Md Aminur and Roy, Savitra and Bhowmik, Souparna and Patel, Ayush V and Singha, Mainak and Chaudhuri, Subhasis and Khan, Muhammad Haris and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2604.10591},
  year={2026}
}

Bi-Modal Textual Prompt Learning for Vision-Language Models in Remote Sensing
Pankhi Kashyap, Mainak Singha, Biplab Banerjee
IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2026.
[ arXiv ] [ Code ] [ BibTeX ]

@article{kashyap2026bi,
  title={Bi-Modal Textual Prompt Learning for Vision-Language Models in Remote Sensing},
  author={Kashyap, Pankhi and Singha, Mainak and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2601.20675},
  year={2026}
}

FedMVP: Federated Multi-modal Visual Prompt Tuning for Vision-Language Models
Mainak Singha, Subhankar Roy, Sarthak Mehrotra, Ankit Jha, Moloud Abdar, Biplab Banerjee, Elisa Ricci
International Conference on Computer Vision (ICCV), 2025.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{singha2025fedmvp,
  title={{FedMVP}: Federated Multimodal Visual Prompt Tuning for Vision-Language Models},
  author={Singha, Mainak and Roy, Subhankar and Mehrotra, Sarthak and Jha, Ankit and Abdar, Moloud and Banerjee, Biplab and Ricci, Elisa},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={17869--17878},
  year={2025}
}

OSLoPrompt: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in CLIP
Mohamad Hassan N C, Divyam Gupta, Mainak Singha, Sai Bhargav Rongali, Ankit Jha, Muhammad Haris Khan, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR), 2025.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{gupta2025osloprompt,
  title={{OSLoPrompt}: Bridging Low-Supervision Challenges and Open-Set Domain Generalization in {CLIP}},
  author={{Mohamad Hassan N C} and Gupta, Divyam and Singha, Mainak and Rongali, Sai Bhargav and Jha, Ankit and Khan, Muhammad Haris and Banerjee, Biplab},
  booktitle={Proceedings of the Computer Vision and Pattern Recognition Conference},
  pages={10110--10120},
  year={2025}
}

SDHSI-Net: Learning Better Representations for Hyperspectral Images via Self-Distillation [Oral]
Prachet Dev Singh, Shyamsundar Paramasivam, Sneha Barman, Mainak Singha, Ankit Jha, Girish Mishra, Biplab Banerjee
IEEE India Geoscience and Remote Sensing Symposium (InGARSS), 2025.
[ arXiv ] [ Code ] [ BibTeX ]

@article{singh2026sdhsi,
  title={{SDHSI-Net}: Learning Better Representations for Hyperspectral Images via Self-Distillation},
  author={Singh, Prachet Dev and Paramasivam, Shyamsundar and Barman, Sneha and Singha, Mainak and Jha, Ankit and Mishra, Girish and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2601.07416},
  year={2026}
}

Reconstruction Guided Few-shot Network For Remote Sensing Image Classification [Oral]
Mohit Jaiswal, Naman Jain, Shivani Pathak, Mainak Singha, Nikunja Bihari Kar, Ankit Jha, Biplab Banerjee
IEEE India Geoscience and Remote Sensing Symposium (InGARSS), 2025.
[ arXiv ] [ Code ] [ BibTeX ]

@article{jaiswal2026reconstruction,
  author  = {Jaiswal, Mohit and Jain, Naman and Pathak, Shivani and Singha, Mainak and Kar, Nikunja Bihari and Jha, Ankit and Banerjee, Biplab},
  title   = {Reconstruction Guided Few-shot Network For Remote Sensing Image Classification},
  journal = {arXiv preprint arXiv:2601.07335},
  year    = {2026},
}

MMLGNet: Cross-Modal Alignment of Remote Sensing Data using CLIP [Oral]
Aditya Chaudhary, Sneha Barman, Mainak Singha, Ankit Jha, Girish Mishra, Biplab Banerjee
IEEE India Geoscience and Remote Sensing Symposium (InGARSS), 2025.
[ arXiv ] [ Code ] [ BibTeX ]

@article{chaudhary2026mmlgnet,
  title={{MMLGNet}: Cross-Modal Alignment of Remote Sensing Data using {CLIP}},
  author={Chaudhary, Aditya and Barman, Sneha and Singha, Mainak and Jha, Ankit and Mishra, Girish and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2601.08420},
  year={2026}
}

Elevating All Zero-Shot Sketch-Based Image Retrieval Through Multimodal Prompt Learning
Mainak Singha, Ankit Jha, Divyam Gupta, Pranav Singla, Biplab Banerjee
European Conference on Computer Vision (ECCV), 2024.
[ PDF ] [ arXiv ] [ Code ] [ Project ] [ HTML ] [ BibTeX ]

@inproceedings{singha2024elevating,
  title={Elevating All Zero-Shot Sketch-Based Image Retrieval Through Multimodal Prompt Learning},
  author={Singha, Mainak and Jha, Ankit and Gupta, Divyam and Singla, Pranav and Banerjee, Biplab},
  booktitle={European Conference on Computer Vision},
  pages={1--19},
  year={2024},
  publisher={Springer}
}

COSMo: CLIP Talks on Open-Set Multi-Target Domain Adaptation
Munish Monga, Sachin Kumar Giroh, Ankit Jha, Mainak Singha, Biplab Banerjee, Jocelyn Chanussot
British Machine Vision Conference (BMVC), 2024.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@article{monga2024cosmo,
  title={{COSMo}: {CLIP} Talks on Open-Set Multi-Target Domain Adaptation},
  author={Monga, Munish and Giroh, Sachin Kumar and Jha, Ankit and Singha, Mainak and Banerjee, Biplab and Chanussot, Jocelyn},
  journal={arXiv preprint arXiv:2409.00397},
  year={2024}
}

Unknown Prompt, the only Lacuna: Unveiling CLIP’s Potential for Open Domain Generalization
Mainak Singha, Ankit Jha, Shirsha Bose, Ashwin Nair, Moloud Abdar, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR), 2024.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{singha2024unknown,
  title={Unknown Prompt, the only Lacuna: Unveiling {CLIP}'s Potential for Open Domain Generalization},
  author={Singha, Mainak and Jha, Ankit and Bose, Shirsha and Nair, Ashwin and Abdar, Moloud and Banerjee, Biplab},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages={13309--13319},
  year={2024}
}

CDAD-Net: Bridging Domain Gaps in Generalized Category Discovery
Sai Bhargav Rongali, Sarthak Mehrotra, Ankit Jha, Mohamad Hassan N C, Shirsha Bose, Tanisha Gupta, Mainak Singha, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR) Workshops, 2024.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{rongali2024cdad,
  title={{CDAD-Net}: Bridging Domain Gaps in Generalized Category Discovery},
  author={Rongali, Sai Bhargav and Mehrotra, Sarthak and Jha, Ankit and {Mohamad Hassan N C} and Bose, Shirsha and Gupta, Tanisha and Singha, Mainak and Banerjee, Biplab},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
  pages={2616--2626},
  year={2024}
}

GraphVL: Graph-Enhanced Semantic Modeling via Vision-Language Models for Generalized Class Discovery
Bhupendra Solanki, Ashwin R Nair, Mainak Singha, Souradeep Mukhopadhyay, Ankit Jha, Biplab Banerjee
Indian Conference on Computer Vision Graphics and Image Processing (ICVGIP), 2024.
[ PDF ] [ arXiv ] [ HTML ] [ BibTeX ]

@inproceedings{solanki2024graphvl,
  title={{GraphVL}: Graph-Enhanced Semantic Modeling via Vision-Language Models for Generalized Class Discovery},
  author={Solanki, Bhupendra and Nair, Ashwin R and Singha, Mainak and Mukhopadhyay, Souradeep and Jha, Ankit and Banerjee, Biplab},
  booktitle={Proceedings of the Fifteenth Indian Conference on Computer Vision Graphics and Image Processing},
  pages={1--10},
  year={2024}
}

StyLIP: Multi-Scale Style-Conditioned Prompt Learning for CLIP-based Domain Generalization
Shirsha Bose, Ankit Jha, Enrico Fini, Mainak Singha, Elisa Ricci, Biplab Banerjee
Winter Conference on Applications of Computer Vision (WACV), 2024.
[ PDF ] [ arXiv ] [ HTML ] [ BibTeX ]

@inproceedings{bose2024stylip,
  title={{StyLIP}: Multi-Scale Style-Conditioned Prompt Learning for {CLIP}-based Domain Generalization},
  author={Bose, Shirsha and Jha, Ankit and Fini, Enrico and Singha, Mainak and Ricci, Elisa and Banerjee, Biplab},
  booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages={5542--5552},
  year={2024}
}

C-SAW: Self-Supervised Prompt Learning for Image Generalization in Remote Sensing [Best Paper Award]
Avigyan Bhattacharya, Mainak Singha, Ankit Jha, Biplab Banerjee
Indian Conference on Computer Vision Graphics and Image Processing (ICVGIP), 2023.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{bhattacharya2023c,
  title={{C-SAW}: Self-Supervised Prompt Learning for Image Generalization in Remote Sensing},
  author={Bhattacharya, Avigyan and Singha, Mainak and Jha, Ankit and Banerjee, Biplab},
  booktitle={Proceedings of the Fourteenth Indian Conference on Computer Vision, Graphics and Image Processing},
  pages={1--10},
  year={2023}
}

GOPro: Generate and Optimize Prompts in CLIP using Self-Supervised Learning
Mainak Singha, Ankit Jha, Biplab Banerjee
British Machine Vision Conference (BMVC), 2023.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@article{singha2023gopro,
  title={{GOPro}: Generate and Optimize Prompts in {CLIP} using Self-Supervised Learning},
  author={Singha, Mainak and Jha, Ankit and Banerjee, Biplab},
  journal={arXiv preprint arXiv:2308.11605},
  year={2023}
}

AD-CLIP: Adapting Domains in Prompt Space Using CLIP
Mainak Singha, Harsh Pal, Ankit Jha, Biplab Banerjee
International Conference on Computer Vision (ICCV) Workshops, 2023.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{singha2023ad,
  title={{AD-CLIP}: Adapting Domains in Prompt Space Using {CLIP}},
  author={Singha, Mainak and Pal, Harsh and Jha, Ankit and Banerjee, Biplab},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision Workshops},
  pages={4355--4364},
  year={2023}
}

HAVE-Net: Hallucinated Audio-Visual Embeddings for Few-Shot Classification with Unimodal Cues [Best Paper Award]
Ankit Jha, Debabrata Pal, Mainak Singha, Naman Agarwal, Biplab Banerjee
European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD) Workshops, 2023.
[ PDF ] [ arXiv ] [ HTML ] [ BibTeX ]

@inproceedings{jha2023have,
  title={{HAVE-Net}: Hallucinated Audio-Visual Embeddings for Few-Shot Classification with Unimodal Cues},
  author={Jha, Ankit and Pal, Debabrata and Singha, Mainak and Agarwal, Naman and Banerjee, Biplab},
  booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},
  pages={390--398},
  year={2023},
  publisher={Springer}
}

APPLeNet: Visual Attention Parameterized Prompt Learning for Few-Shot Remote Sensing Image Generalization Using CLIP
Mainak Singha, Ankit Jha, Bhupendra Solanki, Shirsha Bose, Biplab Banerjee
Computer Vision and Pattern Recognition (CVPR) Workshops, 2023.
[ PDF ] [ arXiv ] [ Code ] [ HTML ] [ BibTeX ]

@inproceedings{singha2023applenet,
  title={{APPLeNet}: Visual Attention Parameterized Prompt Learning for Few-Shot Remote Sensing Image Generalization Using {CLIP}},
  author={Singha, Mainak and Jha, Ankit and Solanki, Bhupendra and Bose, Shirsha and Banerjee, Biplab},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops},
  pages={2024--2034},
  year={2023}
}

Journals

Meta-Learning to Teach Semantic Prompts for Open Domain Generalization in Vision-Language Models
Shirsha Bose, Mainak Singha, Ankit Jha, Souradeep Mukhopadhyay, Biplab Banerjee
Transactions on Machine Learning Research (TMLR), 2025.
[ PDF ] [ HTML ] [ BibTeX ]

@article{bose2025meta,
  author  = {Bose, Shirsha and Singha, Mainak and Jha, Ankit and Mukhopadhyay, Souradeep and Banerjee, Biplab},
  title   = {Meta-Learning to Teach Semantic Prompts for Open Domain Generalization in Vision-Language Models},
  journal = {Transactions on Machine Learning Research},
  year    = {2025},
}

RS³Lip: Consistency for Remote Sensing Image Classification on Part Embeddings using Self-Supervised Learning and CLIP
Ankit Jha, Mainak Singha, Avigyan Bhattacharya, Biplab Banerjee
Computer Vision and Image Understanding (CVIU), 2025.
[ PDF ] [ HTML ] [ BibTeX ]

@article{jha2025rs3lip,
  title={{RS$^3$Lip}: Consistency for Remote Sensing Image Classification on Part Embeddings using Self-Supervised Learning and {CLIP}},
  author={Jha, Ankit and Singha, Mainak and Bhattacharya, Avigyan and Banerjee, Biplab},
  journal={Computer Vision and Image Understanding},
  volume={251},
  pages={104254},
  year={2025},
  publisher={Elsevier}
}

Towards Molecular Structure Discovery from Cryo-ET Density Volumes via Modelling Auxiliary Semantic Prototypes
Ashwin Nair, Xingjian Li, Bhupendra Solanki, Souradeep Mukhopadhyay, Ankit Jha, Mostofa Rafid Uddin, Mainak Singha, Biplab Banerjee, Min Xu
Briefings in Bioinformatics, 2024.
[ PDF ] [ HTML ] [ BibTeX ]

@article{nair2024towards,
  title={Towards Molecular Structure Discovery from {Cryo-ET} Density Volumes via Modelling Auxiliary Semantic Prototypes},
  author={Nair, Ashwin and Li, Xingjian and Solanki, Bhupendra and Mukhopadhyay, Souradeep and Jha, Ankit and Uddin, Mostofa Rafid and Singha, Mainak and Banerjee, Biplab and Xu, Min},
  journal={Briefings in Bioinformatics},
  volume={26},
  number={1},
  year={2024},
  publisher={Oxford University Press}
}