Publications
2025
- [NAACL] Navigating the Cultural Kaleidoscope: A Hitchhiker’s Guide to Sensitivity in Large Language Models. Somnath Banerjee, Sayan Layek, Hari Shrawgi, and 7 more authors. 2025.
@misc{banerjee2025navigatingculturalkaleidoscopehitchhikers,
  title         = {Navigating the Cultural Kaleidoscope: A Hitchhiker's Guide to Sensitivity in Large Language Models},
  author        = {Banerjee, Somnath and Layek, Sayan and Shrawgi, Hari and Mandal, Rajarshi and Halder, Avik and Kumar, Shanu and Basu, Sagnik and Agrawal, Parag and Hazra, Rima and Mukherjee, Animesh},
  year          = {2025},
  eprint        = {2410.12880},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2410.12880}
}
- [NAACL] Breaking Boundaries: Investigating the Effects of Model Editing on Cross-linguistic Performance. Somnath Banerjee, Avik Halder, Rajarshi Mandal, and 4 more authors. 2025.
@misc{banerjee2025breakingboundariesinvestigatingeffects,
  title         = {Breaking Boundaries: Investigating the Effects of Model Editing on Cross-linguistic Performance},
  author        = {Banerjee, Somnath and Halder, Avik and Mandal, Rajarshi and Layek, Sayan and Soboroff, Ian and Hazra, Rima and Mukherjee, Animesh},
  year          = {2025},
  eprint        = {2406.11139},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2406.11139}
}
- [New Paper] MemeSense: An Adaptive In-Context Framework for Social Commonsense Driven Meme Moderation. Sayantan Adak, Somnath Banerjee, Rajarshi Mandal, and 4 more authors. 2025.
@misc{adak2025memesenseadaptiveincontextframework,
  title         = {MemeSense: An Adaptive In-Context Framework for Social Commonsense Driven Meme Moderation},
  author        = {Adak, Sayantan and Banerjee, Somnath and Mandal, Rajarshi and Halder, Avik and Layek, Sayan and Hazra, Rima and Mukherjee, Animesh},
  year          = {2025},
  eprint        = {2502.11246},
  archiveprefix = {arXiv},
  primaryclass  = {cs.IR},
  url           = {https://arxiv.org/abs/2502.11246}
}
- [New Paper] Soteria: Language-Specific Functional Parameter Steering for Multilingual Safety Alignment. Somnath Banerjee, Sayan Layek, Pratyush Chatterjee, and 2 more authors. 2025.
@misc{banerjee2025soterialanguagespecificfunctionalparameter,
  title         = {Soteria: Language-Specific Functional Parameter Steering for Multilingual Safety Alignment},
  author        = {Banerjee, Somnath and Layek, Sayan and Chatterjee, Pratyush and Mukherjee, Animesh and Hazra, Rima},
  year          = {2025},
  eprint        = {2502.11244},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2502.11244}
}
2024
- [AAAI] SafeInfer: Context Adaptive Decoding Time Safety Alignment for Large Language Models. Somnath Banerjee, Sayan Layek, Soham Tripathy, and 3 more authors. 2024.
@misc{banerjee2024safeinfercontextadaptivedecoding,
  title         = {SafeInfer: Context Adaptive Decoding Time Safety Alignment for Large Language Models},
  author        = {Banerjee, Somnath and Layek, Sayan and Tripathy, Soham and Kumar, Shanu and Mukherjee, Animesh and Hazra, Rima},
  year          = {2024},
  eprint        = {2406.12274},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2406.12274}
}
- [ACL] Sowing the Wind, Reaping the Whirlwind: The Impact of Editing Language Models. Rima Hazra, Sayan Layek, Somnath Banerjee, and 1 more author. In Findings of the Association for Computational Linguistics: ACL 2024, Aug 2024.
In the rapidly advancing field of artificial intelligence, the concept of ‘Red-Teaming’ or ‘Jailbreaking’ large language models (LLMs) has emerged as a crucial area of study. This approach is especially significant for assessing and enhancing the safety and robustness of these models. This paper investigates the intricate consequences of such modifications through model editing, uncovering a complex relationship between enhancing model accuracy and preserving its ethical integrity. Our in-depth analysis reveals a striking paradox: while injecting accurate information is crucial for model reliability, it can paradoxically destabilize the model’s foundational framework, resulting in unpredictable and potentially unsafe behaviors. Additionally, we propose a benchmark dataset, NicheHazardQA, to investigate this unsafe behavior both within the same and across topical domains. This aspect of our research sheds light on how the edits impact the model’s safety metrics and guardrails. Our findings show that model editing serves as a cost-effective tool for topical red-teaming by methodically applying targeted edits and evaluating the resultant model behavior.
@inproceedings{hazra-etal-2024-sowing,
  title     = {Sowing the Wind, Reaping the Whirlwind: The Impact of Editing Language Models},
  author    = {Hazra, Rima and Layek, Sayan and Banerjee, Somnath and Poria, Soujanya},
  editor    = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.960/},
  doi       = {10.18653/v1/2024.findings-acl.960},
  pages     = {16227--16239}
}
- [EMNLP] Safety Arithmetic: A Framework for Test-time Safety Alignment of Language Models by Steering Parameters and Activations. Rima Hazra, Sayan Layek, Somnath Banerjee, and 1 more author. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, Nov 2024.
Ensuring the safe alignment of large language models (LLMs) with human values is critical as they become integral to applications like translation and question answering. Current alignment methods struggle with dynamic user intentions and complex objectives, making models vulnerable to generating harmful content. We propose Safety Arithmetic, a training-free framework enhancing LLM safety across different scenarios: Base models, Supervised fine-tuned models (SFT), and Edited models. Safety Arithmetic involves Harm Direction Removal to avoid harmful content and Safety Alignment to promote safe responses. Additionally, we present NoIntentEdit, a dataset highlighting edit instances that could compromise model safety if used unintentionally. Our experiments show that Safety Arithmetic significantly improves safety measures, reduces over-safety, and maintains model utility, outperforming existing methods in ensuring safe content generation.
@inproceedings{hazra-etal-2024-safety,
  title     = {Safety Arithmetic: A Framework for Test-time Safety Alignment of Language Models by Steering Parameters and Activations},
  author    = {Hazra, Rima and Layek, Sayan and Banerjee, Somnath and Poria, Soujanya},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-main.1212/},
  doi       = {10.18653/v1/2024.emnlp-main.1212},
  pages     = {21759--21776}
}
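The "Harm Direction Removal" step is in the same family as task arithmetic on model weights. Below is a minimal, hypothetical sketch of what removing a harm direction from raw state dicts could look like; the function name, the `alpha` coefficient, and the recipe itself are illustrative assumptions, not the paper's implementation.

```python
# Hypothetical sketch of "harm direction removal" in the task-arithmetic style.
# All names (harm_direction_removal, alpha) and the specific recipe are
# assumptions for illustration; see the paper for the actual procedure.
import torch

def harm_direction_removal(base_state, harmful_state, alpha=0.5):
    """Subtract a scaled 'harm vector' (harmful - base) from the base weights."""
    edited = {}
    for name, w_base in base_state.items():
        harm_vector = harmful_state[name] - w_base   # direction toward harmful behavior
        edited[name] = w_base - alpha * harm_vector  # steer weights away from it
    return edited

# Toy usage with random tensors standing in for model parameters.
base = {"layer.weight": torch.randn(4, 4)}
harmful = {"layer.weight": base["layer.weight"] + 0.1 * torch.randn(4, 4)}
safe = harm_direction_removal(base, harmful, alpha=0.5)
```

Because such edits are pure tensor arithmetic, they fit the paper's training-free, test-time setting: no gradient updates are needed to apply them.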
- [EMNLP] Context Matters: Pushing the Boundaries of Open-Ended Answer Generation with Graph-Structured Knowledge Context. Somnath Banerjee, Amruit Sahoo, Sayan Layek, and 3 more authors. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, Nov 2024.
This paper introduces a novel framework that combines graph-driven context retrieval with knowledge-graph-based enhancement, honing the proficiency of LLMs, especially on domain-specific community question answering platforms like AskUbuntu, Unix, and ServerFault. We conduct experiments on various LLMs with different parameter sizes to evaluate their ability to ground knowledge and determine factual accuracy in answers to open-ended questions. Our methodology, GraphContextGen, consistently outperforms dominant text-based retrieval systems, demonstrating its robustness and adaptability to a larger number of use cases. This advancement highlights the importance of pairing context-rich data retrieval with LLMs, offering a renewed approach to knowledge sourcing and generation in AI systems. We also show that, due to rich contextual data retrieval, the crucial entities, along with the generated answer, remain factually coherent with the gold answer. We shall release the source code and datasets upon acceptance.
@inproceedings{banerjee-etal-2024-context,
  title     = {Context Matters: Pushing the Boundaries of Open-Ended Answer Generation with Graph-Structured Knowledge Context},
  author    = {Banerjee, Somnath and Sahoo, Amruit and Layek, Sayan and Dutta, Avik and Hazra, Rima and Mukherjee, Animesh},
  editor    = {Dernoncourt, Franck and Preo{\c{t}}iuc-Pietro, Daniel and Shimorina, Anastasia},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, US},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.emnlp-industry.23/},
  doi       = {10.18653/v1/2024.emnlp-industry.23},
  pages     = {290--302}
}
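As a rough illustration of what graph-driven context retrieval can look like, here is a toy sketch that matches question entities against a small knowledge graph and serializes one-hop facts into a prompt. The graph contents, matching rule, and prompt template are invented for illustration and are not the GraphContextGen code.

```python
# Minimal sketch of graph-driven context retrieval for answer generation,
# loosely in the spirit of the abstract. Everything here is a toy assumption.
import networkx as nx

kg = nx.Graph()
kg.add_edge("cron", "crontab", relation="configured_by")
kg.add_edge("cron", "systemd timers", relation="alternative_to")

def retrieve_graph_context(question, graph):
    """Collect one-hop facts for every graph entity mentioned in the question."""
    facts = []
    for entity in graph.nodes:
        if entity.lower() in question.lower():
            for neighbor in graph.neighbors(entity):
                rel = graph.edges[entity, neighbor]["relation"]
                facts.append(f"{entity} --{rel}--> {neighbor}")
    return facts

question = "How do I schedule a job with cron?"
context = "\n".join(retrieve_graph_context(question, kg))
prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
print(prompt)  # this grounded prompt would then be passed to an LLM
```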
- [ICWSM] How (un)ethical are instruction-centric responses of LLMs? Unveiling the vulnerabilities of safety guardrails to harmful queries. Somnath Banerjee, Sayan Layek, Rima Hazra, and 1 more author. 2024.
@misc{banerjee2024unethicalinstructioncentricresponsesllms,
  title         = {How (un)ethical are instruction-centric responses of LLMs? Unveiling the vulnerabilities of safety guardrails to harmful queries},
  author        = {Banerjee, Somnath and Layek, Sayan and Hazra, Rima and Mukherjee, Animesh},
  year          = {2024},
  eprint        = {2402.15302},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2402.15302}
}
- [ECML PKDD] DistALANER: Distantly Supervised Active Learning Augmented Named Entity Recognition in the Open Source Software Ecosystem. Somnath Banerjee, Avik Dutta, Aaditya Agrawal, and 2 more authors. In Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track, 2024.
As the AI revolution unfolds, the push toward automating support systems in diverse professional fields, ranging from open-source software and healthcare to banking and transportation, has become more pronounced. Central to the automation of these systems is the early detection of named entities, a task that is foundational yet fraught with challenges due to the need for domain-specific expert annotations amid a backdrop of specialized terminologies, making the process both costly and complex. In response to this challenge, our paper presents an innovative named entity recognition (NER) framework (https://github.com/NeuralSentinel/DistALANER) tailored for the open-source software domain. Our method stands out by employing a distantly supervised, two-step annotation process that cleverly exploits language heuristics, bespoke lookup tables, external knowledge bases, and an active learning model. This multifaceted strategy not only elevates model performance but also addresses the critical hurdles of high costs and the dearth of expert annotators. A notable achievement of our approach is its capability to enable pre-large language models (pre-LLMs) to significantly outperform specially designed generic/domain-specific LLMs on NER tasks. We also show the effectiveness of NER in the downstream task of relation extraction.
@inproceedings{10.1007/978-3-031-70381-2_20,
  author    = {Banerjee, Somnath and Dutta, Avik and Agrawal, Aaditya and Hazra, Rima and Mukherjee, Animesh},
  editor    = {Bifet, Albert and Krilavi{\v{c}}ius, Tomas and Miliou, Ioanna and Nowaczyk, Slawomir},
  title     = {DistALANER: Distantly Supervised Active Learning Augmented Named Entity Recognition in the Open Source Software Ecosystem},
  booktitle = {Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {313--331},
  isbn      = {978-3-031-70381-2}
}
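To make the distantly supervised first annotation step concrete, here is a hedged sketch of projecting a bespoke lookup table onto raw text to produce silver BIO tags. The lookup table and tag set are invented; the actual pipeline also combines language heuristics, external knowledge bases, and an active-learning model.

```python
# Illustrative sketch of distant supervision via a lookup table.
# LOOKUP and the SOFTWARE tag are invented stand-ins for the paper's resources.
LOOKUP = {"ubuntu": "SOFTWARE", "apt": "SOFTWARE", "gnome": "SOFTWARE"}

def distant_bio_tags(tokens, lookup):
    """Tag tokens found in the lookup table; everything else gets 'O'."""
    return [(tok, f"B-{lookup[tok.lower()]}" if tok.lower() in lookup else "O")
            for tok in tokens]

tokens = "Upgrading Ubuntu broke apt on my machine".split()
print(distant_bio_tags(tokens, LOOKUP))
# [('Upgrading', 'O'), ('Ubuntu', 'B-SOFTWARE'), ('broke', 'O'),
#  ('apt', 'B-SOFTWARE'), ('on', 'O'), ('my', 'O'), ('machine', 'O')]
```

Silver tags of this kind are cheap but noisy, which is why a second, active-learning pass over the most uncertain instances pays off.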
- [COLING] InfFeed: Influence Functions as a Feedback to Improve the Performance of Subjective Tasks. Somnath Banerjee, Maulindu Sarkar, Punyajoy Saha, and 2 more authors. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), May 2024.
Influence functions provide an apparatus for achieving explainability in deep neural models by quantifying how the perturbation of individual training instances might impact a test prediction. Our objectives in this paper are twofold. First, we incorporate influence functions as feedback into the model to improve its performance. Second, in a dataset extension exercise, we use influence functions to automatically identify data points that have been initially ‘silver’ annotated by some existing method and need to be cross-checked (and corrected) by annotators to improve the model performance. To meet these objectives, we introduce InfFeed, which uses influence functions to compute the influential instances for a target instance. Toward the first objective, we adjust the label of the target instance based on its influencers’ labels. In doing this, InfFeed outperforms the state-of-the-art baselines (including LLMs) by a maximum macro F1-score margin of almost 4% for hate speech classification, 3.5% for stance classification, 3% for irony detection, and 2% for sarcasm detection. Toward the second objective, we show that manually re-annotating only those silver-annotated data points in the extension set that have a negative influence can immensely improve the model performance, bringing it very close to the scenario where all the data points in the extension set have gold labels. This allows for a huge reduction in the number of data points that need to be manually annotated, since out of the silver-annotated extension dataset, the influence function scheme picks only ~1/1000 of the points for manual correction.
@inproceedings{banerjee-etal-2024-inffeed,
  title     = {{I}nf{F}eed: Influence Functions as a Feedback to Improve the Performance of Subjective Tasks},
  author    = {Banerjee, Somnath and Sarkar, Maulindu and Saha, Punyajoy and Mathew, Binny and Mukherjee, Animesh},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.794/},
  pages     = {9061--9072}
}
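A toy sketch of the first objective, adjusting a target instance's label from its influencers: here a majority vote over the labels of the top-k most influential training points. The influence matrix below is random and the vote rule is an assumption; InfFeed computes actual influence-function scores.

```python
# Hedged sketch: relabel targets from their most influential train instances.
# The influence matrix is random here, standing in for influence-function scores.
from collections import Counter
import numpy as np

rng = np.random.default_rng(0)
influence = rng.normal(size=(100, 10))   # (train instances x target instances)
train_labels = rng.integers(0, 2, 100)   # binary labels for the train set

def relabel_from_influencers(target_idx, k=5):
    """Majority vote over the labels of the top-k most influential train points."""
    top_k = np.argsort(influence[:, target_idx])[-k:]
    return Counter(train_labels[top_k]).most_common(1)[0][0]

print([relabel_from_influencers(t) for t in range(10)])
```

The second objective reuses the same scores in the opposite direction: silver-annotated points with strongly negative influence are the ones routed to human annotators.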
- [ASONAM] Duplicate Question Retrieval and Confirmation Time Prediction in Software Communities. Rima Hazra, Debanjan Saha, Amruit Sahoo, and 2 more authors. In Proceedings of the 2023 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM '23), Kusadasi, Turkiye, 2024.
Community Question Answering (CQA) in different domains is growing at a large scale because of the availability of several platforms and huge shareable information among users. With the rapid growth of such online platforms, a massive amount of archived data makes it difficult for moderators to retrieve possible duplicates for a new question and identify and confirm existing question pairs as duplicates at the right time. This problem is even more critical in CQAs corresponding to large software systems like askubuntu, where moderators need to be experts to comprehend something as a duplicate. Note that the prime challenge in such CQA platforms is that the moderators are themselves experts and are therefore usually extremely busy, making their time extraordinarily expensive. To facilitate the task of the moderators, in this work we tackle two significant issues for the askubuntu CQA platform: (1) retrieval of duplicate questions given a new question and (2) duplicate question confirmation time prediction. In the first task, we focus on retrieving duplicate questions from a question pool for a particular newly posted question. In the second task, we solve a regression problem to rank a pair of questions that could potentially take a long time to get confirmed as duplicates. For duplicate question retrieval, we propose a Siamese neural network based approach exploiting both text- and network-based features, which outperforms several state-of-the-art baseline techniques. Our method outperforms DupPredictor [33] and DUPE [1] by 5% and 7%, respectively. For duplicate confirmation time prediction, we use both standard machine learning models and neural networks along with the text- and graph-based features. We obtain Spearman’s rank correlation of 0.20 and 0.213 (statistically significant) for text- and graph-based features, respectively. We shall place all our codes and data in the public domain upon acceptance.
@inproceedings{10.1145/3625007.3627310,
  author    = {Hazra, Rima and Saha, Debanjan and Sahoo, Amruit and Banerjee, Somnath and Mukherjee, Animesh},
  title     = {Duplicate Question Retrieval and Confirmation Time Prediction in Software Communities},
  year      = {2024},
  isbn      = {9798400704093},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3625007.3627310},
  doi       = {10.1145/3625007.3627310},
  booktitle = {Proceedings of the 2023 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining},
  pages     = {203--212},
  numpages  = {10},
  keywords  = {community question answering, duplicate retrieval, duplicate confirmation time, tag co-occurrence network, negative sampling},
  location  = {Kusadasi, Turkiye},
  series    = {ASONAM '23}
}
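A compact sketch of a Siamese setup for duplicate retrieval, assuming questions are already encoded as fixed-size feature vectors (the paper combines text- and network-based features). Layer sizes, the margin, and the loss form below are assumptions for illustration, not the paper's architecture.

```python
# Toy Siamese network for scoring question pairs as duplicates.
# Input feature dimensions and the contrastive-style margin are invented.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SiameseEncoder(nn.Module):
    def __init__(self, in_dim=128, out_dim=32):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(in_dim, 64), nn.ReLU(), nn.Linear(64, out_dim))

    def forward(self, a, b):
        # The same weights encode both questions of a pair.
        return self.net(a), self.net(b)

encoder = SiameseEncoder()
q_new, q_pool = torch.randn(8, 128), torch.randn(8, 128)  # stand-in feature vectors
is_dup = torch.randint(0, 2, (8,)).float()                # 1 = duplicate pair

za, zb = encoder(q_new, q_pool)
sim = F.cosine_similarity(za, zb)
# Contrastive-style objective: duplicates pulled together, non-duplicates pushed apart.
loss = (is_dup * (1 - sim) + (1 - is_dup) * F.relu(sim - 0.2)).mean()
loss.backward()
```

At retrieval time, the new question is encoded once and ranked against the pool by cosine similarity.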
- [IEEE BigData] Evaluating the Ebb and Flow: An In-depth Analysis of Question-Answering Trends across Diverse Platforms. Rima Hazra, Agnik Saha, Somnath Banerjee, and 1 more author. 2024.
@misc{hazra2024evaluatingebbflowindepth,
  title         = {Evaluating the Ebb and Flow: An In-depth Analysis of Question-Answering Trends across Diverse Platforms},
  author        = {Hazra, Rima and Saha, Agnik and Banerjee, Somnath and Mukherjee, Animesh},
  year          = {2024},
  eprint        = {2309.05961},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SI},
  url           = {https://arxiv.org/abs/2309.05961}
}
2022
- [NeurIPS] Multilingual Abusive Comment Detection at Scale for Indic Languages. Vikram Gupta, Sumegh Roychowdhury, Mithun Das, and 5 more authors. In Advances in Neural Information Processing Systems, 2022.
@inproceedings{NEURIPS2022_a7c4163b,
  author    = {Gupta, Vikram and Roychowdhury, Sumegh and Das, Mithun and Banerjee, Somnath and Saha, Punyajoy and Mathew, Binny and vanchinathan, hastagiri prakash and Mukherjee, Animesh},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Koyejo, S. and Mohamed, S. and Agarwal, A. and Belgrave, D. and Cho, K. and Oh, A.},
  pages     = {26176--26191},
  publisher = {Curran Associates, Inc.},
  title     = {Multilingual Abusive Comment Detection at Scale for Indic Languages},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2022/file/a7c4163b33286261b24c72fd3d1707c9-Paper-Datasets_and_Benchmarks.pdf},
  volume    = {35},
  year      = {2022}
}
- [ACM Hypertext] Data Bootstrapping Approaches to Improve Low Resource Abusive Language Detection for Indic Languages. Mithun Das, Somnath Banerjee, and Animesh Mukherjee. In Proceedings of the 33rd ACM Conference on Hypertext and Social Media, Barcelona, Spain, 2022.
Abusive language is a growing concern on many social media platforms. Repeated exposure to abusive speech has created physiological effects on the target users. Thus, the problem of abusive language should be addressed in all forms for online peace and safety. While extensive research exists in abusive speech detection, most studies focus on English. Recently, many smearing incidents have occurred in India, which provoked diverse forms of abusive speech in online space in various languages based on the geographic location. Therefore, it is essential to deal with such malicious content. In this paper, to bridge the gap, we demonstrate a large-scale analysis of multilingual abusive speech in Indic languages. We examine different interlingual transfer mechanisms and observe the performance of various multilingual models for abusive speech detection across eight different Indic languages. We also experiment to show how robust these models are against adversarial attacks. Finally, we conduct an in-depth error analysis by looking into the models’ misclassified posts across various settings. We have made our code and models public for other researchers.
@inproceedings{10.1145/3511095.3531277,
  author    = {Das, Mithun and Banerjee, Somnath and Mukherjee, Animesh},
  title     = {Data Bootstrapping Approaches to Improve Low Resource Abusive Language Detection for Indic Languages},
  year      = {2022},
  isbn      = {9781450392334},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3511095.3531277},
  doi       = {10.1145/3511095.3531277},
  booktitle = {Proceedings of the 33rd ACM Conference on Hypertext and Social Media},
  pages     = {32--42},
  numpages  = {11},
  keywords  = {Abusive language, detection, multilingual, social media},
  location  = {Barcelona, Spain},
  series    = {HT '22}
}
- [AACL-IJCNLP] Hate Speech and Offensive Language Detection in Bengali. Mithun Das, Somnath Banerjee, Punyajoy Saha, and 1 more author. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Nov 2022.
Social media often serves as a breeding ground for various hateful and offensive content. Identifying such content on social media is crucial due to its impact on race, gender, or religion in an unprejudiced society. However, while there is extensive research in hate speech detection in English, there is a gap in hateful content detection in low-resource languages like Bengali. Besides, a current trend on social media is the use of Romanized Bengali for regular interactions. To overcome the existing research’s limitations, in this study, we develop an annotated dataset of 10K Bengali posts consisting of 5K actual and 5K Romanized Bengali tweets. We implement several baseline models for the classification of such hateful posts. We further explore the interlingual transfer mechanism to boost classification performance. Finally, we perform an in-depth error analysis by looking into the posts misclassified by the models. While training the actual and Romanized datasets separately, we observe that XLM-Roberta performs the best. Further, we witness that on joint training and few-shot training, MuRIL outperforms other models by interpreting the semantic expressions better. We make our code and dataset public for others.
@inproceedings{das-etal-2022-hate-speech,
  title     = {Hate Speech and Offensive Language Detection in {B}engali},
  author    = {Das, Mithun and Banerjee, Somnath and Saha, Punyajoy and Mukherjee, Animesh},
  editor    = {He, Yulan and Ji, Heng and Li, Sujian and Liu, Yang and Chang, Chia-Hui},
  booktitle = {Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2022},
  address   = {Online only},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.aacl-main.23/},
  doi       = {10.18653/v1/2022.aacl-main.23},
  pages     = {286--296}
}
2020
- [GAIC] Random Forest Boosted CNN: An Empirical Technique for Plant Classification. Somnath Banerjee and Rajendra Pamula. In Proceedings of the Global AI Congress 2019, 2020.
Plant identification and classification is one of the toughest jobs for inexperienced botanists. This paper proposes a hybrid model that combines a random forest with a convolutional neural network (CNN) to classify plant images. Our proposed method comprises two phases: feature extraction using the CNN and training the random forest model. The layers of the convolutional neural network extract the essential features. In this work, we use the PlantCLEF 2019 dataset (Amazonian Rainforest) to train and evaluate our model. We compare our method with the state-of-the-art methods for plant classification; the proposed method produces relatively higher accuracy than earlier methods.
@inproceedings{10.1007/978-981-15-2188-1_20,
  author    = {Banerjee, Somnath and Pamula, Rajendra},
  editor    = {Mandal, Jyotsna Kumar and Mukhopadhyay, Somnath},
  title     = {Random Forest Boosted CNN: An Empirical Technique for Plant Classification},
  booktitle = {Proceedings of the Global AI Congress 2019},
  year      = {2020},
  publisher = {Springer Singapore},
  address   = {Singapore},
  pages     = {251--261},
  isbn      = {978-981-15-2188-1}
}
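The abstract describes a two-phase pipeline: a CNN extracts image features, then a random forest is trained on those features. A minimal sketch under stated assumptions (a tiny untrained CNN and synthetic images standing in for PlantCLEF 2019) could look like this:

```python
# Sketch of the two-phase pipeline: CNN feature extraction, then a random
# forest classifier. The toy CNN is untrained and the data is synthetic;
# both are placeholders for illustration only.
import torch
import torch.nn as nn
from sklearn.ensemble import RandomForestClassifier

cnn = nn.Sequential(
    nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(8, 16, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),                      # -> 16-dim feature vector per image
)

images = torch.randn(40, 3, 64, 64)    # stand-in for plant images
labels = torch.randint(0, 5, (40,))    # stand-in for plant species ids

with torch.no_grad():                  # phase 1: feature extraction
    features = cnn(images).numpy()

forest = RandomForestClassifier(n_estimators=100, random_state=0)
forest.fit(features, labels.numpy())   # phase 2: train the random forest
print(forest.predict(features[:5]))
```

The appeal of this hybrid design is that the forest handles small training sets and noisy features more gracefully than a fully end-to-end classification head.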