publications

Publications with the keyword: image captioning

View all publications

2019
[7]
Transfer learning from language models to image caption generators: Better models may not transfer better (Tanti, M; Gatt, A and Camilleri, KP), arXiv preprint, volume 1901.01216, 2019.
@article{transfer2019,
	author     = {Tanti, M and Gatt, A and Camilleri, K P},
	title      = {Transfer learning from language models to image caption generators: Better models may not transfer better},
	journal    = {arXiv preprint},
	year       = {2019},
	eprinttype = {arXiv},
	eprint     = {1901.01216},
	url        = {https://arxiv.org/pdf/1901.01216.pdf},
	keywords   = {vision and language, natural language generation, image captioning, transfer learning}
}
[arxiv]
[6]
Quantifying the amount of visual information used by neural caption generators (Tanti, M; Gatt, A and Camilleri, K), In Computer Vision – ECCV 2018 Workshops: Proceedings of the Workshop on Shortcomings in Vision and Language (Leal-Taixé, L; Roth, S, eds.), Springer, 2019.
@inproceedings{sivl2019sensitivity,
	author     = {Tanti, M and Gatt, A and Camilleri, K},
	title      = {Quantifying the amount of visual information used by neural caption generators},
	booktitle  = {Computer Vision – {ECCV} 2018 Workshops: Proceedings of the Workshop on Shortcomings in Vision and Language},
	editor     = {Leal-Taixé, L and Roth, S},
	publisher  = {Springer},
	address    = {Munich, Germany},
	year       = {2019},
	pages      = {124--132},
	doi        = {10.1007/978-3-030-11018-5_11},
	url        = {https://link.springer.com/chapter/10.1007/978-3-030-11018-5_11},
	eprinttype = {arXiv},
	eprint     = {1810.05475},
	keywords   = {vision and language, image captioning}
}
[doi] [arxiv]
[5]
Pre-gen metrics: Predicting caption quality metrics without generating captions (Tanti, M; Gatt, A and Muscat, A), In Computer Vision – ECCV 2018 Workshops: Proceedings of the Workshop on Shortcomings in Vision and Language (Leal-Taixé, L; Roth, S, eds.), Springer, 2019.
@inproceedings{sivl2019pre-gen,
	author     = {Tanti, M and Gatt, A and Muscat, A},
	title      = {Pre-gen metrics: Predicting caption quality metrics without generating captions},
	booktitle  = {Computer Vision – {ECCV} 2018 Workshops: Proceedings of the Workshop on Shortcomings in Vision and Language},
	editor     = {Leal-Taixé, L and Roth, S},
	publisher  = {Springer},
	address    = {Munich, Germany},
	year       = {2019},
	pages      = {114--123},
	doi        = {10.1007/978-3-030-11018-5_10},
	url        = {https://link.springer.com/chapter/10.1007/978-3-030-11018-5_10},
	eprinttype = {arXiv},
	eprint     = {1810.05474},
	keywords   = {vision and language, image captioning}
}
[doi] [arxiv]
2018
[4]
Where to put the image in an image caption generator (Tanti, M; Gatt, A and Camilleri, K), Natural Language Engineering, volume 24, 2018.
@article{tanti2017image,
	author     = {Tanti, M and Gatt, A and Camilleri, K},
	title      = {Where to put the image in an image caption generator},
	journal    = {Natural Language Engineering},
	year       = {2018},
	volume     = {24},
	number     = {3},
	pages      = {467--489},
	doi        = {10.1017/S1351324918000098},
	url        = {https://www.cambridge.org/core/journals/natural-language-engineering/article/where-to-put-the-image-in-an-image-caption-generator/A5B0ACFFE8E4AEAA5840DC61F93153F3},
	eprinttype = {arXiv},
	eprint     = {1703.09137},
	keywords   = {vision and language, natural language generation, image captioning}
}
[doi] [arxiv]
[3]
Face2Text: Collecting an Annotated Image Description Corpus for the Generation of Rich Face Descriptions (Gatt, A; Tanti, M; Muscat, A; Paggio, P; Farrugia, R; Borg, C; Camilleri, K; Rosner, M and van der Plas, L), In Proceedings of the 11th edition of the Language Resources and Evaluation Conference (LREC'18), 2018.
@inproceedings{lrec2018,
	author     = {Gatt, A and Tanti, M and Muscat, A and Paggio, P and Farrugia, R and Borg, C and Camilleri, K and Rosner, M and van der Plas, L},
	title      = {{Face2Text}: Collecting an Annotated Image Description Corpus for the Generation of Rich Face Descriptions},
	booktitle  = {Proceedings of the 11th edition of the Language Resources and Evaluation Conference ({LREC}'18)},
	year       = {2018},
	url        = {http://www.lrec-conf.org/proceedings/lrec2018/pdf/226.pdf},
	eprinttype = {arXiv},
	eprint     = {1803.03827},
	keywords   = {vision and language, image captioning, face description}
}
[arxiv]
[2]
Predicting visual spatial relations in the Maltese language (Muscat, A and Gatt, A), In Breaking Barriers: Junior College Multidisciplinary Conference, University of Malta Junior College, 2018.
@inproceedings{jc2019-prepositions,
	author    = {Muscat, A and Gatt, A},
	title     = {Predicting visual spatial relations in the {Maltese} language},
	booktitle = {Breaking Barriers: Junior College Multidisciplinary Conference},
	publisher = {University of Malta Junior College},
	address   = {Malta},
	year      = {2018},
	pages     = {414--450},
	url       = {https://staff.um.edu.mt/albert.gatt/pubs/jc2018-prepositions.pdf},
	keywords  = {vision and language, maltese, image captioning, spatial relations}
}
2017
[1]
What is the Role of Recurrent Neural Networks (RNNs) in an Image Caption Generator? (Tanti, M; Gatt, A and Camilleri, K), In Proceedings of the 10th International Conference on Natural Language Generation (INLG'17), Association for Computational Linguistics, 2017.
@inproceedings{inlg2017,
	author     = {Tanti, M and Gatt, A and Camilleri, K},
	title      = {What is the Role of Recurrent Neural Networks ({RNNs}) in an Image Caption Generator?},
	booktitle  = {Proceedings of the 10th International Conference on Natural Language Generation ({INLG}'17)},
	publisher  = {Association for Computational Linguistics},
	address    = {Santiago de Compostela, Spain},
	year       = {2017},
	url        = {http://aclweb.org/anthology/W/W17/W17-3506.pdf},
	eprinttype = {arXiv},
	eprint     = {1708.02043},
	keywords   = {vision and language, natural language generation, image captioning}
}
[arxiv]